将 git lfs ls-files 与 git ls-files ':(attr:filter=lfs)' 进行比较是一种检测不受 lfs 管理的 lfs 文件的可靠方法吗?
Is comparing git lfs ls-files with git ls-files ':(attr:filter=lfs)' a reliable way to detect lfs files that are not managed by lfs?
我试图找到一种方法来确定 git 存储库中的文件是否由 git-lfs 正确管理。
到目前为止,我发现比较 git lfs ls-files
和 git ls-files ':(attr:filter=lfs)'
的结果似乎可以满足我的要求。
# First, naive version of the check: every path whose git attributes say
# filter=lfs is expected to also be listed by `git lfs ls-files`.
Add-Type -AssemblyName 'System.Linq';

# A `git lfs ls-files` line looks like '12345678 * my.dll'; the path is the third
# space-separated field. The -n flag is not used because not all lfs versions support it.
[string[]] $trackedByLfs = git lfs ls-files | ForEach-Object { $_.Split(' ', 3)[2] };

# Paths that git attributes mark with filter=lfs.
[string[]] $markedForLfs = git ls-files ':(attr:filter=lfs)';

# Set difference: marked for lfs but not listed by lfs.
$filePathsNotUnderLfs = [System.Linq.Enumerable]::ToArray(
    [System.Linq.Enumerable]::Except($markedForLfs, $trackedByLfs)
);
$count = $filePathsNotUnderLfs.Count;

Write-Host "There are $($count) lfs files that are not under lfs";
foreach ($filePathNotUnderLfs in $filePathsNotUnderLfs)
{
    Write-Host "`t'$filePathNotUnderLfs'";
}

if ($count -le 0)
{
    return 0;
}

Write-Error "There are $count lfs files that are not under lfs";
return 1;
我仍然不确定这是否是一种可靠的方法。那么,这是一种可靠/正确的方式吗?或者还有其他可用的选择吗?
P.S.: 我想要实现的是创建一个拉取请求验证构建(针对 TFS 存储库),用来检测经常发生的 lfs 违规(提交的文件未添加到 lfs,或其他类似情况)。这些违规会导致不必要的警告和仓库膨胀,往好了说是小麻烦,往坏了说是大问题。
编辑:
嗯,发现当前代码有一些问题:
- 空文件的处理
- ls-files 输出中对非 ASCII 字符使用的八进制编码
- 符号链接
但它们相对容易修复,不会改变问题的要点。
我还没有找到它是否是一个真正可靠的方法,但到目前为止我的测试似乎证实了这一点。
我们最终使用以下脚本作为构建管道的一部分:
#See
# Script-level parameters.
#   RepositoryLocalPath - mandatory string; handed to Main at the bottom of the script.
[CmdletBinding()]
[OutputType([int])]
Param (
[Parameter(Mandatory = $true)]
[string]
$RepositoryLocalPath
)
# Treat errors as terminating so that failures surface as exceptions.
$ErrorActionPreference = "Stop";
# Throws System.InvalidOperationException when the most recent native command
# (as reported by $LASTEXITCODE) did not exit with code 0; otherwise does nothing.
function Assert-LastExitCodeIs0
{
    if ($LASTEXITCODE -ne 0)
    {
        # The exception type name was previously misspelled, which made New-Object
        # fail with a "cannot find type" error instead of raising this exception.
        throw (New-Object System.InvalidOperationException("LASTEXITCODE is $LASTEXITCODE"));
    }
}
# Decodes a path printed in git's double-quoted form, where non-ASCII bytes appear
# as three-digit octal escapes (e.g. "\303\244.txt"), back into a UTF-8 string.
#
# Parameters:
#   String - the text to decode.
# Returns:
#   The input unchanged when it is empty, shorter than two characters, or not
#   wrapped in double quotes; otherwise the text between the quotes with every
#   run of octal escapes replaced by the UTF-8 decoding of those bytes.
# NOTE(review): other backslash escapes git may emit (\" \\ \t \n ...) are left
# as-is, exactly as before - confirm whether such paths can occur in the repository.
function Get-StringFromOctal
{
    [CmdLetBinding()]
    [OutputType([string])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $String
    )
    if ([String]::IsNullOrEmpty($String) -or ($String.Length -lt 2))
    {
        return $String;
    }
    if (($String[0] -ne '"') -or
        ($String[$String.Length - 1] -ne '"'))
    {
        return $String;
    }
    $String = $String.Substring(1, $String.Length - 2);
    # A run of escapes, each one a backslash followed by exactly three octal digits
    # (\000-\377). Matching exactly three digits keeps ordinary digits that follow
    # an escape ("\303\2441.txt") out of the byte value.
    [regex] $regex = '(\\[0-3][0-7]{2})+';
    return $regex.Replace(
        $String,
        {
            Param ($Capture)
            # Every repetition of group 1 is one escaped byte; decode the whole run
            # together because a single character may span several bytes.
            [byte[]] $bytes = @($Capture.Groups[1].Captures |
                ForEach-Object { [System.Convert]::ToByte($_.Value.Substring(1), 8) });
            return [System.Text.Encoding]::UTF8.GetString($bytes);
        });
}
# Brings a path taken from ls-files output into a comparable form: surrounding
# whitespace is trimmed and the result is passed through Get-StringFromOctal.
function Normalize-GitLsFilesPath
{
    [CmdLetBinding()]
    [OutputType([string])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    # Why the octal paths are parsed here instead of being avoided:
    #  - `ls-files -z` would help for git itself, but `lfs ls-files` has no such option.
    #  - Setting core.quotePath to 0 brings encoding problems of its own, and it would
    #    change git config settings, which is undesirable even if reverting always(?) succeeds.
    # Parsing the octal form is therefore the simpler route.
    $trimmedPath = $Path.Trim();
    return Get-StringFromOctal -String $trimmedPath;
}
# Returns $true when the item at the given literal path has a Length of 0.
function Is-FileEmpty
{
    [CmdLetBinding()]
    [OutputType([bool])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    $item = Get-Item -LiteralPath $Path;
    $isEmpty = ($item.Length -eq 0);
    return $isEmpty;
}
# Predicate used to drop empty files from the list of suspected violations:
# returns $true only when the file at Path is not empty.
function Is-NotEmptyLfsFile
{
    [CmdLetBinding()]
    [OutputType([bool])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    # An empty file stays an empty file whether it is under lfs or not, see
    # https://github.com/git-lfs/git-lfs/issues/2863
    # Such files seem not to get the lfs attributes, so filtering them out is the only option.
    $isEmpty = Is-FileEmpty -Path $Path;
    return (-not $isEmpty);
}
# Lists the paths reported by `git lfs ls-files` for the current working directory.
#
# Returns:
#   A string[] (possibly empty) of normalized paths, emitted as a single object.
# Throws:
#   Whatever Assert-LastExitCodeIs0 throws when git exits with a non-zero code.
function Get-ActualLfsFilePaths
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    Write-Host 'Getting actual lfs file paths.';
    # @() guarantees an array: with no output the variable would otherwise be $null,
    # and piping $null still runs ForEach-Object once, failing on $_.Split.
    $lfsLsFilesOutput = @(git lfs ls-files);
    Assert-LastExitCodeIs0;
    [string[]] $actualLfsFilePaths = @($lfsLsFilesOutput |
        Where-Object { -not [string]::IsNullOrWhiteSpace($_) } |
        ForEach-Object {
            #'12345678 * my.dll' - not all lfs versions support -n flag, so it is better to be conservative
            $lsFilePath = $_.Split(' ', 3)[2];
            Normalize-GitLsFilesPath -Path $lsFilePath;
        });
    Write-Host "There are $($actualLfsFilePaths.Count) files that are actually under lfs";
    # The leading comma keeps the array from being unrolled on output.
    return , $actualLfsFilePaths;
}
# Lists the paths that git attributes mark with filter=lfs, skipping symlinks.
#
# Returns:
#   A string[] (possibly empty) of normalized paths, emitted as a single object.
#   Empty files are still included at this point.
# Throws:
#   Whatever Assert-LastExitCodeIs0 throws when git exits with a non-zero code.
function Get-ShouldBeLfsFilePaths
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    Write-Host 'Getting should be under lfs file paths.';
    # @() guarantees an array: with no output the variable would otherwise be $null,
    # and piping $null still runs ForEach-Object once, failing on $_.Split.
    [string[]] $lsFilesOutput = @(git ls-files -s ':(attr:filter=lfs)');
    Assert-LastExitCodeIs0;
    [string[]] $shouldBeUnderLfsFilePaths = @($lsFilesOutput |
        ForEach-Object {
            #120000 1234567890abcdef 0 mylib.dylib
            # The leading comma passes the four fields down the pipeline as one array.
            , $_.Split($null, 4);
        } |
        Where-Object {
            $symlinkBitMaskString = '120000';
            $fileChmodString = $_[0];
            return ($fileChmodString -ne $symlinkBitMaskString); # Perhaps we should check actual bitmask?
        } |
        ForEach-Object {
            Normalize-GitLsFilesPath -Path $_[3];
        });
    Write-Host "There are $($shouldBeUnderLfsFilePaths.Count) files that should be under lfs (may include flukes - empty files)";
    # The leading comma keeps the array from being unrolled on output.
    return , $shouldBeUnderLfsFilePaths;
}
# Computes the paths that should be under lfs but are not listed by lfs.
#
# Returns:
#   A string[] emitted as a single object: the result of Get-ShouldBeLfsFilePaths
#   minus the result of Get-ActualLfsFilePaths, with empty files removed.
function Get-LfsFilePathsNotUnderLfs
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    [string[]] $actualLfsFilePaths = Get-ActualLfsFilePaths;
    [string[]] $shouldBeUnderLfsFilePaths = Get-ShouldBeLfsFilePaths;
    # Enumerable.Except throws ArgumentNullException on a null sequence, so a
    # missing list is treated as an empty one.
    if ($null -eq $actualLfsFilePaths)
    {
        $actualLfsFilePaths = @();
    }
    if ($null -eq $shouldBeUnderLfsFilePaths)
    {
        $shouldBeUnderLfsFilePaths = @();
    }
    Add-Type -AssemblyName 'System.Linq';
    Write-Host 'Comparing actual lfs and should be under lfs files.'
    # The leading comma keeps the resulting array from being unrolled on output.
    return , [System.Linq.Enumerable]::ToArray(
        [System.Linq.Enumerable]::Where(
            [System.Linq.Enumerable]::Except($shouldBeUnderLfsFilePaths, $actualLfsFilePaths),
            [Func[String, Boolean]] ${function:Is-NotEmptyLfsFile}
        )
    );
}
# Runs the check in the current working directory and prints every offending path.
# Returns 0 when nothing was found and 1 otherwise.
function Main-WithRepositoryAsWorkingFolder
{
    $filePathsNotUnderLfs = Get-LfsFilePathsNotUnderLfs;
    $count = $filePathsNotUnderLfs.Count;

    Write-Host "There are $($count) lfs files that are not under lfs";
    foreach ($filePathNotUnderLfs in $filePathsNotUnderLfs)
    {
        Write-Host "`t'$filePathNotUnderLfs'";
    }

    if ($count -ne 0)
    {
        return 1;
    }
    return 0;
}
# Runs the lfs check with the repository as the working directory and UTF-8
# console encodings, restoring the previous location and encodings afterwards.
#
# Parameters:
#   RepositoryLocalPath - path of the repository to check.
# Returns:
#   The result of Main-WithRepositoryAsWorkingFolder, or 2 when anything throws
#   (the error and its script stack trace are written to the host).
function Main
{
    [CmdletBinding()]
    [OutputType([int])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $RepositoryLocalPath
    )
    Write-Host "RepositoryLocalPath = $RepositoryLocalPath";
    # Capture the current state first; nothing is modified before the try block so
    # that the finally block can always restore it and any failure is reported as 2.
    $originalGetLocation = (Get-Location).Path;
    $originalCurrentDirectory = [System.IO.Directory]::GetCurrentDirectory();
    $originalOutputEncoding = $OutputEncoding;
    $originalConsoleOutputEncoding = [Console]::OutputEncoding;
    $originalConsoleInputEncoding = [Console]::InputEncoding;
    try
    {
        # It is simpler to set working dir rather than use Process.Start with WorkingDirectory everywhere, and more reliable than Start-Process (that one may in some cases have issues with return exit code).
        Set-Location -LiteralPath $RepositoryLocalPath;
        # Use the resolved location: a relative RepositoryLocalPath would otherwise be
        # interpreted against the process directory rather than the PowerShell location.
        [System.IO.Directory]::SetCurrentDirectory((Get-Location).ProviderPath);
        # Otherwise we won't get proper file paths from git lfs ls-files, as it doesn't use octal encoding
        # And using output redirection may lead us to even bigger problems|difficulties.
        $OutputEncoding = [System.Text.Encoding]::UTF8;
        [Console]::OutputEncoding = [System.Text.Encoding]::UTF8;
        [Console]::InputEncoding = [System.Text.Encoding]::UTF8;
        return Main-WithRepositoryAsWorkingFolder;
    }
    catch
    {
        Write-Host "$_ $($_.ScriptStackTrace)";
        return 2;
    }
    finally
    {
        Set-Location -LiteralPath $originalGetLocation;
        [System.IO.Directory]::SetCurrentDirectory($originalCurrentDirectory);
        $OutputEncoding = $originalOutputEncoding;
        [Console]::OutputEncoding = $originalConsoleOutputEncoding;
        [Console]::InputEncoding = $originalConsoleInputEncoding;
    }
}
# Script entry point: run Main and use its return value as the process exit code.
exit (Main -RepositoryLocalPath $RepositoryLocalPath);
我试图找到一种方法来确定 git 存储库中的文件是否由 git-lfs 正确管理。
到目前为止,我发现比较 git lfs ls-files
和 git ls-files ':(attr:filter=lfs)'
的结果似乎可以满足我的要求。
# First, naive version of the check: every path whose git attributes say
# filter=lfs is expected to also be listed by `git lfs ls-files`.
Add-Type -AssemblyName 'System.Linq';

# A `git lfs ls-files` line looks like '12345678 * my.dll'; the path is the third
# space-separated field. The -n flag is not used because not all lfs versions support it.
[string[]] $trackedByLfs = git lfs ls-files | ForEach-Object { $_.Split(' ', 3)[2] };

# Paths that git attributes mark with filter=lfs.
[string[]] $markedForLfs = git ls-files ':(attr:filter=lfs)';

# Set difference: marked for lfs but not listed by lfs.
$filePathsNotUnderLfs = [System.Linq.Enumerable]::ToArray(
    [System.Linq.Enumerable]::Except($markedForLfs, $trackedByLfs)
);
$count = $filePathsNotUnderLfs.Count;

Write-Host "There are $($count) lfs files that are not under lfs";
foreach ($filePathNotUnderLfs in $filePathsNotUnderLfs)
{
    Write-Host "`t'$filePathNotUnderLfs'";
}

if ($count -le 0)
{
    return 0;
}

Write-Error "There are $count lfs files that are not under lfs";
return 1;
我仍然不确定这是否是一种可靠的方法。那么,这是一种可靠/正确的方式吗?或者还有其他可用的选择吗?
P.S.: 我想要实现的是创建一个拉取请求验证构建(针对 TFS 存储库),用来检测经常发生的 lfs 违规(提交的文件未添加到 lfs,或其他类似情况)。这些违规会导致不必要的警告和仓库膨胀,往好了说是小麻烦,往坏了说是大问题。
编辑: 嗯,发现当前代码有一些问题:
- 空文件的处理
- ls-files 输出中对非 ASCII 字符使用的八进制编码
- 符号链接
但它们相对容易修复,不会改变问题的要点。
我还没有找到它是否是一个真正可靠的方法,但到目前为止我的测试似乎证实了这一点。
我们最终使用以下脚本作为构建管道的一部分:
#See
# Script-level parameters.
#   RepositoryLocalPath - mandatory string; handed to Main at the bottom of the script.
[CmdletBinding()]
[OutputType([int])]
Param (
[Parameter(Mandatory = $true)]
[string]
$RepositoryLocalPath
)
# Treat errors as terminating so that failures surface as exceptions.
$ErrorActionPreference = "Stop";
# Throws System.InvalidOperationException when the most recent native command
# (as reported by $LASTEXITCODE) did not exit with code 0; otherwise does nothing.
function Assert-LastExitCodeIs0
{
    if ($LASTEXITCODE -ne 0)
    {
        # The exception type name was previously misspelled, which made New-Object
        # fail with a "cannot find type" error instead of raising this exception.
        throw (New-Object System.InvalidOperationException("LASTEXITCODE is $LASTEXITCODE"));
    }
}
# Decodes a path printed in git's double-quoted form, where non-ASCII bytes appear
# as three-digit octal escapes (e.g. "\303\244.txt"), back into a UTF-8 string.
#
# Parameters:
#   String - the text to decode.
# Returns:
#   The input unchanged when it is empty, shorter than two characters, or not
#   wrapped in double quotes; otherwise the text between the quotes with every
#   run of octal escapes replaced by the UTF-8 decoding of those bytes.
# NOTE(review): other backslash escapes git may emit (\" \\ \t \n ...) are left
# as-is, exactly as before - confirm whether such paths can occur in the repository.
function Get-StringFromOctal
{
    [CmdLetBinding()]
    [OutputType([string])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $String
    )
    if ([String]::IsNullOrEmpty($String) -or ($String.Length -lt 2))
    {
        return $String;
    }
    if (($String[0] -ne '"') -or
        ($String[$String.Length - 1] -ne '"'))
    {
        return $String;
    }
    $String = $String.Substring(1, $String.Length - 2);
    # A run of escapes, each one a backslash followed by exactly three octal digits
    # (\000-\377). Matching exactly three digits keeps ordinary digits that follow
    # an escape ("\303\2441.txt") out of the byte value.
    [regex] $regex = '(\\[0-3][0-7]{2})+';
    return $regex.Replace(
        $String,
        {
            Param ($Capture)
            # Every repetition of group 1 is one escaped byte; decode the whole run
            # together because a single character may span several bytes.
            [byte[]] $bytes = @($Capture.Groups[1].Captures |
                ForEach-Object { [System.Convert]::ToByte($_.Value.Substring(1), 8) });
            return [System.Text.Encoding]::UTF8.GetString($bytes);
        });
}
# Brings a path taken from ls-files output into a comparable form: surrounding
# whitespace is trimmed and the result is passed through Get-StringFromOctal.
function Normalize-GitLsFilesPath
{
    [CmdLetBinding()]
    [OutputType([string])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    # Why the octal paths are parsed here instead of being avoided:
    #  - `ls-files -z` would help for git itself, but `lfs ls-files` has no such option.
    #  - Setting core.quotePath to 0 brings encoding problems of its own, and it would
    #    change git config settings, which is undesirable even if reverting always(?) succeeds.
    # Parsing the octal form is therefore the simpler route.
    $trimmedPath = $Path.Trim();
    return Get-StringFromOctal -String $trimmedPath;
}
# Returns $true when the item at the given literal path has a Length of 0.
function Is-FileEmpty
{
    [CmdLetBinding()]
    [OutputType([bool])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    $item = Get-Item -LiteralPath $Path;
    $isEmpty = ($item.Length -eq 0);
    return $isEmpty;
}
# Predicate used to drop empty files from the list of suspected violations:
# returns $true only when the file at Path is not empty.
function Is-NotEmptyLfsFile
{
    [CmdLetBinding()]
    [OutputType([bool])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $Path
    )
    # An empty file stays an empty file whether it is under lfs or not, see
    # https://github.com/git-lfs/git-lfs/issues/2863
    # Such files seem not to get the lfs attributes, so filtering them out is the only option.
    $isEmpty = Is-FileEmpty -Path $Path;
    return (-not $isEmpty);
}
# Lists the paths reported by `git lfs ls-files` for the current working directory.
#
# Returns:
#   A string[] (possibly empty) of normalized paths, emitted as a single object.
# Throws:
#   Whatever Assert-LastExitCodeIs0 throws when git exits with a non-zero code.
function Get-ActualLfsFilePaths
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    Write-Host 'Getting actual lfs file paths.';
    # @() guarantees an array: with no output the variable would otherwise be $null,
    # and piping $null still runs ForEach-Object once, failing on $_.Split.
    $lfsLsFilesOutput = @(git lfs ls-files);
    Assert-LastExitCodeIs0;
    [string[]] $actualLfsFilePaths = @($lfsLsFilesOutput |
        Where-Object { -not [string]::IsNullOrWhiteSpace($_) } |
        ForEach-Object {
            #'12345678 * my.dll' - not all lfs versions support -n flag, so it is better to be conservative
            $lsFilePath = $_.Split(' ', 3)[2];
            Normalize-GitLsFilesPath -Path $lsFilePath;
        });
    Write-Host "There are $($actualLfsFilePaths.Count) files that are actually under lfs";
    # The leading comma keeps the array from being unrolled on output.
    return , $actualLfsFilePaths;
}
# Lists the paths that git attributes mark with filter=lfs, skipping symlinks.
#
# Returns:
#   A string[] (possibly empty) of normalized paths, emitted as a single object.
#   Empty files are still included at this point.
# Throws:
#   Whatever Assert-LastExitCodeIs0 throws when git exits with a non-zero code.
function Get-ShouldBeLfsFilePaths
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    Write-Host 'Getting should be under lfs file paths.';
    # @() guarantees an array: with no output the variable would otherwise be $null,
    # and piping $null still runs ForEach-Object once, failing on $_.Split.
    [string[]] $lsFilesOutput = @(git ls-files -s ':(attr:filter=lfs)');
    Assert-LastExitCodeIs0;
    [string[]] $shouldBeUnderLfsFilePaths = @($lsFilesOutput |
        ForEach-Object {
            #120000 1234567890abcdef 0 mylib.dylib
            # The leading comma passes the four fields down the pipeline as one array.
            , $_.Split($null, 4);
        } |
        Where-Object {
            $symlinkBitMaskString = '120000';
            $fileChmodString = $_[0];
            return ($fileChmodString -ne $symlinkBitMaskString); # Perhaps we should check actual bitmask?
        } |
        ForEach-Object {
            Normalize-GitLsFilesPath -Path $_[3];
        });
    Write-Host "There are $($shouldBeUnderLfsFilePaths.Count) files that should be under lfs (may include flukes - empty files)";
    # The leading comma keeps the array from being unrolled on output.
    return , $shouldBeUnderLfsFilePaths;
}
# Computes the paths that should be under lfs but are not listed by lfs.
#
# Returns:
#   A string[] emitted as a single object: the result of Get-ShouldBeLfsFilePaths
#   minus the result of Get-ActualLfsFilePaths, with empty files removed.
function Get-LfsFilePathsNotUnderLfs
{
    [CmdletBinding()]
    [OutputType([string[]])]
    Param()
    [string[]] $actualLfsFilePaths = Get-ActualLfsFilePaths;
    [string[]] $shouldBeUnderLfsFilePaths = Get-ShouldBeLfsFilePaths;
    # Enumerable.Except throws ArgumentNullException on a null sequence, so a
    # missing list is treated as an empty one.
    if ($null -eq $actualLfsFilePaths)
    {
        $actualLfsFilePaths = @();
    }
    if ($null -eq $shouldBeUnderLfsFilePaths)
    {
        $shouldBeUnderLfsFilePaths = @();
    }
    Add-Type -AssemblyName 'System.Linq';
    Write-Host 'Comparing actual lfs and should be under lfs files.'
    # The leading comma keeps the resulting array from being unrolled on output.
    return , [System.Linq.Enumerable]::ToArray(
        [System.Linq.Enumerable]::Where(
            [System.Linq.Enumerable]::Except($shouldBeUnderLfsFilePaths, $actualLfsFilePaths),
            [Func[String, Boolean]] ${function:Is-NotEmptyLfsFile}
        )
    );
}
# Runs the check in the current working directory and prints every offending path.
# Returns 0 when nothing was found and 1 otherwise.
function Main-WithRepositoryAsWorkingFolder
{
    $filePathsNotUnderLfs = Get-LfsFilePathsNotUnderLfs;
    $count = $filePathsNotUnderLfs.Count;

    Write-Host "There are $($count) lfs files that are not under lfs";
    foreach ($filePathNotUnderLfs in $filePathsNotUnderLfs)
    {
        Write-Host "`t'$filePathNotUnderLfs'";
    }

    if ($count -ne 0)
    {
        return 1;
    }
    return 0;
}
# Runs the lfs check with the repository as the working directory and UTF-8
# console encodings, restoring the previous location and encodings afterwards.
#
# Parameters:
#   RepositoryLocalPath - path of the repository to check.
# Returns:
#   The result of Main-WithRepositoryAsWorkingFolder, or 2 when anything throws
#   (the error and its script stack trace are written to the host).
function Main
{
    [CmdletBinding()]
    [OutputType([int])]
    Param (
        [Parameter(Mandatory = $true)]
        [string]
        $RepositoryLocalPath
    )
    Write-Host "RepositoryLocalPath = $RepositoryLocalPath";
    # Capture the current state first; nothing is modified before the try block so
    # that the finally block can always restore it and any failure is reported as 2.
    $originalGetLocation = (Get-Location).Path;
    $originalCurrentDirectory = [System.IO.Directory]::GetCurrentDirectory();
    $originalOutputEncoding = $OutputEncoding;
    $originalConsoleOutputEncoding = [Console]::OutputEncoding;
    $originalConsoleInputEncoding = [Console]::InputEncoding;
    try
    {
        # It is simpler to set working dir rather than use Process.Start with WorkingDirectory everywhere, and more reliable than Start-Process (that one may in some cases have issues with return exit code).
        Set-Location -LiteralPath $RepositoryLocalPath;
        # Use the resolved location: a relative RepositoryLocalPath would otherwise be
        # interpreted against the process directory rather than the PowerShell location.
        [System.IO.Directory]::SetCurrentDirectory((Get-Location).ProviderPath);
        # Otherwise we won't get proper file paths from git lfs ls-files, as it doesn't use octal encoding
        # And using output redirection may lead us to even bigger problems|difficulties.
        $OutputEncoding = [System.Text.Encoding]::UTF8;
        [Console]::OutputEncoding = [System.Text.Encoding]::UTF8;
        [Console]::InputEncoding = [System.Text.Encoding]::UTF8;
        return Main-WithRepositoryAsWorkingFolder;
    }
    catch
    {
        Write-Host "$_ $($_.ScriptStackTrace)";
        return 2;
    }
    finally
    {
        Set-Location -LiteralPath $originalGetLocation;
        [System.IO.Directory]::SetCurrentDirectory($originalCurrentDirectory);
        $OutputEncoding = $originalOutputEncoding;
        [Console]::OutputEncoding = $originalConsoleOutputEncoding;
        [Console]::InputEncoding = $originalConsoleInputEncoding;
    }
}
# Script entry point: run Main and use its return value as the process exit code.
exit (Main -RepositoryLocalPath $RepositoryLocalPath);