PowerShell:在文件中搜索字符串,并与其他文件进行比较以查找重复项

PowerShell: searching for a string in files and comparing it with other files in order to find duplicates

下面的脚本在 VIP 和 ZIP 文件内部的文件中搜索字符串 'Package ID='。每个 VIP 只包含一个 vip.manifest,其中至少有一个包 ID 的 GUID;每个 ZIP 文件包含一个 VIP 文件。如您所见,内容被提取到临时文件夹并在最后删除。现在我的路径下有许多 VIP 或 ZIP,我需要知道是否存在重复,即是否有多个清单拥有相同的 GUID,并显示重复文件所在位置的信息。目前当我运行脚本时,只能看到路径中所有 ZIP/VIP 的全部 GUID:

function checkpackageID([string]$_path)
{
    <#
    .SYNOPSIS
    Reports package GUIDs that occur more than once across the .vip/.zip archives in a folder.

    .DESCRIPTION
    Every *.vip and *.zip file directly inside $_path is extracted to its own
    temporary folder. For a .zip, each .vip found at the top level of the
    extracted content is extracted as well. All *.manifest files are then
    scanned line by line for '<Package ID="...">'. Once every archive has been
    scanned, the collected records are grouped by GUID and only the records
    whose GUID was seen more than once are written to the output.

    .PARAMETER _path
    Folder that contains the .vip/.zip archives to inspect.

    .OUTPUTS
    PSCustomObject with the properties:
      Guid    - the matched package id
      Path    - full path of the extracted manifest (inside a temporary folder
                that no longer exists when the function returns)
      Archive - full path of the top-level .vip/.zip the manifest came from
    #>
    Add-Type -AssemblyName System.IO.Compression, System.IO.Compression.FileSystem

    if (-not (Test-Path -LiteralPath $_path -PathType Container))
    {
        Write-Warning "Folder not found: $_path"
        return
    }

    $compressedfiles = Get-ChildItem -Path (Join-Path $_path '*') -Include "*.vip","*.zip"

    # Collect one record per matched GUID over ALL archives; duplicates can
    # only be detected once everything has been gathered.
    $records = foreach ($file in $compressedfiles)
    {
        # A fresh folder per archive: ExtractToDirectory refuses to overwrite
        # files, and nothing may leak from one archive into the next.
        $tempFolder = Join-Path ([IO.Path]::GetTempPath()) (New-Guid).ToString('n')

        try
        {
            # .FullName, because .NET resolves relative paths against the
            # process working directory rather than the PowerShell location.
            [System.IO.Compression.ZipFile]::ExtractToDirectory($file.FullName, $tempFolder)

            if ($file.Extension -eq '.zip')
            {
                # Enumerate first, extract afterwards; each inner .vip goes to
                # its own sub-folder so identically named manifests cannot collide.
                $innerVips = @(Get-ChildItem -Path (Join-Path $tempFolder '*') -Include "*.vip")
                foreach ($vip in $innerVips)
                {
                    $vipFolder = Join-Path $tempFolder (New-Guid).ToString('n')
                    [System.IO.Compression.ZipFile]::ExtractToDirectory($vip.FullName, $vipFolder)
                }
            }

            foreach ($manifest in Get-ChildItem $tempFolder -Include "*.manifest" -Recurse)
            {
                switch -Regex -File ($manifest.FullName)
                {
                    '(?<=<Package ID=")(?<guid>[\d\w-]+)"'
                    {
                        [pscustomobject]@{
                            Guid    = $Matches['guid']
                            Path    = $manifest.FullName
                            Archive = $file.FullName
                        }
                    }
                }
            }
        }
        catch
        {
            # Best effort: report the broken archive and carry on with the next one.
            Write-Warning $_.Exception.Message
        }
        finally
        {
            # The folder does not exist when extraction failed before creating it.
            if (Test-Path -LiteralPath $tempFolder)
            {
                Remove-Item -LiteralPath $tempFolder -Force -Recurse
            }
        }
    }

    if (-not $records) { return }

    $records | Group-Object Guid | Where-Object Count -GT 1 | ForEach-Object Group
}

您可以直接在内存中读取压缩包条目,而不必把 .zip / .vip 中的所有 .manifest 文件提取到文件夹里。考虑到 .zip 中可能包含 .vip 文件,一种方法是编写一个递归函数来搜索所有 .manifest 文件。用该函数提取出所有 GUID 之后,使用 Group-Object 查找重复项的逻辑保持不变。

using namespace System.IO
using namespace System.IO.Compression

Add-Type -AssemblyName System.IO.Compression

function Get-ManifestFile {
    <#
    .SYNOPSIS
    Reads manifest entries straight out of .zip/.vip archives and emits the GUIDs found in them.

    .DESCRIPTION
    Opens the archive in memory (nothing is extracted to disk), reads every
    entry whose extension equals -TargetExtension line by line and emits one
    object per line that matches -Pattern. Entries with a '.vip' extension are
    treated as nested archives and processed recursively with the same
    -TargetExtension and -Pattern.

    .PARAMETER Path
    A FileInfo for an archive on disk (typically piped from Get-ChildItem).
    Any other object is treated as a nested archive entry and must provide an
    Open() method (the recursive call passes a ZipArchiveEntry).

    .PARAMETER TargetExtension
    Extension, including the dot, of the entries to scan.

    .PARAMETER Pattern
    Regular expression applied to each line; the named group 'guid' becomes
    the Guid property of the output.

    .PARAMETER Parent
    Internal. Path of the on-disk archive that contains a nested entry.

    .OUTPUTS
    PSCustomObject with Guid, FilePath (archive on disk) and ZipEntryPath
    (entry name inside the archive that was read).
    #>
    [cmdletbinding()]
    param(
        [parameter(ValueFromPipeline, Mandatory)]
        [object] $Path,
        [string] $TargetExtension = '.manifest',
        [string] $Pattern = '(?<=<Package ID=")(?<guid>[\d\w-]+)"',
        [Parameter(DontShow)]
        [string] $Parent
    )

    begin {
        # [ZipFile] lives in System.IO.Compression.FileSystem on Windows
        # PowerShell 5.1, which is not loaded by default.
        Add-Type -AssemblyName System.IO.Compression, System.IO.Compression.FileSystem
    }

    process {
        # Reset per pipeline item so the finally block never touches objects
        # left over from a previous item.
        $stream = $null
        $zip = $null

        try {
            if($Path -isnot [FileInfo]) {
                # Nested archive: keep the entry stream so it can be released
                # even when the ZipArchive constructor throws.
                $stream = $Path.Open()
                $zip = [ZipArchive]::new($stream)
                $filePath = $Parent
            }
            else {
                $zip = [ZipFile]::OpenRead($Path.FullName)
                $filePath = $Path.FullName
            }

            foreach($entry in $zip.Entries) {
                $extension = [Path]::GetExtension($entry.Name)

                # if the entry is a target (manifest) file, read it
                if($extension -eq $TargetExtension) {
                    $handle = $null
                    $reader = $null
                    try {
                        $handle = $entry.Open()
                        $reader = [StreamReader]::new($handle)
                        while(-not $reader.EndOfStream) {
                            if($reader.ReadLine() -match $Pattern) {
                                [pscustomobject]@{
                                    Guid         = $Matches['guid']
                                    FilePath     = $filePath
                                    ZipEntryPath = $entry.FullName
                                }
                            }
                        }
                    }
                    catch { $PSCmdlet.WriteError($_) }
                    finally {
                        if($null -ne $reader) { $reader.Dispose() }
                        if($null -ne $handle) { $handle.Dispose() }
                    }
                }

                # if the entry is a `.vip` file use recursion, forwarding the
                # caller's search settings instead of falling back to defaults
                if($extension -eq '.vip') {
                    Get-ManifestFile -Path $entry -Parent $filePath -TargetExtension $TargetExtension -Pattern $Pattern
                }
            }
        }
        catch { $PSCmdlet.WriteError($_) }
        finally {
            # Only the archive and the stream are disposable; the input
            # FileInfo / ZipArchiveEntry is not.
            if($null -ne $zip) { $zip.Dispose() }
            if($null -ne $stream) { $stream.Dispose() }
        }
    }
}

# Folder to inspect; replace the placeholder before running.
$path = "Define Path Here!!!"

# Gather every GUID record from the archives, then keep only the records
# whose GUID appears in more than one place.
$result = Get-ChildItem $path\* -Include '*.vip', '*.zip' | Get-ManifestFile |
    Group-Object Guid |
    Where-Object { $_.Count -gt 1 } |
    ForEach-Object { $_.Group }

# Emit the duplicates, or a short notice when there are none.
if($result) {
    $result
}
else {
    'No duplicates found.'
}