将文本文件转换为 CSV 文件
converting text file to CSV file
我编写了一些 PowerShell 代码来读取一个非常大的 .txt 文件,从中选取某些行并将它们写入 CSV 文件。问题是该文件的格式如下:
header1: Data1
header2: Data1
header3: Data1
header4: Data1
header1: Data2
header2: Data2
header3: Data2
header4: Data2
我需要将其转换为:
Header1,Header2,Header3,Header4
data1,data1,data1,data1
data2,data2,data2,data2
代码是这样的:
# Read "Name: Value" lines from a text file and write four selected fields
# (Policy, Schedule Type, Retention Level, Host) to data2.csv in the current directory.
# NOTE: each recognised field is written with its own WriteLine call, so every
# value lands on a separate CSV row with the other three columns left empty.
$path = get-location
# Get-FileName is not defined in this snippet; presumably it returns the path of the input file -- confirm.
$textfile = Get-FileName $env:USERPROFILE\Downloads\
$writefile = "$path\data2.csv"
$reader = [System.IO.File]::OpenText($textfile)
$writer = New-Object System.IO.StreamWriter $writefile
# Header row of the CSV.
$writer.WriteLine('{0},{1},{2},{3}', "Policy","Schedule Type","Retention Level","Host")
# Endless loop; left via 'break' once ReadLine yields $null.
for(;;) {
$line = $reader.ReadLine() # $null is treated as the end of the input
if ($null -eq $line) {
break
}
# Split on ':' -- element 0 is the field name, element 1 the text up to the next ':' (if any).
$data = $line.Split(":")
if ($null -ne $data[0]) {
# Strip leading spaces from the field name.
$newdata0 = $data[0].trimstart(" ")
}
if ($null -ne $data[1]) {
# Strip leading spaces from the value; when the line has no ':' the previous value is kept.
$newdata1 = $data[1].trimstart(" ")
}
# One WriteLine per recognised field: the value goes into its own column, the other columns stay empty.
if ($newdata0 -eq "Policy") {$writer.WriteLine('{0},{1},{2},{3}', $newdata1,$null,$null,$null)}
if ($newdata0 -eq "Schedule Type") {$writer.WriteLine('{0},{1},{2},{3}', $null,$newdata1,$null,$null)}
if ($newdata0 -eq "Retention Level") {$writer.WriteLine('{0},{1},{2},{3}', $null,$null,$newdata1,$null)}
if ($newdata0 -eq "Host") {$writer.WriteLine('{0},{1},{2},{3}', $null,$null,$null,$newdata1)}
}
# Release both file handles.
$reader.Close()
$writer.Close()
但我最终得到了这个(实际数据):
Policy,Schedule Type,Retention Level,Host
FS-Win-Servers-Tokyo-DACS_ONLY,,,
,FULL (0),,
,,infinity (9),
,,,opback03e.options-it.com
DB-Win-Exch2013-ADB11,,,
,INCR (1),,
,,6 months (6),
,,,opback03e.options-it.com
DB-Win-Exch2013-MDB11,,,
,INCR (1),,
,,6 months (6),
,,,opback03e.options-it.com
DB-Win-Exch2013-MDB10,,,
,INCR (1),,
,,6 months (6),
,,,opback03e.options-it.com
我想我在处理这段代码时出错了,或者可能只是需要找到一种方法来重新格式化 csv?
我倾向于把重复出现的字段名当作记录分隔符(用它代替行尾来划分记录)。
# Rebuild one object per record from the "Header: Value" lines of test.txt.
# A header name that shows up a second time marks the start of the next record.
$seenHeaders = New-Object System.Collections.Generic.List[String]
Get-Content test.txt |
    Where-Object { $_ -match '(?<Header>[^:]+): *(?<Value>.+)$' } |
    ForEach-Object {
        # $matches still holds the named captures from the -match in Where-Object.
        $name = $matches.Header
        $value = $matches.Value

        if ($seenHeaders.Contains($name)) {
            # Repeated header: the record built so far is complete, so emit it
            # and forget the headers that belonged to it.
            $record
            $seenHeaders.Clear()
        }

        if ($seenHeaders.Count -eq 0) {
            # First field of a record: start with a fresh, empty object.
            $record = New-Object PSObject
        }

        # Attach the field as a note property and remember its header name.
        $record | Add-Member -NotePropertyName $name -NotePropertyValue $value
        $seenHeaders.Add($name)
    }
# The last record is never followed by a repeated header, so emit it here.
$record
问题在于:每次调用 $writer.WriteLine 都会在目标文件中写入新的一行。您需要在每次循环中收集信息,但每 4 次循环才写入一次,下面的代码也许可行:
# Replacement for the read loop: remember the latest value of each field and
# write one combined CSV row on every fourth input line.
# Uses $reader and $writer, which are not created in this snippet.
# NOTE(review): the modulo counter assumes the input consists only of the four
# fields, always in groups of four lines -- confirm against the real file.
$loopCounter = 0
for (;;) {
    $line = $reader.ReadLine()
    if ($null -eq $line) {
        # $null is treated as the end of the input.
        break
    }

    # Element 0 is the field name, element 1 the text up to the next ':' (if any).
    $data = $line.Split(":")
    if ($null -ne $data[0]) {
        $newdata0 = $data[0].trimstart(" ")
    }
    if ($null -ne $data[1]) {
        $newdata1 = $data[1].trimstart(" ")
    }

    # Store the parsed value ($newdata1). Reading the never-assigned $newdata
    # here would leave every column of the output empty.
    if ($newdata0 -eq "Policy") { $data1 = $newdata1 }
    if ($newdata0 -eq "Schedule Type") { $data2 = $newdata1 }
    if ($newdata0 -eq "Retention Level") { $data3 = $newdata1 }
    if ($newdata0 -eq "Host") { $data4 = $newdata1 }

    # Lines 4, 8, 12, ... complete a group of four: write the collected row.
    if (($loopCounter % 4) -eq 3) {
        $writer.WriteLine('{0},{1},{2},{3}', $data1, $data2, $data3, $data4)
    }
    $loopCounter++
}
我编写了一些 PowerShell 代码来读取一个非常大的 .txt 文件,从中选取某些行并将它们写入 CSV 文件。问题是该文件的格式如下:
header1: Data1
header2: Data1
header3: Data1
header4: Data1
header1: Data2
header2: Data2
header3: Data2
header4: Data2
我需要将其转换为:
Header1,Header2,Header3,Header4
data1,data1,data1,data1
data2,data2,data2,data2
代码是这样的:
# Read "Name: Value" lines from a text file and write four selected fields
# (Policy, Schedule Type, Retention Level, Host) to data2.csv in the current directory.
# NOTE: each recognised field is written with its own WriteLine call, so every
# value lands on a separate CSV row with the other three columns left empty.
$path = get-location
# Get-FileName is not defined in this snippet; presumably it returns the path of the input file -- confirm.
$textfile = Get-FileName $env:USERPROFILE\Downloads\
$writefile = "$path\data2.csv"
$reader = [System.IO.File]::OpenText($textfile)
$writer = New-Object System.IO.StreamWriter $writefile
# Header row of the CSV.
$writer.WriteLine('{0},{1},{2},{3}', "Policy","Schedule Type","Retention Level","Host")
# Endless loop; left via 'break' once ReadLine yields $null.
for(;;) {
$line = $reader.ReadLine() # $null is treated as the end of the input
if ($null -eq $line) {
break
}
# Split on ':' -- element 0 is the field name, element 1 the text up to the next ':' (if any).
$data = $line.Split(":")
if ($null -ne $data[0]) {
# Strip leading spaces from the field name.
$newdata0 = $data[0].trimstart(" ")
}
if ($null -ne $data[1]) {
# Strip leading spaces from the value; when the line has no ':' the previous value is kept.
$newdata1 = $data[1].trimstart(" ")
}
# One WriteLine per recognised field: the value goes into its own column, the other columns stay empty.
if ($newdata0 -eq "Policy") {$writer.WriteLine('{0},{1},{2},{3}', $newdata1,$null,$null,$null)}
if ($newdata0 -eq "Schedule Type") {$writer.WriteLine('{0},{1},{2},{3}', $null,$newdata1,$null,$null)}
if ($newdata0 -eq "Retention Level") {$writer.WriteLine('{0},{1},{2},{3}', $null,$null,$newdata1,$null)}
if ($newdata0 -eq "Host") {$writer.WriteLine('{0},{1},{2},{3}', $null,$null,$null,$newdata1)}
}
# Release both file handles.
$reader.Close()
$writer.Close()
但我最终得到了这个(实际数据):
Policy,Schedule Type,Retention Level,Host
FS-Win-Servers-Tokyo-DACS_ONLY,,,
,FULL (0),,
,,infinity (9),
,,,opback03e.options-it.com
DB-Win-Exch2013-ADB11,,,
,INCR (1),,
,,6 months (6),
,,,opback03e.options-it.com
DB-Win-Exch2013-MDB11,,,
,INCR (1),,
,,6 months (6),
,,,opback03e.options-it.com
DB-Win-Exch2013-MDB10,,,
,INCR (1),,
,,6 months (6),
,,,opback03e.options-it.com
我想我在处理这段代码时出错了,或者可能只是需要找到一种方法来重新格式化 csv?
我倾向于把重复出现的字段名当作记录分隔符(用它代替行尾来划分记录)。
# Rebuild one object per record from the "Header: Value" lines of test.txt.
# A header name that shows up a second time marks the start of the next record.
$seenHeaders = New-Object System.Collections.Generic.List[String]
Get-Content test.txt |
    Where-Object { $_ -match '(?<Header>[^:]+): *(?<Value>.+)$' } |
    ForEach-Object {
        # $matches still holds the named captures from the -match in Where-Object.
        $name = $matches.Header
        $value = $matches.Value

        if ($seenHeaders.Contains($name)) {
            # Repeated header: the record built so far is complete, so emit it
            # and forget the headers that belonged to it.
            $record
            $seenHeaders.Clear()
        }

        if ($seenHeaders.Count -eq 0) {
            # First field of a record: start with a fresh, empty object.
            $record = New-Object PSObject
        }

        # Attach the field as a note property and remember its header name.
        $record | Add-Member -NotePropertyName $name -NotePropertyValue $value
        $seenHeaders.Add($name)
    }
# The last record is never followed by a repeated header, so emit it here.
$record
问题在于:每次调用 $writer.WriteLine 都会在目标文件中写入新的一行。您需要在每次循环中收集信息,但每 4 次循环才写入一次,下面的代码也许可行:
# Replacement for the read loop: remember the latest value of each field and
# write one combined CSV row on every fourth input line.
# Uses $reader and $writer, which are not created in this snippet.
# NOTE(review): the modulo counter assumes the input consists only of the four
# fields, always in groups of four lines -- confirm against the real file.
$loopCounter = 0
for (;;) {
    $line = $reader.ReadLine()
    if ($null -eq $line) {
        # $null is treated as the end of the input.
        break
    }

    # Element 0 is the field name, element 1 the text up to the next ':' (if any).
    $data = $line.Split(":")
    if ($null -ne $data[0]) {
        $newdata0 = $data[0].trimstart(" ")
    }
    if ($null -ne $data[1]) {
        $newdata1 = $data[1].trimstart(" ")
    }

    # Store the parsed value ($newdata1). Reading the never-assigned $newdata
    # here would leave every column of the output empty.
    if ($newdata0 -eq "Policy") { $data1 = $newdata1 }
    if ($newdata0 -eq "Schedule Type") { $data2 = $newdata1 }
    if ($newdata0 -eq "Retention Level") { $data3 = $newdata1 }
    if ($newdata0 -eq "Host") { $data4 = $newdata1 }

    # Lines 4, 8, 12, ... complete a group of four: write the collected row.
    if (($loopCounter % 4) -eq 3) {
        $writer.WriteLine('{0},{1},{2},{3}', $data1, $data2, $data3, $data4)
    }
    $loopCounter++
}