使用 gawk 对最后一列求和,并将总和精确地附加到最后一列的末尾以保持原有格式 - 首选就地编辑
Using gawk Sum total last column and append sum exactly to the end of last column maintaining the formatting - preferred inplace edit
首选就地编辑
有条件地转换大小列并将总计附加到最后一列末尾
数据:
列数:
"File_Name","Owner","Mod","Size"
"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB"
为 $NF。
预期输出:
"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB"
Total:"409.92GB"
# Sum the human-readable sizes (KB/MB/GB suffix) found in the last
# comma-separated field of every record, echo each record unchanged, and
# finish with a `Total: "<value><unit>"` line scaled to the largest unit
# that keeps the value at or above 1.
BEGIN {
    OFS = FS = ","
}

{
    # Work on a copy so the record itself is printed byte-for-byte.
    size = $NF
    gsub(/"/, "", size)

    # `size * factor` relies on awk string-to-number conversion, which
    # reads the leading numeric prefix ("369.775MB" -> 369.775).
    # Records whose last field carries no known unit (e.g. the header)
    # contribute nothing.
    if      (size ~ /GB/) sum += size * 1073741824
    else if (size ~ /MB/) sum += size * 1048576
    else if (size ~ /KB/) sum += size * 1024

    print
}

END {
    # An else-if chain is required: once the scaled value has a unit
    # appended it is a string, and re-testing it against the byte
    # thresholds would be a string comparison ("1.2MB" < "1048576" is
    # true), which would scale and label the total a second time.
    if      (sum >= 1099511627776) total = (sum / 1099511627776) "TB"
    else if (sum >= 1073741824)    total = (sum / 1073741824) "GB"
    else if (sum >= 1048576)       total = (sum / 1048576) "MB"
    else                           total = (sum / 1024) "KB"

    print "Total: \"" total "\""
}
以上代码可以得到所需的总和。
但是,如何把总和准确地打印在最后一行之后、最后一列的位置,并像漂亮打印(pretty print)那样与表格正确对齐?
逻辑1)获取所有行的长度和平均值
# "Logic 1": echo every record, sum the KB/MB/GB sizes in the last
# comma-separated field, and print the total right-aligned in a column whose
# width is the average record length plus one.
BEGIN {
    FS = ","
    OFS = FS
}

{
    len += length($0)

    # Work on a copy so the record itself is printed byte-for-byte.
    size = $NF
    gsub(/"/, "", size)

    # String-to-number conversion keeps only the leading numeric prefix,
    # so "369.775MB" * 1048576 multiplies 369.775.
    if      (size ~ /GB/) sum += size * 1073741824
    else if (size ~ /MB/) sum += size * 1048576
    else if (size ~ /KB/) sum += size * 1024

    print
}

END {
    # Guard the average against empty input (NR == 0 would be a fatal
    # division by zero).
    width = NR ? int(len / NR + 1) : 0

    # else-if chain: the scaled value becomes a string once the unit is
    # appended and must not be compared against the byte thresholds again.
    if      (sum >= 1099511627776) total = (sum / 1099511627776) "TB"
    else if (sum >= 1073741824)    total = (sum / 1073741824) "GB"
    else if (sum >= 1048576)       total = (sum / 1048576) "MB"
    else                           total = (sum / 1024) "KB"

    # The whole `Total:"<value>"` string is right-aligned in `width`
    # columns; the format no longer emits a stray `:"` after the closing
    # quote.
    printf "%*s\n", width, "Total:\"" total "\""
}
逻辑 2) 获取所有行的长度和平均值 -
同时将文件按行长度从最长到最短排序,以优化输出
# "Logic 2" (gawk): buffer the records grouped by length, sum the KB/MB/GB
# sizes in the last comma-separated field, then print the records from the
# longest to the shortest followed by the total, right-aligned in a column
# whose width is the average record length plus one.
BEGIN {
    FS = ","
    OFS = FS
}

{
    n = length($0)
    len += n

    # Records of equal length are kept together, in input order, joined
    # by RS. `in` is used instead of a truthiness test so that a stored
    # record such as "0" is not mistaken for "nothing stored yet".
    byLen[n] = (n in byLen) ? byLen[n] RS $0 : $0

    size = $NF
    gsub(/"/, "", size)

    # String-to-number conversion keeps only the leading numeric prefix.
    if      (size ~ /GB/) sum += size * 1073741824
    else if (size ~ /MB/) sum += size * 1048576
    else if (size ~ /KB/) sum += size * 1024
}

END {
    # Guard the average against empty input.
    width = NR ? int(len / NR + 1) : 0

    # else-if chain: the scaled value becomes a string once the unit is
    # appended and must not be compared against the byte thresholds again.
    if      (sum >= 1099511627776) total = (sum / 1099511627776) "TB"
    else if (sum >= 1073741824)    total = (sum / 1073741824) "GB"
    else if (sum >= 1048576)       total = (sum / 1048576) "MB"
    else                           total = (sum / 1024) "KB"

    # Plain `for (k in arr)` has no defined order; ask gawk to walk the
    # numeric indices in descending order so the longest records come first.
    PROCINFO["sorted_in"] = "@ind_num_desc"
    for (n in byLen)
        print byLen[n]

    # The format no longer emits a stray `:"` after the closing quote.
    printf "%*s\n", width, "Total:\"" total "\""
}
逻辑 3)获取出现次数最多的行长度,然后用它来对齐总计
逻辑 4) 取最后一行中到最后一个字段起始双引号为止的长度,并在打印总计之前先打印同样数量的空格
这样总计将与最后一条记录的最后一个字段对齐。
如图
也欢迎提供能得到类似结果的其他漂亮打印逻辑
一个 awk
思路:
awk -F'"' '                             # input field delimiter == double quote
# Echo every record of raw.dat, add up the sizes held in the last quoted
# field (plain number or KB/MB/GB/TB suffix), and print a final
#   Total:"<value><unit>"
# line whose "Total" label starts under the last field of the last record.
BEGIN { kb = 1024
        mb = kb * kb
        gb = mb * kb
        tb = gb * kb

        # bytes per unit; the empty key covers a bare number
        factors[""]   = 1
        factors["KB"] = kb
        factors["MB"] = mb
        factors["GB"] = gb
        factors["TB"] = tb

        sum = 0
      }

      { print

        # With a double quote as FS the text of the last quoted field is
        # $(NF-1); take two copies and reduce one to the number, the other
        # to the unit.
        value = unit = toupper( $(NF-1) )
        gsub(/[KMGT]B/, "", value)      # strip off unit to get value
        gsub(/[-.0-9]/, "", unit)       # strip off number to get unit

        # Only known units contribute; this skips the header row without
        # creating a junk factors[] entry for it.
        if (unit in factors)
            sum += value * factors[unit]
      }

END   { if      (sum >= tb) unit = "TB"
        else if (sum >= gb) unit = "GB"
        else if (sum >= mb) unit = "MB"
        else if (sum >= kb) unit = "KB"
        else                unit = ""

        # using OPs 3rd definition of "alignment":
        #
        #   Use last line length upto " and print that much space
        #   before printing total. This way total will align to last
        #   field of last record
        #
        # The regex matches the last comma-delimited field together with
        # its leading comma; match() returns the position of that comma in
        # the last record (still available as $0 inside END). Adding 5, the
        # length of "Total", gives the printf field width, so with x=17 the
        # format "%*s" behaves like "%17s".
        regex = ",[^,]*$"
        x = match($0, regex) + 5

        printf "%*s:\"%.3f%s\"\n", x, "Total", (sum / factors[unit]), unit
      }
' raw.dat
这会生成:
"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB"
Total:"409.902GB"
首选就地编辑
有条件地转换大小列并将总计附加到最后一列末尾
数据:
列数:
"File_Name","Owner","Mod","Size"
"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB"
为 $NF。
预期输出:
"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB"
Total:"409.92GB"
# Sum the human-readable sizes (KB/MB/GB suffix) found in the last
# comma-separated field of every record, echo each record unchanged, and
# finish with a `Total: "<value><unit>"` line scaled to the largest unit
# that keeps the value at or above 1.
BEGIN {
    OFS = FS = ","
}

{
    # Work on a copy so the record itself is printed byte-for-byte.
    size = $NF
    gsub(/"/, "", size)

    # `size * factor` relies on awk string-to-number conversion, which
    # reads the leading numeric prefix ("369.775MB" -> 369.775).
    # Records whose last field carries no known unit (e.g. the header)
    # contribute nothing.
    if      (size ~ /GB/) sum += size * 1073741824
    else if (size ~ /MB/) sum += size * 1048576
    else if (size ~ /KB/) sum += size * 1024

    print
}

END {
    # An else-if chain is required: once the scaled value has a unit
    # appended it is a string, and re-testing it against the byte
    # thresholds would be a string comparison ("1.2MB" < "1048576" is
    # true), which would scale and label the total a second time.
    if      (sum >= 1099511627776) total = (sum / 1099511627776) "TB"
    else if (sum >= 1073741824)    total = (sum / 1073741824) "GB"
    else if (sum >= 1048576)       total = (sum / 1048576) "MB"
    else                           total = (sum / 1024) "KB"

    print "Total: \"" total "\""
}
以上代码可以得到所需的总和。
但是,如何把总和准确地打印在最后一行之后、最后一列的位置,并像漂亮打印(pretty print)那样与表格正确对齐?
逻辑1)获取所有行的长度和平均值
# "Logic 1": echo every record, sum the KB/MB/GB sizes in the last
# comma-separated field, and print the total right-aligned in a column whose
# width is the average record length plus one.
BEGIN {
    FS = ","
    OFS = FS
}

{
    len += length($0)

    # Work on a copy so the record itself is printed byte-for-byte.
    size = $NF
    gsub(/"/, "", size)

    # String-to-number conversion keeps only the leading numeric prefix,
    # so "369.775MB" * 1048576 multiplies 369.775.
    if      (size ~ /GB/) sum += size * 1073741824
    else if (size ~ /MB/) sum += size * 1048576
    else if (size ~ /KB/) sum += size * 1024

    print
}

END {
    # Guard the average against empty input (NR == 0 would be a fatal
    # division by zero).
    width = NR ? int(len / NR + 1) : 0

    # else-if chain: the scaled value becomes a string once the unit is
    # appended and must not be compared against the byte thresholds again.
    if      (sum >= 1099511627776) total = (sum / 1099511627776) "TB"
    else if (sum >= 1073741824)    total = (sum / 1073741824) "GB"
    else if (sum >= 1048576)       total = (sum / 1048576) "MB"
    else                           total = (sum / 1024) "KB"

    # The whole `Total:"<value>"` string is right-aligned in `width`
    # columns; the format no longer emits a stray `:"` after the closing
    # quote.
    printf "%*s\n", width, "Total:\"" total "\""
}
逻辑 2) 获取所有行的长度和平均值 - 同时将文件按行长度从最长到最短排序,以优化输出
# "Logic 2" (gawk): buffer the records grouped by length, sum the KB/MB/GB
# sizes in the last comma-separated field, then print the records from the
# longest to the shortest followed by the total, right-aligned in a column
# whose width is the average record length plus one.
BEGIN {
    FS = ","
    OFS = FS
}

{
    n = length($0)
    len += n

    # Records of equal length are kept together, in input order, joined
    # by RS. `in` is used instead of a truthiness test so that a stored
    # record such as "0" is not mistaken for "nothing stored yet".
    byLen[n] = (n in byLen) ? byLen[n] RS $0 : $0

    size = $NF
    gsub(/"/, "", size)

    # String-to-number conversion keeps only the leading numeric prefix.
    if      (size ~ /GB/) sum += size * 1073741824
    else if (size ~ /MB/) sum += size * 1048576
    else if (size ~ /KB/) sum += size * 1024
}

END {
    # Guard the average against empty input.
    width = NR ? int(len / NR + 1) : 0

    # else-if chain: the scaled value becomes a string once the unit is
    # appended and must not be compared against the byte thresholds again.
    if      (sum >= 1099511627776) total = (sum / 1099511627776) "TB"
    else if (sum >= 1073741824)    total = (sum / 1073741824) "GB"
    else if (sum >= 1048576)       total = (sum / 1048576) "MB"
    else                           total = (sum / 1024) "KB"

    # Plain `for (k in arr)` has no defined order; ask gawk to walk the
    # numeric indices in descending order so the longest records come first.
    PROCINFO["sorted_in"] = "@ind_num_desc"
    for (n in byLen)
        print byLen[n]

    # The format no longer emits a stray `:"` after the closing quote.
    printf "%*s\n", width, "Total:\"" total "\""
}
逻辑 3)获取出现次数最多的行长度,然后用它来对齐总计
逻辑 4) 取最后一行中到最后一个字段起始双引号为止的长度,并在打印总计之前先打印同样数量的空格,这样总计将与最后一条记录的最后一个字段对齐。
如图
也欢迎提供能得到类似结果的其他漂亮打印逻辑
一个 awk
思路:
awk -F'"' '                             # input field delimiter == double quote
# Echo every record of raw.dat, add up the sizes held in the last quoted
# field (plain number or KB/MB/GB/TB suffix), and print a final
#   Total:"<value><unit>"
# line whose "Total" label starts under the last field of the last record.
BEGIN { kb = 1024
        mb = kb * kb
        gb = mb * kb
        tb = gb * kb

        # bytes per unit; the empty key covers a bare number
        factors[""]   = 1
        factors["KB"] = kb
        factors["MB"] = mb
        factors["GB"] = gb
        factors["TB"] = tb

        sum = 0
      }

      { print

        # With a double quote as FS the text of the last quoted field is
        # $(NF-1); take two copies and reduce one to the number, the other
        # to the unit.
        value = unit = toupper( $(NF-1) )
        gsub(/[KMGT]B/, "", value)      # strip off unit to get value
        gsub(/[-.0-9]/, "", unit)       # strip off number to get unit

        # Only known units contribute; this skips the header row without
        # creating a junk factors[] entry for it.
        if (unit in factors)
            sum += value * factors[unit]
      }

END   { if      (sum >= tb) unit = "TB"
        else if (sum >= gb) unit = "GB"
        else if (sum >= mb) unit = "MB"
        else if (sum >= kb) unit = "KB"
        else                unit = ""

        # using OPs 3rd definition of "alignment":
        #
        #   Use last line length upto " and print that much space
        #   before printing total. This way total will align to last
        #   field of last record
        #
        # The regex matches the last comma-delimited field together with
        # its leading comma; match() returns the position of that comma in
        # the last record (still available as $0 inside END). Adding 5, the
        # length of "Total", gives the printf field width, so with x=17 the
        # format "%*s" behaves like "%17s".
        regex = ",[^,]*$"
        x = match($0, regex) + 5

        printf "%*s:\"%.3f%s\"\n", x, "Total", (sum / factors[unit]), unit
      }
' raw.dat
这会生成:
"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB"
Total:"409.902GB"