使用 gawk 对最后一列求和,并将总和精确地附加到最后一列的末尾,同时保持原有格式(首选就地编辑)

Using gawk Sum total last column and append sum exactly to the end of last column maintaining the formatting - preferred inplace edit

首选就地编辑:按条件转换“大小”(Size)列,并将总计附加到最后一列的末尾。数据:
列数:

"File_Name","Owner","Mod","Size"
"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB" 

其中最后一列(Size 列)在 awk 中记为 $NF。
预期输出:

"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB" 
               Total:"409.902GB"


# Print every record unchanged, then a line with the total of the size
# column (last field), scaled to the largest fitting binary unit.
#
# Input:  comma-separated records whose last field looks like "369.775MB"
#         (optionally quoted, optionally followed by blanks).
# Output: the records as read, followed by  Total: "<number><unit>"
BEGIN {
    OFS = FS = ","

    # Bytes per unit suffix recognised in the size column.
    factor["KB"] = 1024
    factor["MB"] = 1024 * 1024
    factor["GB"] = 1024 * 1024 * 1024
    factor["TB"] = 1024 * 1024 * 1024 * 1024
}
{
    # Work on a copy: assigning to $NF would rebuild the record and move a
    # trailing blank inside the quotes, changing the formatting.
    size = $NF
    gsub(/[" \t]/, "", size)

    # Records without a known unit suffix (e.g. the header) add nothing.
    if (match(size, /(KB|MB|GB|TB)$/))
        sum += substr(size, 1, RSTART - 1) * factor[substr(size, RSTART)]
}
END {
    # Exactly one branch may run. Independent "if" statements would re-test
    # the already formatted string (such as "1.5TB") using string comparison
    # and could rescale it a second time.
    if      (sum >= factor["TB"]) total = (sum / factor["TB"]) "TB"
    else if (sum >= factor["GB"]) total = (sum / factor["GB"]) "GB"
    else if (sum >= factor["MB"]) total = (sum / factor["MB"]) "MB"
    else                          total = (sum / factor["KB"]) "KB"

    print "Total: \"" total "\""
}
# Always-true pattern with the default action: print each record.
1

以上代码可以得到所需的总和。

但是,如何在最后一行之后准确地打印总和,使其像表格的美化输出(pretty print)一样正确对齐?

逻辑 1)计算所有行长度的平均值,并以此作为对齐宽度

# Print every record unchanged, then a Total line right-aligned to the
# average record length plus one (Logic 1 of the question).
#
# Input:  comma-separated records whose last field looks like "369.775MB".
# Output: the records as read, followed by  Total:"<number><unit>"
#         padded on the left to the computed width.
BEGIN {
    FS = ","
    OFS = FS

    # Bytes per unit suffix recognised in the size column.
    factor["KB"] = 1024
    factor["MB"] = 1024 * 1024
    factor["GB"] = 1024 * 1024 * 1024
    factor["TB"] = 1024 * 1024 * 1024 * 1024
}
{
    len += length($0)

    # Work on a copy so the record is printed exactly as it was read.
    size = $NF
    gsub(/[" \t]/, "", size)

    # Records without a known unit suffix (e.g. the header) add nothing.
    if (match(size, /(KB|MB|GB|TB)$/))
        sum += substr(size, 1, RSTART - 1) * factor[substr(size, RSTART)]
}
END {
    # Average record length; guard against division by zero on empty input.
    width = NR ? int(len / NR + 1) : 0

    # Exactly one branch may run, otherwise the formatted string would be
    # compared as text and could be rescaled again.
    if      (sum >= factor["TB"]) total = (sum / factor["TB"]) "TB"
    else if (sum >= factor["GB"]) total = (sum / factor["GB"]) "GB"
    else if (sum >= factor["MB"]) total = (sum / factor["MB"]) "MB"
    else                          total = (sum / factor["KB"]) "KB"

    # The whole label is one %*s argument; nothing follows it in the format,
    # so no stray characters are appended after the closing quote.
    printf "%*s\n", width, "Total:\"" total "\""
}
# Always-true pattern with the default action: print each record.
1

逻辑 2)计算所有行长度的平均值,同时将文件按行长度从最长到最短排序,以改善输出效果

# Print the records ordered from longest to shortest, then a Total line
# right-aligned to the average record length plus one (Logic 2 of the
# question). Requires gawk because of PROCINFO["sorted_in"].
#
# Input:  comma-separated records whose last field looks like "369.775MB".
# Output: the records grouped by length (longest first, input order inside a
#         group), followed by  Total:"<number><unit>"  padded on the left.
BEGIN {
    FS = ","
    OFS = FS

    # Bytes per unit suffix recognised in the size column.
    factor["KB"] = 1024
    factor["MB"] = 1024 * 1024
    factor["GB"] = 1024 * 1024 * 1024
    factor["TB"] = 1024 * 1024 * 1024 * 1024
}
{
    n = length($0)

    # Group records by length. Membership is tested with "in" so that a
    # record whose text is empty or "0" is not mistaken for a missing entry.
    by_len[n] = (n in by_len) ? by_len[n] RS $0 : $0
    len += n

    # Work on a copy so the stored record is exactly what was read.
    size = $NF
    gsub(/[" \t]/, "", size)

    # Records without a known unit suffix (e.g. the header) add nothing.
    if (match(size, /(KB|MB|GB|TB)$/))
        sum += substr(size, 1, RSTART - 1) * factor[substr(size, RSTART)]
}
END {
    # Average record length; guard against division by zero on empty input.
    width = NR ? int(len / NR + 1) : 0

    # Exactly one branch may run, otherwise the formatted string would be
    # compared as text and could be rescaled again.
    if      (sum >= factor["TB"]) total = (sum / factor["TB"]) "TB"
    else if (sum >= factor["GB"]) total = (sum / factor["GB"]) "GB"
    else if (sum >= factor["MB"]) total = (sum / factor["MB"]) "MB"
    else                          total = (sum / factor["KB"]) "KB"

    # The order of a plain "for (k in array)" is unspecified; ask gawk to
    # walk the numeric indices from largest to smallest.
    PROCINFO["sorted_in"] = "@ind_num_desc"
    for (n in by_len)
        print by_len[n]

    printf "%*s\n", width, "Total:\"" total "\""
}

逻辑 3)找出出现次数最多的行长度,并以此作为对齐宽度

逻辑 4)取最后一行中直到最后一个字段的起始双引号为止的长度,并在打印总计之前先输出同样多的空格,这样总计就会与最后一条记录的最后一个字段对齐。

如图

也欢迎提供其他能得到类似结果的美化输出(pretty print)思路。

一种 awk 思路:

# Print raw.dat unchanged, then a Total line for the size column, indented
# so that it lines up with the last field of the last record.
awk -F'"' '                                # input field delimiter == double quote
BEGIN { kb=1024
        mb=kb*kb
        gb=mb*kb
        tb=gb*kb

        # bytes per unit; the empty unit means the value is already in bytes
        factors[""]=1
        factors["KB"]=kb
        factors["MB"]=mb
        factors["GB"]=gb
        factors["TB"]=tb

        sum=0
      }
      { print

        # With the double quote as delimiter, the last quoted value is
        # $(NF-1). A record with fewer than 2 fields has no quoted value,
        # and on an empty record (NF == 0) $(NF-1) would be a fatal error.
        if (NF < 2) next

        value=unit=toupper( $(NF-1) )      # make copies of $(NF-1) and ...
        gsub(/KB|MB|GB|TB/,"",value)       # strip off unit to get value
        gsub(/[-.0-9]/,"",unit)            # strip off number to get unit

        # Skip anything that is not a known unit (e.g. the header text);
        # the "in" test also avoids creating empty entries in factors[].
        if (unit in factors)
            sum+= (value * factors[unit])
      }
END   {      if (sum >= tb) unit="TB"
        else if (sum >= gb) unit="GB"
        else if (sum >= mb) unit="MB"
        else if (sum >= kb) unit="KB"
        else                unit=""

        # using the alignment rule described as Logic 4 in the question:
        #
        #     Use last line length upto " and print that much space
        #     before printing total. This way total will align to last
        #     field of last record
        #
        # use regex to define start of last comma-delimited field;
        # use match() to find start position of regex and add offset
        # for string "Total"; store value in "x" variable; feed "x"
        # to "printf"; if "x=17" then format string "%*s" becomes "%17s"
        #
        # $0 still holds the last record read when the END rule runs

        regex=",[^,]*$"
        x=match($0,regex) + 5

        printf "%*s:\"%.3f%s\"\n", x, "Total", (sum/factors[unit]), unit
      }
' raw.dat

这会生成:

"1","A",","mp4","369.775MB"
"2","B","mp4","363.806MB"
"3","C","txt","441.835MB"
"4","D","pdf","373.809MB"
"5","U","ps","363.845MB"
"6","E","svg","34MB"
"7","F","mkv","408GB"
"8","G","log","3KB"
"9","H","log","34KB"
"10","I","log","39KB"
               Total:"409.902GB"