NF-i 字段的累积和

cumulative sum of NF-i fields

我有

NC_042565.1  1   1  0  0  1  0  0  1  0  0  0  0  8  3  0  0  0  0  0  0  0  0  0
NC_042565.1  2   2  0  0  3  2  0  1  0  0  0  0  7  1  2  1  0  0  0  0  0  0  0
NC_042565.1  3   2  0  0  3  3  0  1  0  0  0  0  7  1  1  2  0  0  0  0  0  0  0
NC_042565.1  4   2  0  0  3  3  0  1  0  0  0  0  7  1  1  2  0  0  0  0  0  0  0
NC_042565.1  5   3  0  0  3  3  0  1  0  0  0  0  7  1  0  3  0  0  0  0  0  0  0

...

我想把第 14 列替换为第 14 列到最后一列的总和,再把第 15 列替换为第 15 列到最后一列的总和,第 16 列同理,依此类推。这样我就得到

NC_042565.1  1   1  0  0  1  0  0  1  0  0  0  0  11  3  0  0  0  0  0  0  0  0  0
NC_042565.1  2   2  0  0  3  2  0  1  0  0  0  0  11  4  3  1  0  0  0  0  0  0  0
NC_042565.1  3   2  0  0  3  3  0  1  0  0  0  0  11  4  3  2  0  0  0  0  0  0  0
NC_042565.1  4   2  0  0  3  3  0  1  0  0  0  0  11  4  3  2  0  0  0  0  0  0  0
NC_042565.1  5   3  0  0  3  3  0  1  0  0  0  0  11  4  3  3  0  0  0  0  0  0  0

我试过了

# Attempt 1: walks left to right from field 2 and adds the previous field to
# each field, i.e. a running sum from the start of the line over every field,
# not a sum from each field toward the last one.
awk '{for(i=2;i<=NF;i++){$i=$i+$(i-1)}} 1'  file 

# Attempt 2: starts at field 15 and adds to each field a counter s, where s is
# bumped by the 0/1 result of the comparison $NF>$i; s is never reset, so it
# keeps growing across fields and across lines.
awk '{for(i=15;i<=NF;i++){$i+=(s+=$NF>$i)}}1' file

awk:

$ awk -v first=14 '{
    # Fields before "first" are passed through unchanged.
    for (n = 1; n < first; n++) printf "%s ", $n
    # Total of fields first..NF; this is the new value of field "first".
    sum = 0
    for (n = first; n <= NF; n++) sum += $n
    # Print the running total, then peel off the current field, so that
    # "sum" always equals the total of the original fields n..NF.
    for (n = first; n < NF; n++) {
      printf "%d ", sum
      sum -= $n
    }
    # What is left is the last field itself.
    printf "%d\n", sum
  }' input.txt
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0

这里的技巧是对所有列求和一次,然后再次循环,每次从该总和中减去当前列,而不是对每一列的所有剩余列求和。使它成为 O(N) 而不是 O(N²)。

使用您展示的示例,请尝试执行以下 awk 程序。

# Two-pass variant: Input_file is given twice, so awk reads it twice.
#   Pass 1 (FNR==NR holds while the first file argument is being read):
#     for each line, save fields 14..NF in arr[line,field] and their total
#     in sum[line], then skip to the next line.
#   Pass 2: replace each field i in 14..NF with sum[line] minus the saved
#     fields 14..i-1, i.e. the total of the original fields i..NF.
# The trailing 1 pattern prints every line of the second pass.
# Note: the inner j loop re-adds the prefix for every field, so the work per
# line grows quadratically with the number of summed fields, and pass 1 keeps
# the saved fields of every line in memory.
awk '
FNR==NR{
  for(i=14;i<=NF;i++){
    arr[FNR,i]=$i
    sum[FNR]+=$i
  }
  next
}
{
  for(i=14;i<=NF;i++){
    diff=0
    if(i>14){
      for(j=14;j<i;j++){
        diff+=arr[FNR,j]
      }
    }
    $i=(sum[FNR]-diff)
   }
}
1
' Input_file Input_file 

NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0

说明: 为以上添加详细说明。

awk '                        ## The input file is read twice: pass 1 collects, pass 2 rewrites.
FNR==NR{                     ## FNR==NR holds only while the first file argument is being read (pass 1).
  for(i=14;i<=NF;i++){       ## Walk fields 14..NF of the current line.
    arr[FNR,i]=$i            ## Remember the original value of field i, keyed by line number and field.
    sum[FNR]+=$i             ## Accumulate the total of fields 14..NF for this line.
  }
  next                       ## Pass 1 only collects: skip the rewrite block and the print below.
}
{                            ## Pass 2: the same lines again, now with arr and sum filled in.
  for(i=14;i<=NF;i++){       ## Rewrite every field from 14 to the last one.
    diff=0                   ## diff will hold the total of the original fields 14..i-1.
    if(i>14){                ## Field 14 has nothing before it to subtract.
      for(j=14;j<i;j++){     ## Re-add the saved fields that come before field i.
        diff+=arr[FNR,j]     ## Saved originals are used because $14..$(i-1) were already overwritten.
      }
    }
    $i=(sum[FNR]-diff)       ## Total minus prefix = sum of the original fields i..NF.
   }
}
1                            ## Always-true pattern: print the rewritten line.
' Input_file Input_file      ## Same file given twice so that awk makes both passes over it.

这很简单:

$ awk '{
    # Walk right to left, from the next-to-last field down to field 14.
    # When field i is reached, field i+1 already holds the sum of the
    # original fields i+1..NF, so adding it makes field i the sum of i..NF.
    i = NF - 1
    while (i >= 14) {
      $i+=$(i+1)
      i--
    }
    print
  }' file
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0

说明:我们只是从 $(NF-1) 开始,按相反的顺序处理各个字段 ($i)。我们通过加上下一个字段 ($i+=$(i+1)) 来即时修改 $i。由于是按相反的顺序进行的,此时 $(i+1) 已经是原始字段 $(i+1) 到 $NF 的总和。因此,把 $(i+1) 加到 $i 上之后,$i 本身就成为原始字段 $i 到 $NF 的总和。