NF-i 字段的累积和
cumulative sum of NF-i fields
我有
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 8 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 7 1 2 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 7 1 1 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 7 1 1 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 7 1 0 3 0 0 0 0 0 0 0
...
我想把第 14 列到最后一列的值求和(结果写入第 14 列),然后把第 15 列到最后一列求和(写入第 15 列),再把第 16 列到最后一列求和,依此类推。
这样我就有了
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0
我试过了
# Attempt 1: walks left to right from field 2, adding the already-updated previous field to each field.
awk '{for(i=2;i<=NF;i++){$i=$i+$(i-1)}} 1' file
和
# Attempt 2: for fields 15..NF, adds the counter s to the field; s grows by the 0/1 result of $NF>$i and is never reset between records.
awk '{for(i=15;i<=NF;i++){$i+=(s+=$NF>$i)}}1' file
使用 awk:
$ awk -v start=14 '
# For each record, replace every field from column "start" through the last
# one with the sum of that field and all fields to its right.
{
    # Records that do not reach column "start" are printed unchanged.
    if (NF < start) { print; next }

    # First pass: total of the original fields start..NF.
    total = 0
    for (n = start; n <= NF; n++) total += $n

    # Second pass: "total" is the sum of the original fields n..NF, so store
    # it in field n, then drop the original value of field n from it.
    for (n = start; n <= NF; n++) {
        value = $n
        $n = total
        total -= value
    }
    print
}' input.txt
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0
这里的技巧是对所有列求和一次,然后再次循环,每次从该总和中减去当前列,而不是对每一列的所有剩余列求和。使它成为 O(N) 而不是 O(N²)。
根据您给出的示例,请尝试以下 awk 程序。
awk '
# Pass 1 (first reading of the file): remember every field from column 14 on,
# plus the per-record total of those fields, keyed by record number.
NR == FNR {
    for (col = 14; col <= NF; col++) {
        arr[FNR, col] = $col
        sum[FNR] += $col
    }
    next
}
# Pass 2 (second reading): overwrite each field from column 14 on with the
# record total minus the original values of the fields to its left.
{
    for (col = 14; col <= NF; col++) {
        diff = 0
        for (left = 14; left < col; left++)
            diff += arr[FNR, left]
        $col = sum[FNR] - diff
    }
    print
}
' Input_file Input_file
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0
说明:下面是上述程序的逐行注释版本。
awk ' ##Start of the awk program.
FNR==NR{ ##True only while the first file argument is being read (first pass).
for(i=14;i<=NF;i++){ ##Loop over fields 14 through the last field of the record.
arr[FNR,i]=$i ##Store the original value of field i, keyed by record number and field number.
sum[FNR]+=$i ##Accumulate the total of fields 14..NF for this record.
}
next ##Skip the remaining rules for first-pass records.
}
{ ##Second pass: runs for the records of the second file argument.
for(i=14;i<=NF;i++){ ##Loop over fields 14 through the last field again.
diff=0 ##Reset the amount to subtract for this field.
if(i>14){ ##Field 14 has no earlier field in the range, so nothing is subtracted for it.
for(j=14;j<i;j++){ ##Loop over the fields from 14 up to, but not including, i.
diff+=arr[FNR,j] ##Add up the stored original values of those fields.
}
}
$i=(sum[FNR]-diff) ##Field i becomes the record total minus the fields before it, i.e. the sum of the original fields i..NF.
}
}
1 ##Always-true pattern with no action: print the (modified) record.
' file1 file1 ##The same file is given twice so that it is read in two passes.
这很简单:
$ awk '
# Walk the fields right to left, so that field i+1 already holds the sum of
# the original fields i+1..NF at the moment it is added to field i.
{
    i = NF
    while (--i >= 14)
        $i+=$(i+1)
}
1' file
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0
说明:我们按相反的顺序处理字段 $i,即从 $(NF-1) 到 $14。我们通过加上下一个字段($i+=$(i+1))就地修改 $i。由于是按相反顺序进行的,此时 $(i+1) 已经是原始字段 $(i+1) 到 $NF 的总和。因此,把 $(i+1) 加到 $i 上之后,$i 本身就成为原始字段 $i 到 $NF 的总和。
我有
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 8 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 7 1 2 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 7 1 1 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 7 1 1 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 7 1 0 3 0 0 0 0 0 0 0
...
我想把第 14 列到最后一列的值求和(结果写入第 14 列),然后把第 15 列到最后一列求和(写入第 15 列),再把第 16 列到最后一列求和,依此类推。这样我就得到:
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0
我试过了
# Attempt 1: walks left to right from field 2, adding the already-updated previous field to each field.
awk '{for(i=2;i<=NF;i++){$i=$i+$(i-1)}} 1' file
和
# Attempt 2: for fields 15..NF, adds the counter s to the field; s grows by the 0/1 result of $NF>$i and is never reset between records.
awk '{for(i=15;i<=NF;i++){$i+=(s+=$NF>$i)}}1' file
使用 awk:
$ awk -v start=14 '
# For each record, replace every field from column "start" through the last
# one with the sum of that field and all fields to its right.
{
    # Records that do not reach column "start" are printed unchanged.
    if (NF < start) { print; next }

    # First pass: total of the original fields start..NF.
    total = 0
    for (n = start; n <= NF; n++) total += $n

    # Second pass: "total" is the sum of the original fields n..NF, so store
    # it in field n, then drop the original value of field n from it.
    for (n = start; n <= NF; n++) {
        value = $n
        $n = total
        total -= value
    }
    print
}' input.txt
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0
这里的技巧是对所有列求和一次,然后再次循环,每次从该总和中减去当前列,而不是对每一列的所有剩余列求和。使它成为 O(N) 而不是 O(N²)。
根据您给出的示例,请尝试以下 awk 程序。
awk '
# Pass 1 (first reading of the file): remember every field from column 14 on,
# plus the per-record total of those fields, keyed by record number.
NR == FNR {
    for (col = 14; col <= NF; col++) {
        arr[FNR, col] = $col
        sum[FNR] += $col
    }
    next
}
# Pass 2 (second reading): overwrite each field from column 14 on with the
# record total minus the original values of the fields to its left.
{
    for (col = 14; col <= NF; col++) {
        diff = 0
        for (left = 14; left < col; left++)
            diff += arr[FNR, left]
        $col = sum[FNR] - diff
    }
    print
}
' Input_file Input_file
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0
说明:下面是上述程序的逐行注释版本。
awk ' ##Start of the awk program.
FNR==NR{ ##True only while the first file argument is being read (first pass).
for(i=14;i<=NF;i++){ ##Loop over fields 14 through the last field of the record.
arr[FNR,i]=$i ##Store the original value of field i, keyed by record number and field number.
sum[FNR]+=$i ##Accumulate the total of fields 14..NF for this record.
}
next ##Skip the remaining rules for first-pass records.
}
{ ##Second pass: runs for the records of the second file argument.
for(i=14;i<=NF;i++){ ##Loop over fields 14 through the last field again.
diff=0 ##Reset the amount to subtract for this field.
if(i>14){ ##Field 14 has no earlier field in the range, so nothing is subtracted for it.
for(j=14;j<i;j++){ ##Loop over the fields from 14 up to, but not including, i.
diff+=arr[FNR,j] ##Add up the stored original values of those fields.
}
}
$i=(sum[FNR]-diff) ##Field i becomes the record total minus the fields before it, i.e. the sum of the original fields i..NF.
}
}
1 ##Always-true pattern with no action: print the (modified) record.
' file1 file1 ##The same file is given twice so that it is read in two passes.
这很简单:
$ awk '
# Walk the fields right to left, so that field i+1 already holds the sum of
# the original fields i+1..NF at the moment it is added to field i.
{
    i = NF
    while (--i >= 14)
        $i+=$(i+1)
}
1' file
NC_042565.1 1 1 0 0 1 0 0 1 0 0 0 0 11 3 0 0 0 0 0 0 0 0 0
NC_042565.1 2 2 0 0 3 2 0 1 0 0 0 0 11 4 3 1 0 0 0 0 0 0 0
NC_042565.1 3 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 4 2 0 0 3 3 0 1 0 0 0 0 11 4 3 2 0 0 0 0 0 0 0
NC_042565.1 5 3 0 0 3 3 0 1 0 0 0 0 11 4 3 3 0 0 0 0 0 0 0
说明:我们按相反的顺序处理字段 $i,即从 $(NF-1) 到 $14。我们通过加上下一个字段($i+=$(i+1))就地修改 $i。由于是按相反顺序进行的,此时 $(i+1) 已经是原始字段 $(i+1) 到 $NF 的总和。因此,把 $(i+1) 加到 $i 上之后,$i 本身就成为原始字段 $i 到 $NF 的总和。