AWK Attempt to use scalar variable as array 错误

AWK Attempt to use scalar variable as an array error

    #!/usr/bin/awk -f
    # Buffers each cluster (a ">Cluster N" header line plus the member lines that
    # follow it) and prints the cluster only when its members carry more than one
    # distinct gene id.
    BEGIN {
        # Split records on "_" or "." so the underscore-separated id parts of a
        # member line become individual fields.
        FS = "[_.]"
    }
    
    # Print the buffered lines a[0..i] if more than one distinct gene id was seen.
    # NOTE: gawk fixes the type of a never-used variable as scalar when it is
    # passed to length(). On the first ">Cluster" record this function runs before
    # gene_ids has been subscripted or deleted, so the later "delete gene_ids"
    # fails with: attempt to use scalar `gene_ids' as an array.
    function display() {
        if (length(gene_ids) > 1)
            for (j=0; j <= i; j++)
                print a[j]
    }
    
    {
        if (/^>Cluster /) {
            # A new cluster starts: flush the previous one, then reset the buffers.
            display()
            delete a
            delete gene_ids
            # Slot 0 holds the header line; i restarts at 0.
            a[i=0] = $0
        } else {
            a[++i] = $0
            # With FS = "[_.]" field 7 is the ENSG... component of the sample
            # member lines; using it as a key counts distinct gene ids.
            gene_ids[$7] = 1
        }
    }
    
    END {
        # Flush the final cluster, which has no following header to trigger it.
        display()
    }
>Cluster 0
0   3843aa, >9606_9d1c13f4f2796e1bc5d9c034d256608e_ENSP00000478752_3843_318_ENST00000621744_ENSG00000286185... *
1   3843aa, >9606_9d1c13f4f2796e1bc5d9c034d256608e_ENSP00000498781_3843_318_ENST00000651566_ENSG00000271383... at 1:3843:1:3843/100.00%
>Cluster 17
0   1388aa, >9606_e3f5b4b466cd2bae95842b586d4d5ff5_ENSP00000419786_1388_4_ENST00000465301_ENSG00000243978... *
1   1388aa, >9606_e3f5b4b466cd2bae95842b586d4d5ff5_ENSP00000441452_1388_4_ENST00000540313_ENSG00000243978... at 1:1388:1:1388/100.00%
>Cluster 34
0   1150aa, >9606_c6fca1c116a00dbb0d2e8930f4056625_ENSP00000353655_1150_26_ENST00000360468_ENSG00000196547... *
1   1150aa, >9606_c6fca1c116a00dbb0d2e8930f4056625_ENSP00000452948_1150_26_ENST00000559717_ENSG00000196547... at 1:1150:1:1150/100.00%
>Cluster 39
0   1072aa, >9606_64cead9c681fd594c83c17cc06748bb6_ENSP00000315112_1072_50_ENST00000324103_ENSG00000092098... *
1   1072aa, >9606_64cead9c681fd594c83c17cc06748bb6_ENSP00000457512_1072_50_ENST00000558468_ENSG00000259529... at 1:1072:1:1072/100.00%
>Cluster 271
0       551aa, >9606_95dbfd3f219d32f1cc1074a79bfc576d_ENSP00000415200_551_42_ENST00000429354_ENSG00000268500... *
1       551aa, >9606_95dbfd3f219d32f1cc1074a79bfc576d_ENSP00000470259_551_42_ENST00000599649_ENSG00000268500... at 1:551:1:551/100.00%
2       551aa, >9606_95dbfd3f219d32f1cc1074a79bfc576d_ENSP00000473238_551_42_ENST00000534261_ENSG00000105501... at 1:551:1:551/100.00%
>Cluster 284
0       547aa, >9606_8ed59e1e16a1229b55495ff661b5aa66_ENSP00000354675_547_9_ENST00000361229_ENSG00000198908... *
1       547aa, >9606_8ed59e1e16a1229b55495ff661b5aa66_ENSP00000361820_547_9_ENST00000372735_ENSG00000198908... at 1:547:1:547/100.00%
2       547aa, >9606_8ed59e1e16a1229b55495ff661b5aa66_ENSP00000391722_547_9_ENST00000448867_ENSG00000198908... at 1:547:1:547/100.00%
3       547aa, >9606_8ed59e1e16a1229b55495ff661b5aa66_ENSP00000403226_547_9_ENST00000457056_ENSG00000198908... at 1:547:1:547/100.00%
4       547aa, >9606_8ed59e1e16a1229b55495ff661b5aa66_ENSP00000405893_547_9_ENST00000447531_ENSG00000198908... at 1:547:1:547/100.00%
>Cluster 0
0   3843aa, >9606_9d1c13f4f2796e1bc5d9c034d256608e_ENSP00000478752_3843_318_ENST00000621744_ENSG00000286185... *
1   3843aa, >9606_9d1c13f4f2796e1bc5d9c034d256608e_ENSP00000498781_3843_318_ENST00000651566_ENSG00000271383... at 1:3843:1:3843/100.00%
>Cluster 39
0   1072aa, >9606_64cead9c681fd594c83c17cc06748bb6_ENSP00000315112_1072_50_ENST00000324103_ENSG00000092098... *
1   1072aa, >9606_64cead9c681fd594c83c17cc06748bb6_ENSP00000457512_1072_50_ENST00000558468_ENSG00000259529... at 1:1072:1:1072/100.00%
>Cluster 271
0       551aa, >9606_95dbfd3f219d32f1cc1074a79bfc576d_ENSP00000415200_551_42_ENST00000429354_ENSG00000268500... *
1       551aa, >9606_95dbfd3f219d32f1cc1074a79bfc576d_ENSP00000470259_551_42_ENST00000599649_ENSG00000268500... at 1:551:1:551/100.00%
2       551aa, >9606_95dbfd3f219d32f1cc1074a79bfc576d_ENSP00000473238_551_42_ENST00000534261_ENSG00000105501... at 1:551:1:551/100.00%
(FILENAME=test_cluster FNR=1) fatal: attempt to use scalar `gene_ids' as an array
awk 'BEGIN{a[1]=10;a[2]=20;print length(a)}'

按照此处(here)的建议

set -o | grep posix

如果 gene_ids 之前从未被使用过,length(gene_ids) 会将 gene_ids 声明为标量,因为历史上 length() 只能用于字符串(该行为将在即将发布的 gawk 版本中更改:如果参数的类型之前尚未确定,length() 将不再设置它)。

将 delete gene_ids 添加到 BEGIN 部分,即可把它声明为数组,而不受脚本中现有各行执行顺序的影响(该顺序由您的输入数据决定):

$ awk 'BEGIN{ length(gene_ids); gene_ids[1] }'
awk: cmd. line:1: fatal: attempt to use scalar `gene_ids' as an array

$ awk 'BEGIN{ delete gene_ids; length(gene_ids); gene_ids[1] }'
$