我想用 AWK 读取文件并存储一些变量

I want to read a file and store some variables with AWK

我有一个包含以下内容的文件。它是对设备执行查询得到的结果,因此有些输入在数据库中查不到是意料之中的。下面的示例分别是一次成功查询和一次不成功查询的结果。第二个示例不包含我想捕获到变量中的全部信息,所以对于这种结果,我想忽略其内容,并把相应的变量设为 null/空值。

<INTLPO:ISV=PORTAB NTL="6130290095" VEM=NAO;
VECTURA - SS            BSA002             2020-09-12            09-32
INTLPO:ISV=PORTAB NTL="6130290095" VEM=NAO;
INTERROGACAO DE NUMERO TELEFONICO PARA PORTABILIDADE NUMERICA                   

  TIPO DE ENCAMINHAMENTO POR ASSINANTE
  NTL = 6130290095           OPC = S_INF    RNP = 551      CSP = 25
  EIP = S_INF
  CDO = 00961
  CNL = 61000                NUF = S_INF                   TPB = PREST
  CPT = NAO                  CRE = 125      NUE = S_INF
  DAT = 2014-04-16           HOR = 10:30:20.798609
  TBR = 25
  RST              MAN      RST              MAN      RST              MAN
  2%               934      3%               934      4%               934
  5%               934      6%               934      7%               934
  8%               934      9%               934      9090%            934
  0??%             934      90??%            934      0?0%             934


  TOTAL DE NUMEROS ASSOCIADOS AO SERVICO: 1
<INTLPO:ISV=PORTAB NTL="6160150178" VEM=NAO;
VECTURA - SS            BSA002             2020-09-12            09-32
INTLPO:ISV=PORTAB NTL="6160150178" VEM=NAO;
INTERROGACAO DE NUMERO TELEFONICO PARA PORTABILIDADE NUMERICA                   

  ME:  NENHUM NUMERO CADASTRADO ATENDE AS ESPECIFICACOES

我有以下部分正确的代码,但结果有点乱(有重复的行,甚至还有错误的值)。

awk -F ' ' 'BEGIN { OFS="," }
            # Field numbers below assume default whitespace splitting, so a
            # line such as "NTL = 6130290095  OPC = S_INF" yields the values
            # in fields 3, 6, 9 and 12.
            # Header line: "VECTURA - SS  BSA002  2020-09-12  09-32".
            /^VECTURA/ { equipment = $4; data = $5 }
            # Echoed command line: field 2 is NTL="<number>"; keep only the
            # text between the double quotes.
            /^INTLPO/ { split($2, quoted, "\""); numero = quoted[2] }
            # [[:space:]] is used instead of \s, which only gawk understands.
            /^[[:space:]]*NTL/ { ntl = $3; opc = $6; rnp = $9; csp = $12 }
            /^[[:space:]]*EIP/ { eip = $3 }
            /^[[:space:]]*CDO/ { cdo = $3 }
            /^[[:space:]]*CNL/ { cnl = $3; nuf = $6; tpb = $9 }
            /^[[:space:]]*CPT/ { cpt = $3; cre = $6; nue = $9 }
            /^[[:space:]]*DAT/ { dat = $3; hor = $6 }
            /^[[:space:]]*TBR/ { tbr = $3 }
            # Stores the second token of the "RST MAN ..." header line and
            # skips the print rule below for that line only.
            /^[[:space:]]*RST/ { man = $2; next }
            # NOTE: this rule has no pattern, so it runs for every other input
            # line, and no variable is ever reset between query blocks. That is
            # the cause of the repeated rows and stale values in the results.
            { print data, equipment, numero, ntl, opc, rnp, csp, eip, cdo, cnl, nuf, tpb, cpt, cre, nue, dat, hor, tbr, man }' input.tx >> output.txt

结果

2020-09-12,BSA002,6160150536,,,,,,,,,,,,,,,,
2020-09-12,BSA002,6160150536,,,,,,,,,,,,,,,,
2020-09-12,BSA002,6130290095,,,,,,,,,,,,,,,,
2020-09-12,BSA002,6130290095,,,,,,,,,,,,,,,,
2020-09-12,BSA002,6130290095,,,,,,,,,,,,,,,,
2020-09-12,BSA002,6130290095,,,,,,,,,,,,,,,,
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,,,,,,,,,,,,
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,,,,,,,,,,,
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,,,,,,,,,,
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,,,,,,,
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,,,,
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,,
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6160150178,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6160150178,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6160150178,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6160150178,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6160150178,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6160150178,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN
2020-09-12,BSA002,6160150178,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,MAN

请注意,NTL 值 6130290095(变量 ntl)被错误地关联到了 numero 为 6160150178 的记录上(见上面结果的最后几行)。

我该如何解决这个问题?我已经尝试过一些 AWK 条件语句,但都没有成功。至于输出,我只希望每条记录占一行,就像上面结果示例中的某些行那样。非常感谢。

如果只想在 numero 尚未设置时才改变它的值,可以添加一个类似 numero || 的判断。
阅读您的评论后,我修改了解决方案。按我现在的理解,您并不想要一条包含所有块合并结果的记录,而是希望每个被处理的块各输出一行结果。每个新块都以 <INTLPO 开头。
此解决方案会在每个新块开始时把所有值清空(对第一个块而言并非必需,但也无害)。
每当遇到新块时,会输出上一个块的结果;到达文件末尾时,会输出最后一个块的结果。

awk 'function newrecord() {
        # Start a fresh record: bump the counter and blank every output
        # variable so nothing carries over from the previous query block.
        recordnumber++;
        data=equipment=numero=ntl=opc=rnp=csp=eip=cdo="";
        cnl=nuf=tpb=cpt=cre=nue=dat=hor=tbr=man="";
     }
     function printrecord() {
         # Emit the current record as a single OFS-separated line.
         print data, equipment, numero, ntl, opc, rnp, csp, eip,
               cdo, cnl, nuf, tpb, cpt, cre, nue, dat, hor, tbr, man;
     }

     BEGIN { OFS="," }
            # A line starting with "<INTLPO" opens a new query block: print the
            # previous block (if one was started) and reset all variables.
            /^<INTLPO/ { if (recordnumber) printrecord(); newrecord(); }
            # Field numbers assume default whitespace splitting.
            # Header line: "VECTURA - SS  BSA002  2020-09-12  09-32".
            /^VECTURA/ { equipment = $4; data = $5 }
            # Echoed command line: field 2 is NTL="<number>"; keep only the
            # text between the double quotes.
            /^INTLPO/ { split($2, quoted, "\""); numero = quoted[2] }
            # [[:space:]] is used instead of \s, which only gawk understands.
            /^[[:space:]]*NTL/ { ntl = $3; opc = $6; rnp = $9; csp = $12 }
            /^[[:space:]]*EIP/ { eip = $3 }
            /^[[:space:]]*CDO/ { cdo = $3 }
            /^[[:space:]]*CNL/ { cnl = $3; nuf = $6; tpb = $9 }
            /^[[:space:]]*CPT/ { cpt = $3; cre = $6; nue = $9 }
            /^[[:space:]]*DAT/ { dat = $3; hor = $6 }
            /^[[:space:]]*TBR/ { tbr = $3 }
            # Stores the second token of the "RST MAN ..." header line.
            /^[[:space:]]*RST/ { man = $2; next }
            # The last block has no following "<INTLPO" line, so print it here.
            END { printrecord(); }
      ' input.tx

只要数据是 tag = value 形式的键值对,最好的处理方式就是先把这种映射存入一个数组(下面的 tag2val[]),之后就可以通过标签(名称)按任意顺序访问任意值。

$ cat tst.awk
BEGIN {
    # Output fields are comma-separated.
    OFS = ","
    # Ordered list of output columns. split() on default whitespace turns the
    # continued string below into tags[1..numTags].
    numTags = split("\
                EQUIPMENT \
                DATE \
                NUMERO \
                NTL \
                OPC \
                RNP \
                CSP \
                EIP \
                CDO \
                CNL \
                NUF \
                TPB \
                CPT \
                CRE \
                NUE \
                DAT \
                HOR \
                TBR \
                MAN \
                ",tags)
    # Print the header row.
    for (tagNr=1; tagNr<=numTags; tagNr++) {
        tag = tags[tagNr]
        printf "%s%s", tag, (tagNr<numTags ? OFS : ORS)
    }
}
/^</ && (NR > 1) {
    # A line starting with "<" opens a new query block: print the previous
    # block and discard its values. The NR > 1 test skips the flush when the
    # very first input line is the block opener.
    prt()
    delete tag2val
}
$1 == "VECTURA" {
    # Header line: "VECTURA - SS  BSA002  2020-09-12  09-32".
    tag2val["EQUIPMENT"] = $4
    tag2val["DATE"] = $5
}
/^INTLPO/ {
    # Field 2 is NTL="<number>": strip everything up to the opening quote and
    # the closing quote, leaving just the number.
    gsub(/^[^"]+"|"$/,"",$2)
    tag2val["NUMERO"] = $2
}
/^([[:space:]]*[^[:space:]]+ = [^[:space:]]+)+$/ {
    # The whole line consists of "tag = value" groups; each group occupies
    # three fields (tag, "=", value).
    for (i=1; i<NF; i+=3) {
        tag2val[$i] = $(i+2)
    }
}
nextLineTag != "" {
    # The previous line was the RST/MAN header, so field 2 of this line is the
    # first value in the MAN column.
    tag2val[nextLineTag] = $2
    nextLineTag = ""
}
/^[[:space:]]*RST[[:space:]]+MAN/ {
    # Table header: remember that the next line carries the MAN value. This
    # rule sits after the nextLineTag rule so it does not fire on this line.
    nextLineTag = "MAN"
}
END { prt() }

function prt(   tagNr, tag, val) {
    # Print one output row: the value stored for each tag, in tags[] order.
    # Tags with no stored value print as empty fields.
    for (tagNr=1; tagNr<=numTags; tagNr++) {
        tag = tags[tagNr]
        val = tag2val[tag]
        printf "%s%s", val, (tagNr<numTags ? OFS : ORS)
    }
}

.

$ awk -f tst.awk file
EQUIPMENT,DATE,NUMERO,NTL,OPC,RNP,CSP,EIP,CDO,CNL,NUF,TPB,CPT,CRE,NUE,DAT,HOR,TBR,MAN
BSA002,2020-09-12,6130290095,6130290095,S_INF,551,25,S_INF,00961,61000,S_INF,PREST,NAO,125,S_INF,2014-04-16,10:30:20.798609,25,934
BSA002,2020-09-12,6160150178,,,,,,,,,,,,,,,,