Unix 中两个文件的比较和显示差异

Comparing two files in Unix and displaying the differences

我在 Unix 上有 2 个文件。我想比较这两个文件，并满足以下要求：

  1. 显示每个文件相对于另一个文件缺少的行。
  2. 显示两个文件中的实际差异。
  3. 能够跳过我不想在报告中比较的列，例如 d_report_ref_date。
  4. 我的唯一比较键是v_party_id

文件 1:

d_report_ref_date="2021-03-31" n_pd_percent="0.16323687" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="103811925" v_src_system_id="SMT"
d_party_default_status_date="2012-03-09" d_report_ref_date="2021-03-31" n_pd_percent="1" v_accounting_standard="SQRT" v_party_default_status_cd="UNLIKE" v_party_id="36056030" v_src_system_id="SMT"
d_report_ref_date="2021-03-31" n_pd_percent="0.16323687" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="53565979" v_src_system_id="SMT"
d_report_ref_date="2021-03-31" n_pd_percent="0.16323687" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="103811100" v_src_system_id="SMT"
d_report_ref_date="2021-03-31" n_pd_percent="0.16323687" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="103811200" v_src_system_id="SMT"

文件 2

d_report_ref_date="2021-03-31" n_pd_percent="0.2045" v_accounting_standard="SQRT" v_party_default_status_cd="NOTDFLT" v_party_id="103811925" v_src_system_id="SMT"
d_party_default_status_date="2012-03-09" d_report_ref_date="2021-03-31" n_pd_percent="1" v_accounting_standard="IFRS" v_party_default_status_cd="UNLIKE" v_party_id="36056030" v_src_system_id="SMT"
d_report_ref_date="2021-03-31" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="53565979" v_src_system_id="SMT"
d_report_ref_date="2021-03-31" n_pd_percent="0.16323687" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="103811400" v_src_system_id="SMT"

文件3

period="2021-02-28" book_base_ent_cd="U0027" other_inst_ident="PLCHS252SA20" rep_nom_curr="PLN" reporting_basis="Unit" src_instr_class="Debt" mat_date="2028-02-25" nom_curr="PLN" primary_asset_class="Bond" seniority_type="931" security_status="alive" issuer_name="CUST38677608" intra_group_prud_scope="Issuer is not part of the reporting group" intra_group_acc_scope="Issuer is not part of the reporting group" frbrnc_stts="Not forborne or renegotiated" src_frbrnc_stts="NOFRBRNRNGT" prfrmng_stts="Performing" src_prfrmng_stts="KC10.1" dflt_stts_issr="Not in default" src_dflt_stts_issr="KC10.1" dflt_stts_instrmnt="Not in default" src_mes_accntng_clssfctn="AMC" prdntl_prtfl="Non-trading book" imprmnt_stts="Stage 1 (IFRS)" src_imprmnt_stts="1" imprmnt_assssmnt_mthd="Collectively assessed" src_imprmnt_assssmnt_mthd="COLLECTIVE" accmltd_imprmnt="392.69" accmltd_chngs_fv_cr="0" expsr_vl="0" unit_measure="EUR" unit_measure_nv="EUR" crryng_amnt="122825.65" issuer_grid_id="38677608" v_party_id="PLCHS252SA20"

文件4

period="2021-02-28" book_base_ent_cd="U0027" other_inst_ident="PLCHS252SA20" rep_nom_curr="PLN" reporting_basis="Unit" src_instr_class="Debt" mat_date="2028-02-25" nom_curr="PLN" primary_asset_class="Bond" seniority_type="931" security_status="alive" issuer_name="CUST38677608" intra_group_prud_scope="Issuer is not part of the reporting group" intra_group_acc_scope="Issuer is not part of the reporting group" frbrnc_stts="Not forborne or renegotiated" src_frbrnc_stts="NOFRBRNRNGT" prfrmng_stts="Performing" src_prfrmng_stts="KC10.1" dflt_stts_issr="Not in default" src_dflt_stts_issr="KC10.1" dflt_stts_instrmnt="Not in default" src_mes_accntng_clssfctn="AMC" prdntl_prtfl="Non-trading book" imprmnt_stts="Stage 1 (IFRS)" src_imprmnt_stts="1" imprmnt_assssmnt_mthd="Collectively assessed" src_imprmnt_assssmnt_mthd="COLLECTIVE" accmltd_imprmnt="392.69" accmltd_chngs_fv_cr="0" expsr_vl="0" unit_measure="EUR" unit_measure_nv="EUR" crryng_amnt="122825.65" issuer_grid_id="38677608" v_party_id="PLCHS252SA20"

预期输出

Rows missing in file1: v_party_id="103811400"
Rows missing in file2: v_party_id="103811100", v_party_id="103811200"
Mismtach in row 1 for v_party_id="103811925": file1.n_pd_percent="0.16323687" file2.n_pd_percent="0.2045", file1.v_accounting_standard="IFRS" file2.v_accounting_standard="SQRT"
Mismtach in row 2 for v_party_id="36056030":  file1.v_accounting_standard="SQRT" file2.v_accounting_standard="IFRS"

代码:

# compare.awk -- report field-level differences between two files made of
# name="value" pairs. Records are paired by line number and fields by their
# position within the line.
#
# Usage: awk -f compare.awk file1 file2

# Split on "=" and on blanks, so names land in odd-numbered fields and
# their values in the even-numbered field that follows.
BEGIN { FS = "[= ]" }

# First file: store every value under (line number, position of its name).
FNR == NR {
    for (v = 2; v <= NF; v += 2)
        file1[NR, v - 1] = $v
    next
}

# Second file: compare each value with the one stored for the same line
# number and the same field position, and collect the differences.
{
    diffs = ""
    joiner = ""
    for (v = 2; v <= NF; v += 2) {
        n = v - 1
        if ($v == file1[FNR, n])
            continue
        diffs = diffs joiner OFS ARGV[1] "." $n "=" file1[FNR, n] OFS FILENAME "." $n "=" $v
        joiner = ","
    }
    if (diffs != "")
        print "Mismtach in row " FNR diffs
}

实际输出

 awk -f compare.awk file1 file2
Mismtach in row 1 file1.n_pd_percent="0.16323687" file2.n_pd_percent="0.2045", file1.v_accounting_standard="IFRS" file2.v_accounting_standard="SQRT"
Mismtach in row 2 file1.v_accounting_standard="SQRT" file2.v_accounting_standard="IFRS"
Mismtach in row 3 file1.v_accounting_standard="0.16323687" file2.v_accounting_standard="IFRS", file1.v_party_default_status_cd="IFRS" file2.v_party_default_status_cd="NOTDFLT", file1.v_party_id="NOTDFLT" file2.v_party_id="53565979", file1.v_src_system_id="53565979" file2.v_src_system_id="SMT"
Mismtach in row 4 file1.v_party_id="103811100" file2.v_party_id="103811400"

我可以在代码中做哪些更改才能以所需格式显示输出?

我认为下面的脚本可以实现您想要的效果，您可能需要再调整一下输出格式：

$ cat tst.awk
# tst.awk -- compare two files of space-separated name="value" pairs,
# pairing records by their v_party_id field instead of by line number.
#
# Usage: awk -f tst.awk file1 file2
#
# Output:
#   "Mismatch in row N ..."  for each file2 record whose key also exists in
#                            file1 and whose values differ (N is the line
#                            number within file2).
#   "In file1 only: ..."     for file1 records whose key never shows up in file2.
#   "In file2 only: ..."     for file2 records whose key never shows up in file1.
#
# Limitations:
#   - Fields are split on "=" and on blanks, so a value that itself contains
#     a blank or "=" is broken into several fields.
#   - Only the fields present in the file2 record are compared.
#   - Records without a v_party_id all share the same empty key.

# Names land in odd-numbered fields, their values in the following field.
BEGIN { FS="[= ]" }

# Runs for every record of both files: extract the comparison key.
{
    # A blank is prepended so the blank-anchored pattern also matches when
    # v_party_id is the first field, while never matching a longer name
    # that merely ends in "v_party_id".
    match(" "$0,/ v_party_id="[^"]+"/)
    # RSTART indexes into " "$0, i.e. one position past the same character
    # in $0, so this drops the leading blank of the match and instead picks
    # up the single character after the closing quote (when there is one).
    # NOTE: the key therefore differs depending on whether v_party_id is the
    # last field of the record; both files must place it consistently.
    key = substr($0,RSTART,RLENGTH)
}

# First file: remember each whole record under its key.
NR==FNR {
    file1[key] = $0
    next
}

# Second file: compare against the file1 record with the same key, if any.
{
    if ( key in file1 ) {
        # Index the file1 record's values by field name so that fields are
        # matched by name rather than by position.
        nf = split(file1[key],tmp)
        for (i=1; i<nf; i+=2) {
            f1[key,tmp[i]] = tmp[i+1]
        }

        msg = sep = ""
        for (i=1; i<NF; i+=2) {
            if ( $(i+1) != f1[key,$i] ) {
                msg = msg sep OFS ARGV[1] "." $i "=" f1[key,$i] OFS FILENAME "." $i "=" $(i+1)
                sep = ","
            }
        }
        if ( msg != "" ) {
            print "Mismatch in row " FNR msg
        }
        # Whatever is left in file1[] at END was never seen in file2.
        delete file1[key]
    }
    else {
        file2[key] = $0
    }
}

# Report the records that exist in only one of the two files.
END {
    for (key in file1) {
        print "In file1 only:", key, file1[key]
    }
    for (key in file2) {
        print "In file2 only:", key, file2[key]
    }
}

$ awk -f tst.awk file1 file2
Mismatch in row 1 file1.n_pd_percent="0.16323687" file2.n_pd_percent="0.2045", file1.v_accounting_standard="IFRS" file2.v_accounting_standard="SQRT"
Mismatch in row 2 file1.v_accounting_standard="SQRT" file2.v_accounting_standard="IFRS"
In file1 only: v_party_id="103811200"  d_report_ref_date="2021-03-31" n_pd_percent="0.16323687" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="103811200" v_src_system_id="SMT"
In file1 only: v_party_id="103811100"  d_report_ref_date="2021-03-31" n_pd_percent="0.16323687" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="103811100" v_src_system_id="SMT"
In file2 only: v_party_id="103811400"  d_report_ref_date="2021-03-31" n_pd_percent="0.16323687" v_accounting_standard="IFRS" v_party_default_status_cd="NOTDFLT" v_party_id="103811400" v_src_system_id="SMT"