如何使用 AWK 透视数据
How to Pivot Data Using AWK
转换前(From):
DT X Y Z
10 75 0 3
20 100 1 6
30 125 2 9
转换后(To):
DT ID VALUE
10 X 75
20 Y 0
30 Z 3
10 X 100
20 Y 1
30 Z 6
10 X 125
20 Y 2
30 Z 9
你可以试试下面这个 awk 脚本(使用 MAWK 1.2 版本)
你的数据可以是 5x5 或更大的表
# Pivot a whitespace-separated table (one key column followed by N value
# columns, processed in groups of N data rows) into tab-separated
# "key <TAB> column-name <TAB> value" lines. Reads the file named infile.
mawk -v OFS='\t' '
# Header line: remember the value-column names and print the output header.
NR == 1 {
  nbfield = NF - 1               # number of value columns
  for (i = 1; i < NF; i++)
    ID[i] = $(i + 1)
  # $1 is the name of the key column ("DT" in the sample data).
  print $1 OFS "ID" OFS "VALUE"
  next
}
# Data line: store every field under (column name, row slot).
{
  # Row slot inside the current group of nbfield rows, numbered 1..nbfield.
  numrecord = (NR - 1) % nbfield
  numrecord = numrecord ? numrecord : nbfield
  # ID[0] is never assigned, so the key column is stored under the empty name.
  for (i = 0; i <= nbfield; i++)
    val[ID[i], numrecord] = $(i + 1)
}
# A group of nbfield rows is complete: emit nbfield x nbfield output lines.
# Each line holds: key of row j, name of column j, value of column j in row i.
numrecord == nbfield {
  for (i = 1; i <= nbfield; i++)
    for (j = 1; j <= nbfield; j++)
      print val[ID[0], j] OFS ID[j] OFS val[ID[j], i]
}
' infile
完成
# The original dataset is comma-separated and has 280 columns.
# For every column from the 4th onward, run st.awk to select the DT, ID and
# that column, then append the resulting rows (suffixed with the column name)
# to New<input file>. Requires st.awk in the current directory.
tempfile=dataset.csv
# Column count = number of commas in the header line + 1.
col_count=$(head -n 1 "$tempfile" | tr -cd ',' | wc -c)
col_count=$((col_count + 1))
for i in $(seq 4 "$col_count"); do
  echo "$i"
  # Name of the i-th header field, with spaces removed.
  col_name=$(head -n 1 "$tempfile" | sed 's/ //g' | awk -F',' -v n="$i" '{ print $n }')
  # substr($0, index($0, $1)) keeps the line from its first field onward.
  # NOTE(review): the second argument of index() was unreadable in the
  # original listing; $1 is assumed -- confirm.
  # NOTE(review): NR > 1 discards the first line produced by st.awk, which
  # already skips the header line, so a data row may be dropped -- confirm.
  awk -F',' -v header="DT,ID,$col_name" -f st.awk "$tempfile" \
    | awk -v name="$col_name" 'NR > 1 { print substr($0, index($0, $1)) "," name }' \
    | sed 's/ //g' >> "New$tempfile"
done
# file st.awk:
# Adapted, with minor changes, from an answer found on Stack Overflow.
#
# Usage: awk -F"," -v header="NAME1,NAME2,..." -f st.awk file...
# For every line except the first of each file, prints the fields whose
# header name is listed in `header`, joined with commas.
BEGIN {
    # Parse the requested header names into the lookup array h.
    split(header, a, ",")
    for (i in a) {
        h[a[i]] = 2    # only key membership is tested; the value is unused
    }
}
# First line of each file: record the numbers of the requested columns.
FNR == 1 {
    split("", cols)    # re-initialise cols for every input file
    for (i = 1; i <= NF; i++) {
        if ($i in h) {
            cols[i] = 1
        }
    }
    next
}
# All other lines: print the selected columns, comma-separated.
{
    res = ""
    picked = 0         # number of fields appended so far
    for (i = 1; i <= NF; i++) {
        if (i in cols) {
            # Separator goes between fields only, never before the first one.
            sep = picked ? "," : ""
            res = res sep $i
            picked++
        }
    }
    if (picked) {
        print res
    }
}
转换前(From):
DT X Y Z
10 75 0 3
20 100 1 6
30 125 2 9
转换后(To):
DT ID VALUE
10 X 75
20 Y 0
30 Z 3
10 X 100
20 Y 1
30 Z 6
10 X 125
20 Y 2
30 Z 9
你可以试试下面这个 awk 脚本(使用 MAWK 1.2 版本)
你的数据可以是 5x5 或更大的表
# Pivot a whitespace-separated table (one key column followed by N value
# columns, processed in groups of N data rows) into tab-separated
# "key <TAB> column-name <TAB> value" lines. Reads the file named infile.
mawk -v OFS='\t' '
# Header line: remember the value-column names and print the output header.
NR == 1 {
  nbfield = NF - 1               # number of value columns
  for (i = 1; i < NF; i++)
    ID[i] = $(i + 1)
  # $1 is the name of the key column ("DT" in the sample data).
  print $1 OFS "ID" OFS "VALUE"
  next
}
# Data line: store every field under (column name, row slot).
{
  # Row slot inside the current group of nbfield rows, numbered 1..nbfield.
  numrecord = (NR - 1) % nbfield
  numrecord = numrecord ? numrecord : nbfield
  # ID[0] is never assigned, so the key column is stored under the empty name.
  for (i = 0; i <= nbfield; i++)
    val[ID[i], numrecord] = $(i + 1)
}
# A group of nbfield rows is complete: emit nbfield x nbfield output lines.
# Each line holds: key of row j, name of column j, value of column j in row i.
numrecord == nbfield {
  for (i = 1; i <= nbfield; i++)
    for (j = 1; j <= nbfield; j++)
      print val[ID[0], j] OFS ID[j] OFS val[ID[j], i]
}
' infile
完成
# The original dataset is comma-separated and has 280 columns.
# For every column from the 4th onward, run st.awk to select the DT, ID and
# that column, then append the resulting rows (suffixed with the column name)
# to New<input file>. Requires st.awk in the current directory.
tempfile=dataset.csv
# Column count = number of commas in the header line + 1.
col_count=$(head -n 1 "$tempfile" | tr -cd ',' | wc -c)
col_count=$((col_count + 1))
for i in $(seq 4 "$col_count"); do
  echo "$i"
  # Name of the i-th header field, with spaces removed.
  col_name=$(head -n 1 "$tempfile" | sed 's/ //g' | awk -F',' -v n="$i" '{ print $n }')
  # substr($0, index($0, $1)) keeps the line from its first field onward.
  # NOTE(review): the second argument of index() was unreadable in the
  # original listing; $1 is assumed -- confirm.
  # NOTE(review): NR > 1 discards the first line produced by st.awk, which
  # already skips the header line, so a data row may be dropped -- confirm.
  awk -F',' -v header="DT,ID,$col_name" -f st.awk "$tempfile" \
    | awk -v name="$col_name" 'NR > 1 { print substr($0, index($0, $1)) "," name }' \
    | sed 's/ //g' >> "New$tempfile"
done
# file st.awk:
# Adapted, with minor changes, from an answer found on Stack Overflow.
#
# Usage: awk -F"," -v header="NAME1,NAME2,..." -f st.awk file...
# For every line except the first of each file, prints the fields whose
# header name is listed in `header`, joined with commas.
BEGIN {
    # Parse the requested header names into the lookup array h.
    split(header, a, ",")
    for (i in a) {
        h[a[i]] = 2    # only key membership is tested; the value is unused
    }
}
# First line of each file: record the numbers of the requested columns.
FNR == 1 {
    split("", cols)    # re-initialise cols for every input file
    for (i = 1; i <= NF; i++) {
        if ($i in h) {
            cols[i] = 1
        }
    }
    next
}
# All other lines: print the selected columns, comma-separated.
{
    res = ""
    picked = 0         # number of fields appended so far
    for (i = 1; i <= NF; i++) {
        if (i in cols) {
            # Separator goes between fields only, never before the first one.
            sep = picked ? "," : ""
            res = res sep $i
            picked++
        }
    }
    if (picked) {
        print res
    }
}