awk 从 file1 中按顺序查找缺失的数字并附加到 file2 中的列
awk find missing number in sequence from file1 and append to column in file2
嗨,正如上一个问题中所建议的,我会尝试更清楚地说明我想要实现的目标。
文件 1 的 $4 列中的数字并不是按 1,2,3,4,5… 这样连续排列的,这意味着
我需要打印出其中缺失的数字,例如在 3 之后应该得到 4,依此类推。
cat file1
A R5 A48 1
B R5 A48 2
C R4 A48 3
D R8 A48 15
E R9 A48 22
F R20 B55 21
G R55 B22 19
R B1 I77 14
AA B8 PP 18
BX A255 PA 7
CA A77 PB 10
WW W7 PX 11
我找到了一个部分可行的 awk 方案,它返回如下结果:
arr=($(awk '{ print }' file1 )) | printf '%s\n' ${arr[*]}| \
awk -v first=1 -v last=23 ' BEGIN {for(i=first; i<=last; i++) array[i] = 1} \
{for(i=1;i<=NF;i++) array[$i] += 1} END {for (num in array) if (array[num] == 0) print num}'
4
5
6
8
9
12
13
16
17
20
23
这就是我想要的,但我还缺少 23 之后直到 31 的其余数字,并且需要根据文件 2 的行数,把这些数字作为 $3 列(第 3 列)粘贴到文件 2 中。
cat file2
md5sum 25d422cc23b44c3bbd7a66c76d52af46
md5sum 25d422cc23b44c3bbd7a66c76d52af47
md5sum 25d422cc23b44c3bbd7a66c76d52af48
md5sum 25d422cc23b44c3bbd7a66c76d52af41
md5sum 25d422cc23b44c3bbd7a66c76d52af22
md5sum 25d422cc23b44c3bbd7a66c76d52af33
md5sum 25d422cc23b44c3bbd7a66c76d52af12
md5sum 25d422cc23b44c3bbd7a66c76d52af01
md5sum 25d422cc23b44c3bbd7a66c76d52af55
md5sum 25d422cc23b44c3bbd7a66c76d52af14
md5sum 25d422cc23b44c3bbd7a66c76d52af18
md5sum 25d422cc23b44c3bbd7a66c76d52af17
md5sum 25d422cc23b44c3bbd7a66c76d52af77
md5sum 25d422cc23b44c3bbd7a66c76d52af06
md5sum 25d422cc23b44c3bbd7a66c76d52af05
md5sum 25d422cc23b44c3bbd7a66c76d52af72
md5sum 25d422cc23b44c3bbd7a66c76d52af73
md5sum 25d422cc23b44c3bbd7a66c76d52af74
md5sum 25d422cc23b44c3bbd7a66c76d52af75
md5sum 25d422cc23b44c3bbd7a66c76d52af76
期望得到的结果如下:
md5sum 25d422cc23b44c3bbd7a66c76d52af46 4
md5sum 25d422cc23b44c3bbd7a66c76d52af47 5
md5sum 25d422cc23b44c3bbd7a66c76d52af48 6
md5sum 25d422cc23b44c3bbd7a66c76d52af41 8
md5sum 25d422cc23b44c3bbd7a66c76d52af22 9
md5sum 25d422cc23b44c3bbd7a66c76d52af33 12
md5sum 25d422cc23b44c3bbd7a66c76d52af12 13
md5sum 25d422cc23b44c3bbd7a66c76d52af01 16
md5sum 25d422cc23b44c3bbd7a66c76d52af55 17
md5sum 25d422cc23b44c3bbd7a66c76d52af14 19
md5sum 25d422cc23b44c3bbd7a66c76d52af18 20
md5sum 25d422cc23b44c3bbd7a66c76d52af17 23
md5sum 25d422cc23b44c3bbd7a66c76d52af77 24
md5sum 25d422cc23b44c3bbd7a66c76d52af06 25
md5sum 25d422cc23b44c3bbd7a66c76d52af05 26
md5sum 25d422cc23b44c3bbd7a66c76d52af72 27
md5sum 25d422cc23b44c3bbd7a66c76d52af73 28
md5sum 25d422cc23b44c3bbd7a66c76d52af74 29
md5sum 25d422cc23b44c3bbd7a66c76d52af75 30
md5sum 25d422cc23b44c3bbd7a66c76d52af76 31
例如,如果下一个文件 2 有 22 行,它将一直添加数字到 32。
我认为应该通过更好的方法来完成,将文件 1 列 $4 中的数字也放入数组中并保持逻辑
awk 来救场!无需在脚本中掺入 bash。awk 是一种完全成熟的编程语言,尤其适用于文本处理。
$ awk 'NR==FNR{a[$NF]; next} {while(++c in a); print $0, c}' file1 file2
md5sum 25d422cc23b44c3bbd7a66c76d52af46 4
md5sum 25d422cc23b44c3bbd7a66c76d52af47 5
md5sum 25d422cc23b44c3bbd7a66c76d52af48 6
md5sum 25d422cc23b44c3bbd7a66c76d52af41 8
md5sum 25d422cc23b44c3bbd7a66c76d52af22 9
md5sum 25d422cc23b44c3bbd7a66c76d52af33 12
md5sum 25d422cc23b44c3bbd7a66c76d52af12 13
md5sum 25d422cc23b44c3bbd7a66c76d52af01 16
md5sum 25d422cc23b44c3bbd7a66c76d52af55 17
md5sum 25d422cc23b44c3bbd7a66c76d52af14 20
md5sum 25d422cc23b44c3bbd7a66c76d52af18 23
md5sum 25d422cc23b44c3bbd7a66c76d52af17 24
md5sum 25d422cc23b44c3bbd7a66c76d52af77 25
md5sum 25d422cc23b44c3bbd7a66c76d52af06 26
md5sum 25d422cc23b44c3bbd7a66c76d52af05 27
md5sum 25d422cc23b44c3bbd7a66c76d52af72 28
md5sum 25d422cc23b44c3bbd7a66c76d52af73 29
md5sum 25d422cc23b44c3bbd7a66c76d52af74 30
md5sum 25d422cc23b44c3bbd7a66c76d52af75 31
md5sum 25d422cc23b44c3bbd7a66c76d52af76 32
请注意,19 已经出现在您的第一个文件中,因此在输出中被跳过。您给出的期望输出与给定输入的规则并不一致。
嗨,正如上一个问题中所建议的,我会尝试更清楚地说明我想要实现的目标。文件 1 的 $4 列中的数字并不是按 1,2,3,4,5… 这样连续排列的,这意味着我需要打印出其中缺失的数字,例如在 3 之后应该得到 4,依此类推。
cat file1
A R5 A48 1
B R5 A48 2
C R4 A48 3
D R8 A48 15
E R9 A48 22
F R20 B55 21
G R55 B22 19
R B1 I77 14
AA B8 PP 18
BX A255 PA 7
CA A77 PB 10
WW W7 PX 11
我找到了一个部分可行的 awk 方案,它返回如下结果:
arr=($(awk '{ print }' file1 )) | printf '%s\n' ${arr[*]}| \
awk -v first=1 -v last=23 ' BEGIN {for(i=first; i<=last; i++) array[i] = 1} \
{for(i=1;i<=NF;i++) array[$i] += 1} END {for (num in array) if (array[num] == 0) print num}'
4
5
6
8
9
12
13
16
17
20
23
这就是我想要的,但我还缺少 23 之后直到 31 的其余数字,并且需要根据文件 2 的行数,把这些数字作为 $3 列(第 3 列)粘贴到文件 2 中。
cat file2
md5sum 25d422cc23b44c3bbd7a66c76d52af46
md5sum 25d422cc23b44c3bbd7a66c76d52af47
md5sum 25d422cc23b44c3bbd7a66c76d52af48
md5sum 25d422cc23b44c3bbd7a66c76d52af41
md5sum 25d422cc23b44c3bbd7a66c76d52af22
md5sum 25d422cc23b44c3bbd7a66c76d52af33
md5sum 25d422cc23b44c3bbd7a66c76d52af12
md5sum 25d422cc23b44c3bbd7a66c76d52af01
md5sum 25d422cc23b44c3bbd7a66c76d52af55
md5sum 25d422cc23b44c3bbd7a66c76d52af14
md5sum 25d422cc23b44c3bbd7a66c76d52af18
md5sum 25d422cc23b44c3bbd7a66c76d52af17
md5sum 25d422cc23b44c3bbd7a66c76d52af77
md5sum 25d422cc23b44c3bbd7a66c76d52af06
md5sum 25d422cc23b44c3bbd7a66c76d52af05
md5sum 25d422cc23b44c3bbd7a66c76d52af72
md5sum 25d422cc23b44c3bbd7a66c76d52af73
md5sum 25d422cc23b44c3bbd7a66c76d52af74
md5sum 25d422cc23b44c3bbd7a66c76d52af75
md5sum 25d422cc23b44c3bbd7a66c76d52af76
期望得到的结果如下:
md5sum 25d422cc23b44c3bbd7a66c76d52af46 4
md5sum 25d422cc23b44c3bbd7a66c76d52af47 5
md5sum 25d422cc23b44c3bbd7a66c76d52af48 6
md5sum 25d422cc23b44c3bbd7a66c76d52af41 8
md5sum 25d422cc23b44c3bbd7a66c76d52af22 9
md5sum 25d422cc23b44c3bbd7a66c76d52af33 12
md5sum 25d422cc23b44c3bbd7a66c76d52af12 13
md5sum 25d422cc23b44c3bbd7a66c76d52af01 16
md5sum 25d422cc23b44c3bbd7a66c76d52af55 17
md5sum 25d422cc23b44c3bbd7a66c76d52af14 19
md5sum 25d422cc23b44c3bbd7a66c76d52af18 20
md5sum 25d422cc23b44c3bbd7a66c76d52af17 23
md5sum 25d422cc23b44c3bbd7a66c76d52af77 24
md5sum 25d422cc23b44c3bbd7a66c76d52af06 25
md5sum 25d422cc23b44c3bbd7a66c76d52af05 26
md5sum 25d422cc23b44c3bbd7a66c76d52af72 27
md5sum 25d422cc23b44c3bbd7a66c76d52af73 28
md5sum 25d422cc23b44c3bbd7a66c76d52af74 29
md5sum 25d422cc23b44c3bbd7a66c76d52af75 30
md5sum 25d422cc23b44c3bbd7a66c76d52af76 31
例如,如果下一个文件 2 有 22 行,它将一直添加数字到 32。
我认为应该通过更好的方法来完成,将文件 1 列 $4 中的数字也放入数组中并保持逻辑
awk 来救场!无需在脚本中掺入 bash。awk 是一种完全成熟的编程语言,尤其适用于文本处理。
$ awk 'NR==FNR{a[$NF]; next} {while(++c in a); print $0, c}' file1 file2
md5sum 25d422cc23b44c3bbd7a66c76d52af46 4
md5sum 25d422cc23b44c3bbd7a66c76d52af47 5
md5sum 25d422cc23b44c3bbd7a66c76d52af48 6
md5sum 25d422cc23b44c3bbd7a66c76d52af41 8
md5sum 25d422cc23b44c3bbd7a66c76d52af22 9
md5sum 25d422cc23b44c3bbd7a66c76d52af33 12
md5sum 25d422cc23b44c3bbd7a66c76d52af12 13
md5sum 25d422cc23b44c3bbd7a66c76d52af01 16
md5sum 25d422cc23b44c3bbd7a66c76d52af55 17
md5sum 25d422cc23b44c3bbd7a66c76d52af14 20
md5sum 25d422cc23b44c3bbd7a66c76d52af18 23
md5sum 25d422cc23b44c3bbd7a66c76d52af17 24
md5sum 25d422cc23b44c3bbd7a66c76d52af77 25
md5sum 25d422cc23b44c3bbd7a66c76d52af06 26
md5sum 25d422cc23b44c3bbd7a66c76d52af05 27
md5sum 25d422cc23b44c3bbd7a66c76d52af72 28
md5sum 25d422cc23b44c3bbd7a66c76d52af73 29
md5sum 25d422cc23b44c3bbd7a66c76d52af74 30
md5sum 25d422cc23b44c3bbd7a66c76d52af75 31
md5sum 25d422cc23b44c3bbd7a66c76d52af76 32
请注意,19 已经出现在您的第一个文件中,因此在输出中被跳过。您给出的期望输出与给定输入的规则并不一致。