将两个不同大小的数据框合并为一个
Combine two dataframes of different size into one
我希望将两个大小不同的数据框合并在一起。我试过追加(append)、合并(merge)、连接(concat)——我知道已经很接近了,但还是漏掉了某个简单的东西。我是自学 Python 的新手。
import pandas as pd
# First frame: one row per name, with its Proj, Ceil and Floor values.
data1 = [
    ['lj', 22.72, 37, 9.8],
    ['nc', 13.24, 30.9, 4.4],
    ['bm', 13.77, 26.3, 9.3],
    ['jl', 12, 25.9, 7.2],
]
df = pd.DataFrame(data=data1, columns=['Name', 'Proj', 'Ceil', 'Floor'])
print(df)

# Second frame: one row per sim_id (stored as a string), one value column per name.
# NOTE(review): the 'bn' column has no matching entry in df['Name'] (which has 'bm')
# -- possibly a typo in the sample data; confirm before joining on names.
data2 = [
    ['0', 50, 55, 25, 20],
    ['1', 49, 54, 24, 19],
    ['2', 33, 2, 27, 18],
    ['3', 14, 60, 17, 35],
    ['4', 45, 40, 48, 10],
    ['5', 10, 15, 35, 30],
    ['6', 57, 75, 27, 27],
    ['7', 22, 17, 18, 11],
    ['8', 3, 6, 26, 36],
    ['9', 12, 32, 5, 3],
]
df2 = pd.DataFrame(data=data2, columns=['sim_id', 'lj', 'nc', 'bn', 'jl'])
print(df2)
| | Name | Proj | Ceil | Floor |
|---|---|---|---|---|
| 0 | lj | 22.72 | 37 | 9.8 |
| 1 | nc | 13.24 | 30.9 | 4.4 |
| 2 | bm | 13.77 | 26.3 | 9.3 |
| 3 | jl | 12 | 25.9 | 7.2 |
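| | sim_id | lj | nc | bn | jl |
|---|---|---|---|---|---|
| 0 | 0 | 50 | 55 | 25 | 20 |
| 1 | 1 | 49 | 54 | 24 | 19 |
| 2 | 2 | 33 | 2 | 27 | 18 |
| 3 | 3 | 14 | 60 | 17 | 35 |
| 4 | 4 | 45 | 40 | 48 | 10 |
| 5 | 5 | 10 | 15 | 35 | 30 |
| 6 | 6 | 57 | 75 | 27 | 27 |
| 7 | 7 | 22 | 17 | 18 | 11 |
| 8 | 8 | 3 | 6 | 26 | 36 |
| 9 | 9 | 12 | 32 | 5 | 3 |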
期望输出
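| | Name | Proj | Ceil | Floor | sim_id | proj |
|---|---|---|---|---|---|---|
| 0 | lj | 22.72 | 37 | 9.8 | 0 | 50 |
| 0 | lj | 22.72 | 37 | 9.8 | 1 | 49 |
| 0 | lj | 22.72 | 37 | 9.8 | 2 | 33 |
| 0 | lj | 22.72 | 37 | 9.8 | 3 | 14 |
| 0 | lj | 22.72 | 37 | 9.8 | 4 | 45 |
| 0 | lj | 22.72 | 37 | 9.8 | 5 | 10 |
| 0 | lj | 22.72 | 37 | 9.8 | 6 | 57 |
| 0 | lj | 22.72 | 37 | 9.8 | 7 | 22 |
| 0 | lj | 22.72 | 37 | 9.8 | 8 | 3 |
| 0 | lj | 22.72 | 37 | 9.8 | 9 | 12 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 0 | 55 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 1 | 54 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 2 | 2 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 3 | 60 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 4 | 40 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 5 | 15 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 6 | 75 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 7 | 17 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 8 | 6 |
| 1 | nc | 13.24 | 30.9 | 4.4 | 9 | 32 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 0 | 25 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 1 | 24 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 2 | 27 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 3 | 17 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 4 | 48 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 5 | 35 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 6 | 27 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 7 | 18 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 8 | 26 |
| 2 | bm | 13.77 | 26.3 | 9.3 | 9 | 5 |
| 3 | jl | 12 | 25.9 | 7.2 | 0 | 20 |
| 3 | jl | 12 | 25.9 | 7.2 | 1 | 19 |
| 3 | jl | 12 | 25.9 | 7.2 | 2 | 18 |
| 3 | jl | 12 | 25.9 | 7.2 | 3 | 35 |
| 3 | jl | 12 | 25.9 | 7.2 | 4 | 10 |
| 3 | jl | 12 | 25.9 | 7.2 | 5 | 30 |
| 3 | jl | 12 | 25.9 | 7.2 | 6 | 27 |
| 3 | jl | 12 | 25.9 | 7.2 | 7 | 11 |
| 3 | jl | 12 | 25.9 | 7.2 | 8 | 36 |
| 3 | jl | 12 | 25.9 | 7.2 | 9 | 3 |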
您可以使用 `merge`。但是您需要一个公共列来连接两个数据框。由于您没有任何公共列,因此一种方法是为连接创建一个临时列,然后将其删除。如下所示。
import pandas as pd
# Sample frames from the question: df has one row per name, df2 one row per sim_id.
data1 = [['lj', 22.72, 37, 9.8], ['nc', 13.24, 30.9, 4.4],['bm', 13.77, 26.3, 9.3], ['jl', 12, 25.9, 7.2]]
df = pd.DataFrame(data1, columns= ['Name', 'Proj', 'Ceil', 'Floor'])
print(df)
data2 = [['0', 50, 55, 25, 20], ['1', 49, 54, 24, 19], ['2', 33, 2, 27, 18], ['3', 14, 60, 17, 35], ['4', 45, 40, 48, 10], ['5', 10, 15, 35, 30], ['6', 57, 75, 27, 27], ['7', 22, 17, 18, 11], ['8', 3, 6, 26, 36], ['9', 12, 32, 5, 3]]
df2 = pd.DataFrame(data2, columns=['sim_id', 'lj', 'nc', 'bn', 'jl'])
print(df2)
# Add the lines below.
# Cross join via a temporary constant key: every row of df is paired with every
# row of df2 because all rows share temp == 1. The key is added with assign(),
# which returns copies, so df and df2 themselves are left without a stray
# 'temp' column; the key is dropped from the result afterwards.
# (On pandas >= 1.2 the same result is available as pd.merge(df, df2, how='cross').)
# NOTE(review): the result keeps every df2 column (lj, nc, bn, jl) on each row,
# whereas the expected output in the question shows a single 'proj' column --
# confirm whether df2 should be reshaped (e.g. melted) before joining.
df3 = pd.merge(df.assign(temp=1), df2.assign(temp=1), on=['temp'])
df3 = df3.drop(columns='temp')
print(df3)
我希望将两个大小不同的数据框合并在一起。我试过追加(append)、合并(merge)、连接(concat)——我知道已经很接近了,但还是漏掉了某个简单的东西。我是自学 Python 的新手。
import pandas as pd
# First frame: one row per name, with its Proj, Ceil and Floor values.
data1 = [
    ['lj', 22.72, 37, 9.8],
    ['nc', 13.24, 30.9, 4.4],
    ['bm', 13.77, 26.3, 9.3],
    ['jl', 12, 25.9, 7.2],
]
df = pd.DataFrame(data=data1, columns=['Name', 'Proj', 'Ceil', 'Floor'])
print(df)

# Second frame: one row per sim_id (stored as a string), one value column per name.
# NOTE(review): the 'bn' column has no matching entry in df['Name'] (which has 'bm')
# -- possibly a typo in the sample data; confirm before joining on names.
data2 = [
    ['0', 50, 55, 25, 20],
    ['1', 49, 54, 24, 19],
    ['2', 33, 2, 27, 18],
    ['3', 14, 60, 17, 35],
    ['4', 45, 40, 48, 10],
    ['5', 10, 15, 35, 30],
    ['6', 57, 75, 27, 27],
    ['7', 22, 17, 18, 11],
    ['8', 3, 6, 26, 36],
    ['9', 12, 32, 5, 3],
]
df2 = pd.DataFrame(data=data2, columns=['sim_id', 'lj', 'nc', 'bn', 'jl'])
print(df2)
| | Name | Proj | Ceil | Floor |
---|---|---|---|---|
0 | lj | 22.72 | 37 | 9.8 |
1 | nc | 13.24 | 30.9 | 4.4 |
2 | bm | 13.77 | 26.3 | 9.3 |
3 | jl | 12 | 25.9 | 7.2 |
| | sim_id | lj | nc | bn | jl |
---|---|---|---|---|---|
0 | 0 | 50 | 55 | 25 | 20 |
1 | 1 | 49 | 54 | 24 | 19 |
2 | 2 | 33 | 2 | 27 | 18 |
3 | 3 | 14 | 60 | 17 | 35 |
4 | 4 | 45 | 40 | 48 | 10 |
5 | 5 | 10 | 15 | 35 | 30 |
6 | 6 | 57 | 75 | 27 | 27 |
7 | 7 | 22 | 17 | 18 | 11 |
8 | 8 | 3 | 6 | 26 | 36 |
9 | 9 | 12 | 32 | 5 | 3 |
期望输出
| | Name | Proj | Ceil | Floor | sim_id | proj |
---|---|---|---|---|---|---|
0 | lj | 22.72 | 37 | 9.8 | 0 | 50 |
0 | lj | 22.72 | 37 | 9.8 | 1 | 49 |
0 | lj | 22.72 | 37 | 9.8 | 2 | 33 |
0 | lj | 22.72 | 37 | 9.8 | 3 | 14 |
0 | lj | 22.72 | 37 | 9.8 | 4 | 45 |
0 | lj | 22.72 | 37 | 9.8 | 5 | 10 |
0 | lj | 22.72 | 37 | 9.8 | 6 | 57 |
0 | lj | 22.72 | 37 | 9.8 | 7 | 22 |
0 | lj | 22.72 | 37 | 9.8 | 8 | 3 |
0 | lj | 22.72 | 37 | 9.8 | 9 | 12 |
1 | nc | 13.24 | 30.9 | 4.4 | 0 | 55 |
1 | nc | 13.24 | 30.9 | 4.4 | 1 | 54 |
1 | nc | 13.24 | 30.9 | 4.4 | 2 | 2 |
1 | nc | 13.24 | 30.9 | 4.4 | 3 | 60 |
1 | nc | 13.24 | 30.9 | 4.4 | 4 | 40 |
1 | nc | 13.24 | 30.9 | 4.4 | 5 | 15 |
1 | nc | 13.24 | 30.9 | 4.4 | 6 | 75 |
1 | nc | 13.24 | 30.9 | 4.4 | 7 | 17 |
1 | nc | 13.24 | 30.9 | 4.4 | 8 | 6 |
1 | nc | 13.24 | 30.9 | 4.4 | 9 | 32 |
2 | bm | 13.77 | 26.3 | 9.3 | 0 | 25 |
2 | bm | 13.77 | 26.3 | 9.3 | 1 | 24 |
2 | bm | 13.77 | 26.3 | 9.3 | 2 | 27 |
2 | bm | 13.77 | 26.3 | 9.3 | 3 | 17 |
2 | bm | 13.77 | 26.3 | 9.3 | 4 | 48 |
2 | bm | 13.77 | 26.3 | 9.3 | 5 | 35 |
2 | bm | 13.77 | 26.3 | 9.3 | 6 | 27 |
2 | bm | 13.77 | 26.3 | 9.3 | 7 | 18 |
2 | bm | 13.77 | 26.3 | 9.3 | 8 | 26 |
2 | bm | 13.77 | 26.3 | 9.3 | 9 | 5 |
3 | jl | 12 | 25.9 | 7.2 | 0 | 20 |
3 | jl | 12 | 25.9 | 7.2 | 1 | 19 |
3 | jl | 12 | 25.9 | 7.2 | 2 | 18 |
3 | jl | 12 | 25.9 | 7.2 | 3 | 35 |
3 | jl | 12 | 25.9 | 7.2 | 4 | 10 |
3 | jl | 12 | 25.9 | 7.2 | 5 | 30 |
3 | jl | 12 | 25.9 | 7.2 | 6 | 27 |
3 | jl | 12 | 25.9 | 7.2 | 7 | 11 |
3 | jl | 12 | 25.9 | 7.2 | 8 | 36 |
3 | jl | 12 | 25.9 | 7.2 | 9 | 3 |
您可以使用 `merge`。但是您需要一个公共列来连接两个数据框。由于您没有任何公共列,因此一种方法是为连接创建一个临时列,然后将其删除。如下所示。
import pandas as pd
# Sample frames from the question: df has one row per name, df2 one row per sim_id.
data1 = [['lj', 22.72, 37, 9.8], ['nc', 13.24, 30.9, 4.4],['bm', 13.77, 26.3, 9.3], ['jl', 12, 25.9, 7.2]]
df = pd.DataFrame(data1, columns= ['Name', 'Proj', 'Ceil', 'Floor'])
print(df)
data2 = [['0', 50, 55, 25, 20], ['1', 49, 54, 24, 19], ['2', 33, 2, 27, 18], ['3', 14, 60, 17, 35], ['4', 45, 40, 48, 10], ['5', 10, 15, 35, 30], ['6', 57, 75, 27, 27], ['7', 22, 17, 18, 11], ['8', 3, 6, 26, 36], ['9', 12, 32, 5, 3]]
df2 = pd.DataFrame(data2, columns=['sim_id', 'lj', 'nc', 'bn', 'jl'])
print(df2)
# Add the lines below.
# Cross join via a temporary constant key: every row of df is paired with every
# row of df2 because all rows share temp == 1. The key is added with assign(),
# which returns copies, so df and df2 themselves are left without a stray
# 'temp' column; the key is dropped from the result afterwards.
# (On pandas >= 1.2 the same result is available as pd.merge(df, df2, how='cross').)
# NOTE(review): the result keeps every df2 column (lj, nc, bn, jl) on each row,
# whereas the expected output in the question shows a single 'proj' column --
# confirm whether df2 should be reshaped (e.g. melted) before joining.
df3 = pd.merge(df.assign(temp=1), df2.assign(temp=1), on=['temp'])
df3 = df3.drop(columns='temp')
print(df3)