Excel vlookup 多个值并添加重复行
Excel vlookup multiple values and add duplicate rows
我有两个电子表格,如下所示:
我知道如果我从 Spreadsheet2 开始,我可以只进行 vlookup 并获取值。
但我需要从 Spreadsheet1 开始。我需要添加行。这是一个小数据,但我实际拥有的数据很大..(超过 20000 行)。
电子表格 1:
Category Type NumItem
Air B747 10
Ground TBus1 15
Air B777 20
Air A380 5
电子表格 2:
Type TypeElement NumEngine
B747 747T1 2
B747 747T2 4
B747 747T3 8
Tbus1 TbusT1 0
B777 777T1 6
B777 777T2 4
A380 380T1 10
我想将这两个表合并到一个电子表格中。如您所见,两个表中的 Type 列是相互对应的,但对于每种类型,Spreadsheet2 中都有多个 TypeElement。
我希望它看起来像
Category Type NumItem TypeElement NumEngine
Air B747 10 747T1 2
Air B747 10 747T2 4
Air B747 10 747T3 8
Ground TBus1 15 TbusT1 0
Air B777 20 777T1 6
Air B777 20 777T2 4
Air A380 5 380T1 10
可以使用 Excel 中的函数来完成吗,或者..我必须使用 VBA/Macro 吗?
如果有人知道如何使用 R 完成此操作,请告诉我应该使用哪些函数或包。
谢谢!!
一些想法
1)您可以尝试组合行并使用排序函数,通过元素项中的升序或降序值对值进行排序。例如,进行自定义排序或筛选。
2) 您必须选择要如何对值进行分类。按类别?按类型?您可以通过某种方式对项目进行本地分组。设置变量之间的链接。
如 @r-schifini 所述,有几个库可用于导入 Excel 文件。这里我使用 readxl 包。要保留第一个电子表格(您的 Spreadsheet1)中的所有行,请在 merge 函数中指定 all.x=TRUE。有关详细信息,请参阅 ?merge。请注意,我在 Spreadsheet1 中又添加了一行类型为 B700 的假数据。
library(readxl)

# Load the first sheet of each workbook.
ss1 <- read_excel("spreadsheet1.xlsx", sheet = 1)
ss2 <- read_excel("spreadsheet2.xlsx", sheet = 1)

# Left join: keep every row of ss1 and repeat it once per matching ss2 row.
# "Type" is the only column the two sheets share, so it is the join key.
out <- merge(x = ss1, y = ss2, by = "Type", all.x = TRUE)
print(out)
# Type Category NumItem TypeElement NumEngine
# 1 A380 Air 5 380T1 10
# 2 B700 Air 8 <NA> NA
# 3 B747 Air 10 747T1 2
# 4 B747 Air 10 747T2 4
# 5 B747 Air 10 747T3 8
# 6 B777 Air 20 777T1 6
# 7 B777 Air 20 777T2 4
# 8 TBus1 Ground 15 <NA> NA
为什么第 8 行有 NA
?这是因为您的类型在 Spreadsheet1 中是 TBus1
,而在 Spreadsheet2 中是 Tbus1
。为了避免这样的问题,我们可以在合并之前将大小写更改为大写。
# Normalise the join key to upper case on both sides so that values which
# differ only in letter case (TBus1 vs Tbus1) match each other.
ss1[["Type"]] <- toupper(ss1[["Type"]])
ss2[["Type"]] <- toupper(ss2[["Type"]])

# Repeat the left join on the normalised key.
out <- merge(x = ss1, y = ss2, by = "Type", all.x = TRUE)
print(out)
# Type Category NumItem TypeElement NumEngine
# 1 A380 Air 5 380T1 10
# 2 B700 Air 8 <NA> NA
# 3 B747 Air 10 747T1 2
# 4 B747 Air 10 747T2 4
# 5 B747 Air 10 747T3 8
# 6 B777 Air 20 777T1 6
# 7 B777 Air 20 777T2 4
# 8 TBUS1 Ground 15 TbusT1 0
我使用 VBA 将您的两个表(tb_1 和 tb_2)导入到一个临时的 Access 文件 (c:\testdb.mdb) 中,
然后使用 SQL 的 LEFT JOIN 语句将它们连接起来。
Sub Main()
    'Left-joins the table on Worksheets(1) with the table on Worksheets(2) on
    'their Type column and copies the joined rows to Worksheets(3) from A1 down.
    '
    'The join is done by loading both sheets into a temporary Jet database
    '(C:\testdb.mdb), running a SQL LEFT JOIN there, and deleting the file again.
    'The macro refuses to run when that file already exists, so that it never
    'overwrites or deletes a database it did not create itself.
    Const DB_PATH As String = "C:\testdb.mdb"
    Const CONN_STR As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=C:\testdb.mdb;"
    Const AD_STATE_OPEN As Long = 1

    Dim adoxCat As Object, adoConn As Object, adoRst As Object, strSQL As String
    Dim dbCreated As Boolean, connected As Boolean

    If Dir(DB_PATH) <> "" Then
        MsgBox "C:\testdb.mdb is existed.", vbCritical
        Exit Sub
    End If

    On Error GoTo failed

    'make an empty mdb file
    Set adoxCat = CreateObject("ADOX.catalog")
    adoxCat.Create CONN_STR
    Set adoxCat = Nothing
    dbCreated = True

    'create an ADO connection; errors are suppressed only while opening so
    'that a failure is reported through the State check below
    On Error Resume Next
    Set adoConn = CreateObject("adodb.connection")
    adoConn.Open CONN_STR
    connected = (adoConn.State = AD_STATE_OPEN)
    On Error GoTo failed

    If Not connected Then
        MsgBox "Cannot create ADO Connection.", vbCritical
        GoTo cleanUp
    End If

    'create two tables in the mdb file
    adoConn.Execute "CREATE TABLE tb_1 (Category varchar, Type varchar, NumItem number)"
    adoConn.Execute "CREATE TABLE tb_2 (Type varchar, TypeElement varchar, NumEngine number)"

    'move data in excel to mdb file
    insertRows adoConn, "INSERT INTO tb_1 (category, type, NumItem) VALUES(", toArray(Worksheets(1))
    insertRows adoConn, "INSERT INTO tb_2 (Type, TypeElement, NumEngine) VALUES(", toArray(Worksheets(2))

    'use a SQL join statement to join the two tables
    strSQL = "SELECT * FROM tb_1 left join tb_2 on tb_1.type = tb_2.type;"
    Set adoRst = adoConn.Execute(strSQL)

    'output the result to worksheet 3 (CopyFromRecordset writes rows only, no field names)
    Worksheets(3).Range("A1").CopyFromRecordset adoRst

cleanUp:
    'best-effort teardown: release every handle, then remove the temporary file
    On Error Resume Next
    If Not adoRst Is Nothing Then adoRst.Close
    If Not adoConn Is Nothing Then
        If adoConn.State = AD_STATE_OPEN Then adoConn.Close
    End If
    Set adoRst = Nothing
    Set adoConn = Nothing
    Set adoxCat = Nothing
    If dbCreated Then Kill DB_PATH
    On Error GoTo 0
    Exit Sub

failed:
    MsgBox "Main failed: " & Err.Description, vbCritical
    Resume cleanUp
End Sub

Private Sub insertRows(conn As Object, insertPrefix As String, data As Variant)
    'Executes one INSERT per row of data (a 2-D array: text, text, number).
    'insertPrefix is the statement up to and including "VALUES(".
    Dim r As Long

    'toArray hands back a non-array value when the sheet has no data rows
    If Not IsArray(data) Then Exit Sub

    For r = LBound(data, 1) To UBound(data, 1)
        'skip rows that were allocated but never filled
        If Not (IsEmpty(data(r, 0)) And IsEmpty(data(r, 1)) And IsEmpty(data(r, 2))) Then
            conn.Execute insertPrefix & _
                " " & sqlText(data(r, 0)) & "," & _
                " " & sqlText(data(r, 1)) & "," & _
                " " & sqlNumber(data(r, 2)) & " );"
        End If
    Next r
End Sub

Private Function sqlText(v As Variant) As String
    'Quotes v as a SQL string literal, doubling embedded apostrophes.
    sqlText = "'" & Replace(CStr(v), "'", "''") & "'"
End Function

Private Function sqlNumber(v As Variant) As String
    'Formats v as a SQL numeric literal, or NULL for blank/non-numeric cells.
    'Str$ always uses "." as the decimal separator, whatever the locale.
    If IsEmpty(v) Or Not IsNumeric(v) Then
        sqlNumber = "NULL"
    Else
        sqlNumber = Trim$(Str$(CDbl(v)))
    End If
End Function
Function toArray(from_WSht As Worksheet) As Variant
    'Returns the data rows of the table that starts at A1 on from_WSht as a
    'zero-based 2-D Variant array: dt(rowIndex, columnIndex).
    'The first row of the region is treated as the header and is not copied.
    'Returns 0 (not an array) when the region has no row below the header.
    Dim myRng As Range, src As Variant, dt As Variant
    Dim r As Long, c As Long, nRows As Long, nCols As Long

    Set myRng = from_WSht.Range("a1").CurrentRegion
    nRows = myRng.Rows.Count
    nCols = myRng.Columns.Count

    If Not nRows > 1 Then
        toArray = 0
        Exit Function
    End If

    'read the whole region in one call; with more than one row this is
    'always a 1-based 2-D array
    src = myRng.Value

    'nRows - 1 data rows, indexed 0 .. nRows - 2, so no unfilled row is left at the end
    ReDim dt(nRows - 2, nCols - 1)
    For r = 2 To nRows
        For c = 1 To nCols
            dt(r - 2, c - 1) = src(r, c)
        Next c
    Next r

    toArray = dt
End Function
我有两个电子表格,如下所示: 我知道如果我从 Spreadsheet2 开始,我可以只进行 vlookup 并获取值。 但我需要从 Spreadsheet1 开始。我需要添加行。这是一个小数据,但我实际拥有的数据很大..(超过 20000 行)。
电子表格 1:
Category Type NumItem
Air B747 10
Ground TBus1 15
Air B777 20
Air A380 5
电子表格 2:
Type TypeElement NumEngine
B747 747T1 2
B747 747T2 4
B747 747T3 8
Tbus1 TbusT1 0
B777 777T1 6
B777 777T2 4
A380 380T1 10
我想将这两个表合并到一个电子表格中。如您所见,两个表中的 Type 列是相互对应的,但对于每种类型,Spreadsheet2 中都有多个 TypeElement。
我希望它看起来像
Category Type NumItem TypeElement NumEngine
Air B747 10 747T1 2
Air B747 10 747T2 4
Air B747 10 747T3 8
Ground TBus1 15 TbusT1 0
Air B777 20 777T1 6
Air B777 20 777T2 4
Air A380 5 380T1 10
可以使用 Excel 中的函数来完成吗,还是必须使用 VBA/宏? 如果有人知道如何使用 R 完成此操作,请告诉我应该使用哪些函数或包。
谢谢!!
一些想法 1)您可以尝试组合行并使用排序函数,通过元素项中的升序或降序值对值进行排序。例如,进行自定义排序或筛选。
2) 您必须选择要如何对值进行分类。按类别?按类型?您可以通过某种方式对项目进行本地分组。设置变量之间的链接。
如 @r-schifini 所述,有几个库可用于导入 Excel 文件。这里我使用 readxl 包。要保留第一个电子表格(您的 Spreadsheet1)中的所有行,请在 merge 函数中指定 all.x=TRUE。有关详细信息,请参阅 ?merge。请注意,我在 Spreadsheet1 中又添加了一行 Type 为 B700 的数据。
library(readxl)

# Load the first sheet of each workbook.
ss1 <- read_excel("spreadsheet1.xlsx", sheet = 1)
ss2 <- read_excel("spreadsheet2.xlsx", sheet = 1)

# Left join: keep every row of ss1 and repeat it once per matching ss2 row.
# "Type" is the only column the two sheets share, so it is the join key.
out <- merge(x = ss1, y = ss2, by = "Type", all.x = TRUE)
print(out)
# Type Category NumItem TypeElement NumEngine
# 1 A380 Air 5 380T1 10
# 2 B700 Air 8 <NA> NA
# 3 B747 Air 10 747T1 2
# 4 B747 Air 10 747T2 4
# 5 B747 Air 10 747T3 8
# 6 B777 Air 20 777T1 6
# 7 B777 Air 20 777T2 4
# 8 TBus1 Ground 15 <NA> NA
为什么第 8 行有 NA
?这是因为您的类型在 Spreadsheet1 中是 TBus1
,而在 Spreadsheet2 中是 Tbus1
。为了避免这样的问题,我们可以在合并之前将大小写更改为大写。
# Normalise the join key to upper case on both sides so that values which
# differ only in letter case (TBus1 vs Tbus1) match each other.
ss1[["Type"]] <- toupper(ss1[["Type"]])
ss2[["Type"]] <- toupper(ss2[["Type"]])

# Repeat the left join on the normalised key.
out <- merge(x = ss1, y = ss2, by = "Type", all.x = TRUE)
print(out)
# Type Category NumItem TypeElement NumEngine
# 1 A380 Air 5 380T1 10
# 2 B700 Air 8 <NA> NA
# 3 B747 Air 10 747T1 2
# 4 B747 Air 10 747T2 4
# 5 B747 Air 10 747T3 8
# 6 B777 Air 20 777T1 6
# 7 B777 Air 20 777T2 4
# 8 TBUS1 Ground 15 TbusT1 0
我使用 VBA 将您的两个表(tb_1 和 tb_2)导入到一个临时的 Access 文件 (c:\testdb.mdb) 中,然后使用 SQL 的 LEFT JOIN 语句将它们连接起来。
Sub Main()
    'Left-joins the table on Worksheets(1) with the table on Worksheets(2) on
    'their Type column and copies the joined rows to Worksheets(3) from A1 down.
    '
    'The join is done by loading both sheets into a temporary Jet database
    '(C:\testdb.mdb), running a SQL LEFT JOIN there, and deleting the file again.
    'The macro refuses to run when that file already exists, so that it never
    'overwrites or deletes a database it did not create itself.
    Const DB_PATH As String = "C:\testdb.mdb"
    Const CONN_STR As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=C:\testdb.mdb;"
    Const AD_STATE_OPEN As Long = 1

    Dim adoxCat As Object, adoConn As Object, adoRst As Object, strSQL As String
    Dim dbCreated As Boolean, connected As Boolean

    If Dir(DB_PATH) <> "" Then
        MsgBox "C:\testdb.mdb is existed.", vbCritical
        Exit Sub
    End If

    On Error GoTo failed

    'make an empty mdb file
    Set adoxCat = CreateObject("ADOX.catalog")
    adoxCat.Create CONN_STR
    Set adoxCat = Nothing
    dbCreated = True

    'create an ADO connection; errors are suppressed only while opening so
    'that a failure is reported through the State check below
    On Error Resume Next
    Set adoConn = CreateObject("adodb.connection")
    adoConn.Open CONN_STR
    connected = (adoConn.State = AD_STATE_OPEN)
    On Error GoTo failed

    If Not connected Then
        MsgBox "Cannot create ADO Connection.", vbCritical
        GoTo cleanUp
    End If

    'create two tables in the mdb file
    adoConn.Execute "CREATE TABLE tb_1 (Category varchar, Type varchar, NumItem number)"
    adoConn.Execute "CREATE TABLE tb_2 (Type varchar, TypeElement varchar, NumEngine number)"

    'move data in excel to mdb file
    insertRows adoConn, "INSERT INTO tb_1 (category, type, NumItem) VALUES(", toArray(Worksheets(1))
    insertRows adoConn, "INSERT INTO tb_2 (Type, TypeElement, NumEngine) VALUES(", toArray(Worksheets(2))

    'use a SQL join statement to join the two tables
    strSQL = "SELECT * FROM tb_1 left join tb_2 on tb_1.type = tb_2.type;"
    Set adoRst = adoConn.Execute(strSQL)

    'output the result to worksheet 3 (CopyFromRecordset writes rows only, no field names)
    Worksheets(3).Range("A1").CopyFromRecordset adoRst

cleanUp:
    'best-effort teardown: release every handle, then remove the temporary file
    On Error Resume Next
    If Not adoRst Is Nothing Then adoRst.Close
    If Not adoConn Is Nothing Then
        If adoConn.State = AD_STATE_OPEN Then adoConn.Close
    End If
    Set adoRst = Nothing
    Set adoConn = Nothing
    Set adoxCat = Nothing
    If dbCreated Then Kill DB_PATH
    On Error GoTo 0
    Exit Sub

failed:
    MsgBox "Main failed: " & Err.Description, vbCritical
    Resume cleanUp
End Sub

Private Sub insertRows(conn As Object, insertPrefix As String, data As Variant)
    'Executes one INSERT per row of data (a 2-D array: text, text, number).
    'insertPrefix is the statement up to and including "VALUES(".
    Dim r As Long

    'toArray hands back a non-array value when the sheet has no data rows
    If Not IsArray(data) Then Exit Sub

    For r = LBound(data, 1) To UBound(data, 1)
        'skip rows that were allocated but never filled
        If Not (IsEmpty(data(r, 0)) And IsEmpty(data(r, 1)) And IsEmpty(data(r, 2))) Then
            conn.Execute insertPrefix & _
                " " & sqlText(data(r, 0)) & "," & _
                " " & sqlText(data(r, 1)) & "," & _
                " " & sqlNumber(data(r, 2)) & " );"
        End If
    Next r
End Sub

Private Function sqlText(v As Variant) As String
    'Quotes v as a SQL string literal, doubling embedded apostrophes.
    sqlText = "'" & Replace(CStr(v), "'", "''") & "'"
End Function

Private Function sqlNumber(v As Variant) As String
    'Formats v as a SQL numeric literal, or NULL for blank/non-numeric cells.
    'Str$ always uses "." as the decimal separator, whatever the locale.
    If IsEmpty(v) Or Not IsNumeric(v) Then
        sqlNumber = "NULL"
    Else
        sqlNumber = Trim$(Str$(CDbl(v)))
    End If
End Function
Function toArray(from_WSht As Worksheet) As Variant
    'Returns the data rows of the table that starts at A1 on from_WSht as a
    'zero-based 2-D Variant array: dt(rowIndex, columnIndex).
    'The first row of the region is treated as the header and is not copied.
    'Returns 0 (not an array) when the region has no row below the header.
    Dim myRng As Range, src As Variant, dt As Variant
    Dim r As Long, c As Long, nRows As Long, nCols As Long

    Set myRng = from_WSht.Range("a1").CurrentRegion
    nRows = myRng.Rows.Count
    nCols = myRng.Columns.Count

    If Not nRows > 1 Then
        toArray = 0
        Exit Function
    End If

    'read the whole region in one call; with more than one row this is
    'always a 1-based 2-D array
    src = myRng.Value

    'nRows - 1 data rows, indexed 0 .. nRows - 2, so no unfilled row is left at the end
    ReDim dt(nRows - 2, nCols - 1)
    For r = 2 To nRows
        For c = 1 To nCols
            dt(r - 2, c - 1) = src(r, c)
        Next c
    Next r

    toArray = dt
End Function