Excel vlookup 多个值并添加重复行

Excel vlookup multiple values and add duplicate rows

我有两个电子表格,如下所示: 我知道如果我从 Spreadsheet2 开始,我可以只进行 vlookup 并获取值。 但我需要从 Spreadsheet1 开始。我需要添加行。这是一个小数据,但我实际拥有的数据很大..(超过 20000 行)。

电子表格 1:

Category    Type    NumItem
Air         B747    10
Ground      TBus1   15
Air         B777    20
Air         A380    5

电子表格 2:

Type    TypeElement    NumEngine
B747    747T1          2
B747    747T2          4
B747    747T3          8
Tbus1   TbusT1         0
B777    777T1          6
B777    777T2          4
A380    380T1          10

我想将这两个表合并到一个电子表格中。如您所见,两个表中的 Type 是相互对应的,但每个 Type 在电子表格 2 中对应多个 TypeElement。

我希望它看起来像

Category    Type    NumItem    TypeElement    NumEngine
Air         B747    10         747T1          2
Air         B747    10         747T2          4
Air         B747    10         747T3          8
Ground      TBus1   15         TbusT1         0
Air         B777    20         777T1          6
Air         B777    20         777T2          4
Air         A380    5          380T1          10

这可以只用 Excel 中的函数来完成吗?还是必须使用 VBA/宏?如果有人知道如何用 R 完成此操作,请告诉我应该使用哪些函数或包。

谢谢!!

一些想法 1)您可以尝试组合行并使用排序函数,通过元素项中的升序或降序值对值进行排序。例如,进行自定义排序或筛选。

2) 您必须选择要如何对值进行分类。按类别?按类型?您可以通过某种方式对项目进行本地分组。设置变量之间的链接。

如 @r-schifini 所述,有几个库可用于导入 Excel 文件。这里我使用 readxl 包。要保留第一个电子表格(您的 Spreadsheet1)中的所有行,请在 merge 函数中指定 all.x=TRUE。有关详细信息,请参阅 ?merge。请注意,我在 Spreadsheet1 中又添加了一行类型为 B700 的假数据。

# Load the first sheet of each workbook with readxl.
library(readxl)
ss1 <- read_excel("spreadsheet1.xlsx", sheet = 1)
ss2 <- read_excel("spreadsheet2.xlsx", sheet = 1)

# Left join: all.x = TRUE keeps every row of ss1 even when ss2 has no
# matching row; the unmatched ss2 columns are filled with NA.
out <- merge(x = ss1, y = ss2, all.x = TRUE)
out
#    Type Category NumItem TypeElement NumEngine
# 1  A380      Air       5       380T1        10
# 2  B700      Air       8        <NA>        NA
# 3  B747      Air      10       747T1         2
# 4  B747      Air      10       747T2         4
# 5  B747      Air      10       747T3         8
# 6  B777      Air      20       777T1         6
# 7  B777      Air      20       777T2         4
# 8 TBus1   Ground      15        <NA>        NA

为什么第 8 行有 NA?这是因为您的类型在 Spreadsheet1 中是 TBus1,而在 Spreadsheet2 中是 Tbus1。为了避免这样的问题,我们可以在合并之前将大小写更改为大写。

# Upper-case the Type column in both tables so that case-only differences
# (TBus1 vs Tbus1) no longer prevent a match, then repeat the left join.
ss1[["Type"]] <- toupper(ss1[["Type"]])
ss2[["Type"]] <- toupper(ss2[["Type"]])
out <- merge(x = ss1, y = ss2, all.x = TRUE)
out
#    Type Category NumItem TypeElement NumEngine
# 1  A380      Air       5       380T1        10
# 2  B700      Air       8        <NA>        NA
# 3  B747      Air      10       747T1         2
# 4  B747      Air      10       747T2         4
# 5  B747      Air      10       747T3         8
# 6  B777      Air      20       777T1         6
# 7  B777      Air      20       777T2         4
# 8 TBUS1   Ground      15      TbusT1         0

我使用 VBA 将您的 tb1 和 tb2 导入到一个 Access 文件 (c:\testdb.mdb) 中,然后使用 SQL 的 JOIN 语句将它们连接起来。


Sub Main()
    ' Left-joins the table on Worksheets(1) with the table on Worksheets(2)
    ' by their "type" column and writes the joined rows to Worksheets(3),
    ' starting at A1 (no header row is written).
    '
    ' How it works: a temporary Access database (C:\testdb.mdb) is created,
    ' both sheets are copied into tables tb_1 / tb_2 through ADO, a SQL
    ' LEFT JOIN is executed, the recordset is pasted into the workbook and the
    ' temporary database is deleted again.
    '
    ' The routine refuses to run when C:\testdb.mdb already exists, so that a
    ' file it did not create is never overwritten or deleted.
    Const DB_PATH As String = "C:\testdb.mdb"
    Const CONN_STR As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=C:\testdb.mdb;"
    Const ADO_STATE_OPEN As Long = 1

    Dim adoxCat As Object, adoConn As Object, adoRst As Object
    Dim strSQL As String, errMsg As String
    Dim isOpen As Boolean

    ' make an empty mdb file
    If Dir(DB_PATH) <> "" Then
        MsgBox "C:\testdb.mdb is existed.", vbCritical
        Exit Sub
    End If
    Set adoxCat = CreateObject("ADOX.catalog")
    adoxCat.Create CONN_STR
    Set adoxCat = Nothing

    ' create an ADO connection
    ' Errors are suppressed only around the open attempt; the connection state
    ' is checked explicitly afterwards.
    On Error Resume Next
    Set adoConn = CreateObject("adodb.connection")
    If Not adoConn Is Nothing Then
        adoConn.Open CONN_STR
        isOpen = (adoConn.State = ADO_STATE_OPEN)
    End If
    On Error GoTo 0

    If Not isOpen Then
        errMsg = "Cannot create ADO Connection."
        GoTo cleanUp
    End If

    ' From here on any failure aborts the run and is reported, instead of
    ' being silently skipped.
    On Error GoTo failed

    ' create two Tables in the mdb file.
    adoConn.Execute "CREATE TABLE tb_1 (Category varchar, Type varchar, NumItem number)"
    adoConn.Execute "CREATE TABLE tb_2 (Type varchar, TypeElement varchar, NumEngine number)"

    ' move data in excel to mdb file
    insertSheetRows adoConn, "tb_1 (category, type, NumItem)", toArray(Worksheets(1))
    insertSheetRows adoConn, "tb_2 (Type, TypeElement, NumEngine)", toArray(Worksheets(2))

    ' Use SQL Join statement to Join two tables
    strSQL = "SELECT * FROM tb_1 left join tb_2 on tb_1.type = tb_2.type;"
    Set adoRst = adoConn.Execute(strSQL)

    ' output the result to excel worksheet(3)
    Worksheets(3).Range("A1").CopyFromRecordset adoRst

cleanUp:
    ' Best-effort teardown: close the connection and remove the mdb file this
    ' routine created, whether or not the join succeeded.
    On Error Resume Next
    If adoConn.State = ADO_STATE_OPEN Then adoConn.Close
    Set adoRst = Nothing
    Set adoConn = Nothing
    Kill DB_PATH
    On Error GoTo 0
    If Len(errMsg) > 0 Then MsgBox errMsg, vbCritical
    Exit Sub

failed:
    errMsg = "Join failed: " & Err.Description
    ' Resume (rather than fall through) so the handler is no longer active
    ' while the clean-up code runs.
    Resume cleanUp
End Sub

' Inserts every non-blank row of a zero-based 2-D array (text, text, number)
' into the table/column list given by target, e.g. "tb_1 (a, b, c)".
Private Sub insertSheetRows(adoConn As Object, target As String, rowsData As Variant)
    Dim i As Long

    ' toArray returns 0 instead of an array when the sheet has no data rows.
    If Not IsArray(rowsData) Then Exit Sub

    For i = LBound(rowsData, 1) To UBound(rowsData, 1)
        ' Skip rows that are completely empty (e.g. unused trailing slots).
        If Not (IsEmpty(rowsData(i, 0)) And IsEmpty(rowsData(i, 1)) And IsEmpty(rowsData(i, 2))) Then
            adoConn.Execute "INSERT INTO " & target & " VALUES(" & _
                sqlText(rowsData(i, 0)) & ", " & _
                sqlText(rowsData(i, 1)) & ", " & _
                sqlNumber(rowsData(i, 2)) & ");"
        End If
    Next i
End Sub

' Returns v as a single-quoted SQL string literal with embedded quotes doubled.
Private Function sqlText(v As Variant) As String
    sqlText = "'" & Replace(CStr(v), "'", "''") & "'"
End Function

' Returns v as a SQL numeric literal, or NULL for a blank cell.
' Str$ always uses "." as the decimal separator, regardless of locale.
Private Function sqlNumber(v As Variant) As String
    If IsEmpty(v) Then
        sqlNumber = "NULL"
    ElseIf Len(Trim$(CStr(v))) = 0 Then
        sqlNumber = "NULL"
    Else
        sqlNumber = Trim$(Str$(CDbl(v)))
    End If
End Function


Function toArray(from_WSht As Worksheet) As Variant
    ' Returns the data rows of the table that starts at A1 on from_WSht as a
    ' zero-based 2-D Variant array: dt(rowIndex, columnIndex).
    ' The first row of the region is treated as a header and is not returned.
    ' Returns 0 (not an array) when the region has no rows below the header.
    Dim myRng As Range
    Dim src As Variant, dt As Variant
    Dim rowCount As Long, colCount As Long
    Dim r As Long, c As Long

    Set myRng = from_WSht.Range("a1").CurrentRegion
    rowCount = myRng.Rows.Count
    colCount = myRng.Columns.Count

    If rowCount <= 1 Then
        toArray = 0
        Exit Function
    End If

    ' Read the whole region in one call; with more than one row this is
    ' always a 1-based 2-D array.
    src = myRng.Value

    ' rowCount - 1 data rows -> upper bound rowCount - 2 for a zero-based array
    ' (sizing it to rowCount - 1 would leave an all-Empty trailing row).
    ReDim dt(rowCount - 2, colCount - 1) As Variant
    For r = 2 To rowCount
        For c = 1 To colCount
            dt(r - 2, c - 1) = src(r, c)
        Next c
    Next r

    toArray = dt
End Function