合并具有多个变量的 2 个数据集(不能只使用相似的变量)

Merging 2 datasets with multiple variables (Cannot use just similar variables)

我正在尝试合并 2 个数据集(150,000 条记录和 50,000 条记录),每个数据集大约有 50 个变量,其中一些可能匹配。两个数据集中的一个共同变量是 'Incident date',但我不能只使用它,因为其中一个数据集在某个特定日期就有大约 300 起事件(按地址、城市、县、邮编以及紧急医疗服务 (EMS) 接到通知的时间细分)。另一个数据集有事件发生的确切时间、地址、城市、县、邮政编码和其他一些字段,但如果信息未知或未记录,这些字段可能为空。

我想创建一个缓冲区来按每个字段连接数据集。例如,首先从事件发生的日期开始(没有缺失值),如果它们相同,下一步就是检查它们是否发生在同一个县、市等(某些值可能为空)。比较的最后一个字段是 EMS 收到通知的时间(事件发生后最多 30 - 60 分钟)。如果其他所有字段都匹配,则最后使用 30 - 60 分钟的时间缓冲区进行比较。这将是多对一的合并(50,000 到 150,000)。

哪个程序可以让我这样做?有现成的代码吗?

我添加了两个数据集的片段 (https://filedropper.com/filemanager/public.php?service=files&t=0f2d129b1622901fafc8c9e678433623&download) 和 (https://filedropper.com/filemanager/public.php?service=files&t=642c840bc3e431c3d4d839a71bb66944&download)

预期输出看起来像这样

使用的代码是:

% Merge crash records (dataset2 -> T2) into EMS incident records
% (dataset1 -> T1).
%
% For every row of T2, every not-yet-augmented row of T1 is tested on five
% criteria: (1) same date string, (2) same street, (3) same postal city,
% (4) same county, (5) EMS notification time within TRANGE of the crash
% time. Criteria 2-5 default to "match" when the value is missing on either
% side. A T1 row that passes all five receives the T2 columns in T2COLS.
%
% NOTE(review): the column names used below (CrashDateTime,
% Date12_DispatchNotified, ...) are taken as-is; readtable may rename
% columns to make them valid identifiers, so confirm them against the CSVs.
T1 = readtable('dataset1.csv');
T2 = readtable('dataset2.csv');
LT1 = size(T1,1);
LT2 = size(T2,1);

% Columns copied from T2 into T1 when a pair matches.
t2cols = {'County_Name', 'City_Name', 'Town_Name', 'CrashTime', ...
    'SecondaryLocation', 'RouteName', 'PostalCityName'};
T1 = [T1, cell2table(repmat({''}, LT1, numel(t2cols)), 'VariableNames', t2cols)];

augmented = false(LT1,1);          % true once a T1 row has received T2 data
dtstr = 'MM/dd/yyyy HH:mm';
trange = duration([0,0,0;1,0,0]);  % notification 0 to 1 hour after the crash
showMismatch = true;               % dump each failing pair for debugging

for tt2 = 1:LT2
    % ---- normalize the T2 row once ----
    cdate2 = T2.CrashDate{tt2};
    crasht2 = T2.CrashDateTime{tt2};
    assert(~isempty(cdate2) && ~isempty(crasht2),'Major data missing')
    crashdt2 = datetime([cdate2, ' ', crasht2],'InputFormat',dtstr);

    strtaddr2 = T2.RouteName{tt2};
    if ~isempty(strtaddr2)
        strtaddr2 = upper(strtaddr2);
        strtaddr2 = strrep(strtaddr2,'ROAD','RD');
        strtaddr2 = strtaddr2(isletter(strtaddr2));
    end

    pcityn2 = T2.PostalCityName{tt2};
    if ~isempty(pcityn2)
        pcityn2 = upper(pcityn2);
        pcityn2 = pcityn2(isletter(pcityn2));
    end

    countyn2 = T2.County_Name{tt2};
    if ~isempty(countyn2)
        countyn2 = countyn2(isletter(countyn2));
        countyn2 = upper(countyn2);
        countyn2 = strrep(countyn2,'COUNTY','');
    end

    for tt1 = 1:LT1
        if augmented(tt1)
            continue
        end
        matchvec = true(5,1);      % missing data counts as a match

        % (1) incident date
        cdate1 = T1.IncidentDate{tt1};
        matchvec(1) = strcmp(cdate1, cdate2);

        % (2) street
        strtaddr1 = upper(T1.AddressStreet{tt1});
        if ~isempty(strtaddr2) && ~isempty(strtaddr1)
            strtaddr1 = strrep(strtaddr1,'ROAD','RD');
            strtaddr1 = strtaddr1(isletter(strtaddr1));
            matchvec(2) = strcmp(strtaddr1,strtaddr2);
        end

        % (3) postal city
        pcityn1 = upper(T1.AddressCityIncident{tt1});
        pcityn1 = pcityn1(isletter(pcityn1));
        if ~isempty(pcityn2) && ~isempty(pcityn1)
            matchvec(3) = strcmp(pcityn1,pcityn2);
        end

        % (4) county; strip 'COUNTY' exactly as done for T2, otherwise
        % 'Fairfax County' on both sides would compare FAIRFAXCOUNTY
        % against FAIRFAX and never match.
        countyn1 = upper(T1.AddressCountyIncident{tt1});
        countyn1 = countyn1(isletter(countyn1));
        countyn1 = strrep(countyn1,'COUNTY','');
        if ~isempty(countyn2) && ~isempty(countyn1)
            matchvec(4) = strcmp(countyn1,countyn2);
        end

        % (5) notification time; each available timestamp is parsed and
        % must fall inside trange relative to the crash time.
        crashdt1u = T1.UnitNotified{tt1};
        crashdt1d = T1.Date12_DispatchNotified{tt1};
        tmatch = true(2,1);
        if ~isempty(crashdt1u)
            difcrdt1u = datetime(crashdt1u,'InputFormat',dtstr) - crashdt2;
            tmatch(1) = difcrdt1u >= trange(1) && difcrdt1u <= trange(2);
        end
        if ~isempty(crashdt1d)
            difcrdt1d = datetime(crashdt1d,'InputFormat',dtstr) - crashdt2;
            tmatch(2) = difcrdt1d >= trange(1) && difcrdt1d <= trange(2);
        end
        matchvec(5) = all(tmatch);

        if all(matchvec)
            T1{tt1,t2cols} = table2cell( T2(tt2,t2cols) );
            augmented(tt1) = true;
        elseif showMismatch
            % Debug aid: show the pair and which criteria failed.
            T1(tt1,:)
            T2(tt2,:)
            matchvec
        end
    end
end
T1

编辑:优化代码以提高性能;预计大量数据。

给 OP 的提示:您的原始数据有很多错误。csv 文件的实际数据字段中不允许出现逗号。某些字符串(我发现 1 个 UnitNotified 时间)不符合预定义的格式。代码中的 try 块只处理了这一种特殊情况;如果其他字段也存在有缺陷的数据,则应对所有字段都使用 try。所有这些问题都应该在合并之前解决。

% Merge crash records (dataset2 -> T2) into EMS incident records
% (dataset1 -> T1).
%
% Stage 1 normalizes every row of both tables once and stores the results
% in the buffer tables T2B / T1B, together with flags telling whether each
% field is usable. Stage 2 compares every T2 row against every T1 row that
% has not been augmented yet: same date string, then street, postal city,
% county and notification time. A field that is missing on either side is
% treated as matching. A T1 row that passes every test receives the T2
% columns listed in T2COLS.
clear;clc;close all

T1 = readtable('dataset1.csv');
T2 = readtable('dataset2.csv');
% Work on a subset; min() keeps the indexing valid for shorter files.
T1 = T1(1:min(1000,size(T1,1)),:);
T2 = T2(1:min(900,size(T2,1)),:);
LT1 = size(T1,1);
LT2 = size(T2,1);

% Columns copied from T2 into T1 when a pair matches.
t2cols = {'County_Name', 'City_Name', 'Town_Name', ...
    'CrashTime', 'SecondaryLocation', 'RouteName', 'PostalCityName'};
% expand T1 with empty placeholders for those columns
T1 = [T1, cell2table(repmat({''}, LT1, numel(t2cols)), ...
    'VariableNames', t2cols)];

augmented = false(LT1,1); % true once a T1 row has received T2 data
dtstr = 'MM/dd/yyyy HH:mm';
% Accepted (notification - crash) window: -1 hour to +1 hour.
% NOTE(review): the question asks for 0 to 60 minutes AFTER the crash; use
% duration([0,0,0;1,0,0]) if negative differences must be rejected.
trange = duration([-1,0,0;1,0,0]);

% strtaddrcmpf(c1,c2): for each street segment in c2, true when some segment
% of c1 both contains it and is contained in it. any() reduces the inner
% result to a scalar so that c1 may hold more than one segment.
strtaddrcmpf = @(c1,c2) cellfun(@(s2) ...
    any(cellfun(@(s1) ...
    ~isempty(strfind(s1,s2)) && ~isempty(strfind(s2,s1)), ...
    c1)), ...
    c2);

% The progress line is redrawn in place by backspacing over the old one.
progblank = 'Progress:        ';
progerase = repmat(sprintf('\b'),1,length(progblank));

% ---------------- Stage 1: pre-processing ----------------
fprintf('Pre-processing started at %s \n', datestr(datetime('now')))
T1B = cell2table([repmat({''}, LT1, 5), repmat({true}, LT1, 4)], ...
    'VariableNames', {'CrashDTU','CrashDTD', ...
    'StrtAdd','PoCityN', 'CountyN', ...
    'CrashDTFlg', 'StrtAddFlg', 'PoCityNFlg', 'CountyNFlg'});
T2B = cell2table([repmat({''}, LT2, 4), repmat({true}, LT2, 3)], ...
    'VariableNames', {'CrashDT', 'StrtAdd', 'PoCityN', 'CountyN', ...
    'StrtAddFlg', 'PoCityNFlg', 'CountyNFlg'});

fprintf('Progress:        ')
for tt2 = 1:LT2
    fprintf('%s',progerase)
    fprintf('Progress: %6.2f%%', tt2/LT2*50);

    % crash date and time are mandatory in T2
    cdate2 = T2.CrashDate{tt2};
    crasht2 = T2.CrashTime{tt2};
    assert(~isempty(cdate2) && ~isempty(crasht2),'Major data missing')
    T2B.CrashDT{tt2} = datetime([cdate2, ' ', crasht2],'InputFormat',dtstr);

    % street: upper-case, abbreviate, split intersections on '/'
    strtaddr2 = T2.RouteName{tt2};
    if ~isempty(strtaddr2)
        strtaddr2 = upper(strtaddr2);
        strtaddr2 = strrep(strtaddr2,'ROAD','RD'); % repeat for HWY ST etc
        strtaddr2 = strsplit(strtaddr2,'/');
        % Interstates keep their digits; any trailing detail is dropped.
        if any(strncmp(strtaddr2,'I95',3))
            strtaddr2 = {'I95'};
        elseif any(strncmp(strtaddr2,'I495',4))
            strtaddr2 = {'I495'};
        else
            % ordinary streets: keep letters only (drops house numbers)
            strtaddr2 = cellfun(@(s) s(isletter(s)), ...
                strtaddr2, 'UniformOutput',false);
        end
        T2B.StrtAdd{tt2} = strtaddr2;
    else
        T2B.StrtAddFlg(tt2) = false;
    end

    pcityn2 = T2.PostalCityName{tt2};
    if ~isempty(pcityn2)
        pcityn2 = upper(pcityn2);
        T2B.PoCityN{tt2} = pcityn2(isletter(pcityn2));
    else
        T2B.PoCityNFlg(tt2) = false;
    end

    countyn2 = T2.County_Name{tt2};
    if ~isempty(countyn2)
        countyn2 = upper(countyn2);
        countyn2 = countyn2(isletter(countyn2));
        T2B.CountyN{tt2} = strrep(countyn2,'COUNTY','');
    else
        T2B.CountyNFlg(tt2) = false;
    end
end
for tt1 = 1:LT1
    fprintf('%s',progerase)
    fprintf('Progress: %6.2f%%', tt1/LT1*50+50);

    strtaddr1 = upper(T1.AddressStreet{tt1});
    if ~isempty(strtaddr1)
        strtaddr1 = strrep(strtaddr1,'ROAD','RD');
        strtaddr1 = strsplit(strtaddr1,'/');
        if any(strncmp(strtaddr1,'I95',3))
            strtaddr1 = {'I95'};
        elseif any(strncmp(strtaddr1,'I495',4))
            strtaddr1 = {'I495'};
        else
            strtaddr1 = cellfun(@(s) s(isletter(s)), ...
                strtaddr1, 'UniformOutput',false);
        end
        T1B.StrtAdd{tt1} = strtaddr1;
    else
        T1B.StrtAddFlg(tt1) = false;
    end

    pcityn1 = upper(T1.AddressCityIncident{tt1});
    if ~isempty(pcityn1)
        T1B.PoCityN{tt1} = pcityn1(isletter(pcityn1));
    else
        T1B.PoCityNFlg(tt1) = false;
    end

    countyn1 = upper(T1.AddressCountyIncident{tt1});
    if ~isempty(countyn1)
        countyn1 = countyn1(isletter(countyn1));
        T1B.CountyN{tt1} = strrep(countyn1,'COUNTY','');
    else
        T1B.CountyNFlg(tt1) = false;
    end

    crashdt1u = T1.UnitNotified{tt1};
    crashdt1d = T1.DispatchNotified{tt1};
    if ~isempty(crashdt1u) || ~isempty(crashdt1d)
        % A timestamp that does not follow dtstr makes datetime() throw;
        % the row is then flagged as having no usable notification time.
        try
            if ~isempty(crashdt1u)
                T1B.CrashDTU{tt1} = datetime(crashdt1u,'InputFormat',dtstr);
            end
            if ~isempty(crashdt1d)
                T1B.CrashDTD{tt1} = datetime(crashdt1d,'InputFormat',dtstr);
            end
        catch
            T1B.CrashDTFlg(tt1) = false;
        end
    else
        T1B.CrashDTFlg(tt1) = false;
    end
end
fprintf('%s',progerase)
fprintf('Pre-processing finished at %s \n', ...
    datestr(datetime('now')))

% ---------------- Stage 2: matching ----------------
fprintf('Matching started at %s \n', datestr(datetime('now')))

% Pull the columns out of the tables once; plain array / cell indexing in
% the double loop is much cheaper than repeated table indexing.
date1 = T1.IncidentDate;
date2 = T2.CrashDate;
strt1 = T1B.StrtAdd;     strt1ok = T1B.StrtAddFlg;
city1 = T1B.PoCityN;     city1ok = T1B.PoCityNFlg;
cnty1 = T1B.CountyN;     cnty1ok = T1B.CountyNFlg;
dtu1 = T1B.CrashDTU;     dtd1 = T1B.CrashDTD;
dt1ok = T1B.CrashDTFlg;

fprintf('Progress:        ')
for tt2 = 1:LT2
    fprintf('%s',progerase)
    fprintf('Progress: %6.2f%%', tt2/LT2*100);
    % extract the current T2 row for comparison
    cdate2 = date2{tt2};
    crashdt2 = T2B.CrashDT{tt2};
    strtaddr2 = T2B.StrtAdd{tt2};
    pcityn2 = T2B.PoCityN{tt2};
    countyn2 = T2B.CountyN{tt2};
    strt2ok = T2B.StrtAddFlg(tt2);
    city2ok = T2B.PoCityNFlg(tt2);
    cnty2ok = T2B.CountyNFlg(tt2);

    for tt1 = 1:LT1
        if augmented(tt1) % match already found, skip
            continue
        end

        % Each test is skipped (counts as a match) when the field is
        % missing on either side; the first failing test rejects the pair.

        % incident date
        if ~strcmp(date1{tt1}, cdate2)
            continue
        end

        % street name: any common segment is enough
        if strt2ok && strt1ok(tt1)
            if ~any(strtaddrcmpf(strtaddr2, strt1{tt1}))
                continue
            end
        end

        % postal city name
        if city2ok && city1ok(tt1)
            if ~strcmp(city1{tt1}, pcityn2)
                continue
            end
        end

        % county name (already normalized in stage 1)
        if cnty2ok && cnty1ok(tt1)
            if ~strcmp(cnty1{tt1}, countyn2)
                continue
            end
        end

        % notification time: every available timestamp must be in range
        if dt1ok(tt1)
            crashdt1u = dtu1{tt1};
            crashdt1d = dtd1{tt1};
            tmatch = true(2,1);
            if ~isempty(crashdt1u)
                difcrdt1u = crashdt1u-crashdt2;
                tmatch(1) = difcrdt1u >= trange(1) && difcrdt1u <= trange(2);
            end
            if ~isempty(crashdt1d)
                difcrdt1d = crashdt1d-crashdt2;
                tmatch(2) = difcrdt1d >= trange(1) && difcrdt1d <= trange(2);
            end
            if ~all(tmatch)
                continue
            end
        end

        % append row in T2 to T1
        T1{tt1,t2cols} = table2cell( T2(tt2,t2cols) );
        augmented(tt1) = true;
%         break % assume unique matching
    end
end

fprintf('%s',progerase)
fprintf('Matching finished at %s \nTotalling %d matches. \n', ...
    datestr(datetime('now')), sum(augmented))

编辑:随着OP新上传的数据集,涵盖了更多案例。

  • 'GEORGETOWN PIKE/CENTRILLION DR' 等交叉路口应与 'GEORGETOWN PIKE' 或 'CENTRILLION DR' 匹配。
  • 州际公路名称,如 'I95',其名称中包含数字,应与街道号码区分开来。
  • 州际公路名称有时包含应忽略的详细位置。 (并查看其他信息)

添加了进度显示。


编辑:我忘记使用 augmented 记录来加快速度。此外,在最后添加了调试部分,以便查看匹配过程中哪些条件不满足。


这是一个在 Matlab 中使用 table class 的解决方案。由于这是一项相当新的功能,因此在不同版本的 Matlab 中进行编程可能会有所不同。我正在使用 R2015b。

要点:

  1. 对于数据集 2 中的每一行,查找数据集 1 中所有行的匹配项。
  2. 如果记录中有任何内容不匹配,则跳过;否则,认为它们属于同一事件。
  3. 将数据集 2 中的其他内容附加到 1。

带有注释的示例代码:

(obsolete)

我从 Matlab 收到这条消息

Warning: Variable names were modified to make them valid MATLAB identifiers.

因此您可能需要根据需要更改表中的列名。


这些是从您的 csv 文件导入的原始数据集

(obsolete)

示例输出:

(obsolete)

新数据集和输出:

>> T1

T1 = 

    IncidentDate                   AddressStreet                   AddressCityIncident    AddressCountyIncident    AddressState    IncidentPostalCode    DispatchNotified      UnitNotified  
    ____________    ___________________________________________    ___________________    _____________________    ____________    __________________    ________________    ________________

    '1/1/2014'      'BURKE LAKE RD/BURKE RD'                       'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 1:33'     '1/1/2014 1:33' 
    '1/1/2014'      'BURKE LAKE RD/BURKE RD'                       'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 1:33'     '1/1/2014 1:33' 
    '1/1/2014'      'I95 SB TO OLD KEENE MILL RD'                  'SPRINGFIELD'          'Fairfax County'         'VA'            22150                 '1/1/2014 2:00'     '1/1/2014 2:00' 
    '1/1/2014'      'SYDENSTRICKER RD/OLD KEENE MILL RD'           'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 4:54'     '1/1/2014 4:54' 
    '1/1/2014'      'RT28 SB THRU RAMP/RT28 SB RAMP TO RT50 WB'    'CHANTILLY'            'Fairfax County'         'VA'            20151                 '1/1/2014 12:28'    '1/1/2014 12:28'
    '1/1/2014'      '11700 SWARTS DR'                              'FAIRFAX'              'Fairfax County'         'VA'            22030                 '1/1/2014 13:07'    '1/1/2014 13:07'
    '1/1/2014'      '11700 SWARTS DR'                              'FAIRFAX'              'Fairfax County'         'VA'            22030                 '1/1/2014 13:07'    '1/1/2014 13:07'
    '1/1/2014'      'CENTREVILLE RD/BRADENTON DR'                  'CENTREVILLE'          'Fairfax County'         'VA'            20121                 '1/1/2014 13:41'    '1/1/2014 13:41'
    '1/1/2014'      'GEORGETOWN PIKE/CENTRILLION DR'               'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:45'    '1/1/2014 16:45'
    '1/1/2014'      'GEORGETOWN PIKE/CENTRILLION DR'               'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:42'    '1/1/2014 16:42'
    '1/1/2014'      '8526 GEORGETOWN PIKE'                         'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:49'    '1/1/2014 16:49'
    '1/1/2014'      'OX RD/BRADDOCK RD'                            'FAIRFAX'              'Fairfax County'         'VA'            22032                 '1/1/2014 22:32'    '1/1/2014 22:32'

>> T2

T2 = 

    CrashDate       County_Name       City_Name    Town_Name    CrashTime        SecondaryLocation              RouteName         PostalCityName
    __________    ________________    _________    _________    _________    __________________________    ___________________    ______________

    '1/1/2014'    'Fairfax County'    NaN          NaN          '6:35'       ''                            'I95'                  'LORTON'      
    '1/1/2014'    'Fairfax County'    NaN          NaN          '5:19'       ''                            'I95 RAMP'             'SPRINGFIELD' 
    '1/1/2014'    'Fairfax County'    NaN          NaN          '10:23'      ''                            'I495'                 'ANNANDALE'   
    '1/1/2014'    'Fairfax County'    NaN          NaN          '2:08'       ''                            'BUILDERS RD'          'HERNDON'     
    '1/1/2014'    'Fairfax County'    NaN          NaN          '16:42'      ''                            'GEORGETOWN PIKE'      'MCLEAN'      
    '1/1/2014'    'Fairfax County'    NaN          NaN          '20:55'      'LEESBURG PIKE'               'WILSON BLVD'          'FALLS CHURCH'
    '1/1/2014'    'Fairfax County'    NaN          NaN          '4:54'       ''                            'SYDENSTRICKER RD'     'BURKE'       
    '1/1/2014'    'Fairfax County'    NaN          NaN          '2:34'       'BEACON HILL RD'              'RICHMOND HWY'         'ALEXANDRIA'  
    '1/1/2014'    'Fairfax County'    NaN          NaN          '2:00'       ''                            'COAT RIDGE RD'        'HERNDON'     
    '1/1/2014'    'Fairfax County'    NaN          NaN          '13:17'      ''                            'OLD KEENE MILL RD'    'BURKE'       
    '1/1/2014'    'Fairfax County'    NaN          NaN          '5:19'       'MCLEAREN RD'                 'CENTREVILLE RD'       'HERNDON'     
    '1/1/2014'    'Fairfax County'    NaN          NaN          '21:48'      'VIRGINIA CENTER BLVD'        'VADEN DR'             'VIENNA'      
    '1/1/2014'    'Fairfax County'    NaN          NaN          '19:59'      'FAIRFAX COUNTY PKWY RAMP'    'LEE HWY RAMP'         'FAIRFAX'     
    '1/1/2014'    'Fairfax County'    NaN          NaN          '2:36'       ''                            'I95'                  'SPRINGFIELD' 
    '1/1/2014'    'Fairfax County'    NaN          NaN          '20:36'      'MOUNT GILEAD RD'             'BRADDOCK RD'          'CENTREVILLE' 
    '1/1/2014'    'Fairfax County'    NaN          NaN          '1:46'       ''                            'I95'                  'LORTON'      
    '1/1/2014'    'Fairfax County'    NaN          NaN          '18:45'      ''                            'I495'                 'HAMPTON'     
    '1/1/2014'    'Fairfax County'    NaN          NaN          '13:40'      'BRADENTON DR'                'CENTREVILLE RD'       'CENTREVILLE' 
    '1/1/2014'    'Fairfax County'    NaN          NaN          '17:24'      'SHREVE HILL RD'              'IDYLWOOD RD'          'DUNN LORING' 
    '1/1/2014'    'Fairfax County'    NaN          NaN          '17:46'      'SACRAMENTO DR'               'RICHMOND HWY'         'ALEXANDRIA'  
    '1/1/2014'    'Fairfax County'    NaN          NaN          '1:40'       ''                            'WINBOURNE RD'         'BURKE'       
    '1/1/2014'    'Fairfax County'    NaN          NaN          '1:33'       ''                            'BURKE LAKE RD'        'BURKE'       
    '1/1/2014'    'Fairfax County'    NaN          NaN          '15:44'      'TELEGRAPH RD'                'FRANCONIA RD'         'ALEXANDRIA'  
    '1/1/2014'    'Fairfax County'    NaN          NaN          '22:19'      'OX RD'                       'BRADDOCK RD'          'FAIRFAX'     
    '1/1/2014'    'Fairfax County'    NaN          NaN          '12:27'      ''                            'SULLY RD'             'HERNDON'     
    '1/1/2014'    'Fairfax County'    NaN          NaN          '11:25'      'MONUMENT DR'                 'LEE HWY'              'FAIRFAX'     



T1 = 

    IncidentDate                   AddressStreet                   AddressCityIncident    AddressCountyIncident    AddressState    IncidentPostalCode    DispatchNotified      UnitNotified        County_Name       City_Name    Town_Name    CrashTime    SecondaryLocation        RouteName         PostalCityName
    ____________    ___________________________________________    ___________________    _____________________    ____________    __________________    ________________    ________________    ________________    _________    _________    _________    _________________    __________________    ______________

    '1/1/2014'      'BURKE LAKE RD/BURKE RD'                       'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 1:33'     '1/1/2014 1:33'     'Fairfax County'    [NaN]        [NaN]        '1:33'       ''                   'BURKE LAKE RD'       'BURKE'       
    '1/1/2014'      'BURKE LAKE RD/BURKE RD'                       'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 1:33'     '1/1/2014 1:33'     'Fairfax County'    [NaN]        [NaN]        '1:33'       ''                   'BURKE LAKE RD'       'BURKE'       
    '1/1/2014'      'I95 SB TO OLD KEENE MILL RD'                  'SPRINGFIELD'          'Fairfax County'         'VA'            22150                 '1/1/2014 2:00'     '1/1/2014 2:00'     ''                  ''           ''           ''           ''                   ''                    ''            
    '1/1/2014'      'SYDENSTRICKER RD/OLD KEENE MILL RD'           'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 4:54'     '1/1/2014 4:54'     'Fairfax County'    [NaN]        [NaN]        '4:54'       ''                   'SYDENSTRICKER RD'    'BURKE'       
    '1/1/2014'      'RT28 SB THRU RAMP/RT28 SB RAMP TO RT50 WB'    'CHANTILLY'            'Fairfax County'         'VA'            20151                 '1/1/2014 12:28'    '1/1/2014 12:28'    ''                  ''           ''           ''           ''                   ''                    ''            
    '1/1/2014'      '11700 SWARTS DR'                              'FAIRFAX'              'Fairfax County'         'VA'            22030                 '1/1/2014 13:07'    '1/1/2014 13:07'    ''                  ''           ''           ''           ''                   ''                    ''            
    '1/1/2014'      '11700 SWARTS DR'                              'FAIRFAX'              'Fairfax County'         'VA'            22030                 '1/1/2014 13:07'    '1/1/2014 13:07'    ''                  ''           ''           ''           ''                   ''                    ''            
    '1/1/2014'      'CENTREVILLE RD/BRADENTON DR'                  'CENTREVILLE'          'Fairfax County'         'VA'            20121                 '1/1/2014 13:41'    '1/1/2014 13:41'    'Fairfax County'    [NaN]        [NaN]        '13:40'      'BRADENTON DR'       'CENTREVILLE RD'      'CENTREVILLE' 
    '1/1/2014'      'GEORGETOWN PIKE/CENTRILLION DR'               'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:45'    '1/1/2014 16:45'    'Fairfax County'    [NaN]        [NaN]        '16:42'      ''                   'GEORGETOWN PIKE'     'MCLEAN'      
    '1/1/2014'      'GEORGETOWN PIKE/CENTRILLION DR'               'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:42'    '1/1/2014 16:42'    'Fairfax County'    [NaN]        [NaN]        '16:42'      ''                   'GEORGETOWN PIKE'     'MCLEAN'      
    '1/1/2014'      '8526 GEORGETOWN PIKE'                         'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:49'    '1/1/2014 16:49'    'Fairfax County'    [NaN]        [NaN]        '16:42'      ''                   'GEORGETOWN PIKE'     'MCLEAN'      
    '1/1/2014'      'OX RD/BRADDOCK RD'                            'FAIRFAX'              'Fairfax County'         'VA'            22032                 '1/1/2014 22:32'    '1/1/2014 22:32'    'Fairfax County'    [NaN]        [NaN]        '22:19'      'OX RD'              'BRADDOCK RD'         'FAIRFAX'