I am trying to merge two datasets (150,000 and 50,000 records) which have around 50 variables each, some of which might match. One variable common to both datasets is the 'Incident date', but I cannot join on it alone, because a single date can have around 300 incidents (broken down by address, city, county, zip, and the time emergency medical services (EMS) was notified). The other dataset has the exact time the incident took place, address, city, county, zip and some other fields, but these fields may be blank if the information is not known/recorded.
I would like to create a buffer to join the datasets field by field. For example, starting with the date of the incident (no missing values): if the dates are the same, the next step would be to check whether the incidents took place in the same county, city and so on (some values might be blank). The final field for comparison would be when EMS was notified (up to 30-60 minutes after the incident). If all the other fields match, it comes down to this final buffer of 30-60 minutes. It would be a many-to-one merge (50,000 to 150,000).
What program would let me do this? Is there a certain code?
I have added a snippet of both the datasets (https://filedropper.com/filemanager/public.php?service=files&t=0f2d129b1622901fafc8c9e678433623&download) and (https://filedropper.com/filemanager/public.php?service=files&t=642c840bc3e431c3d4d839a71bb66944&download)
Expected output looks something like this
Code used was:
% Merge crash-report fields from dataset2 (T2) into matching EMS incident
% rows of dataset1 (T1).
%
% A T1 row matches a T2 row when all five checks pass:
%   1. incident date equals crash date (exact string comparison),
%   2. street name, 3. city name, 4. county name (each compared after
%      upper-casing and dropping non-letters; a check is skipped, i.e. treated
%      as a match, when either side is blank),
%   5. every notification time present in T1 falls within trange after the
%      crash time.
% Each T1 row receives at most one T2 row (the first one that matches).
T1 = readtable('dataset1.csv');
T2 = readtable('dataset2.csv');
LT1 = size(T1,1);
LT2 = size(T2,1);
% T2 columns copied onto a matched T1 row; T1 is widened with blank
% placeholders for them.
newvars = {'County_Name', 'City_Name', 'Town_Name','CrashTime', 'SecondaryLocation', 'RouteName', 'PostalCityName'};
T1 = [T1, cell2table(repmat({''}, LT1, numel(newvars)),'VariableNames', newvars)];
augmented = false(LT1,1);             % true once a T1 row has received T2 data
dtstr = 'MM/dd/yyyy HH:mm';
trange = duration([0,0,0;1,0,0]);     % notification 0 to 1 hour after the crash
showmismatch = true;                  % dump every non-matching pair (debug aid)
for tt2 = 1:LT2
    cdate2 = T2.CrashDate{tt2};
    % NOTE(review): the copy step below reads a T2 column named 'CrashTime';
    % confirm that 'CrashDateTime' really is a separate time-of-day column.
    crasht2 = T2.CrashDateTime{tt2};
    assert(~isempty(cdate2) && ~isempty(crasht2),'Major data missing')
    crashdt2 = datetime([cdate2, ' ', crasht2],'InputFormat',dtstr);
    strtaddr2 = T2.RouteName{tt2};
    if ~isempty(strtaddr2)
        strtaddr2 = upper(strtaddr2);
        strtaddr2 = strrep(strtaddr2,'ROAD','RD');
        strtaddr2 = strtaddr2(isletter(strtaddr2));
    end
    pcityn2 = T2.PostalCityName{tt2};
    if ~isempty(pcityn2)
        pcityn2 = upper(pcityn2);
        pcityn2 = pcityn2(isletter(pcityn2));
    end
    countyn2 = T2.County_Name{tt2};
    if ~isempty(countyn2)
        countyn2 = countyn2(isletter(countyn2));
        countyn2 = upper(countyn2);
        countyn2 = strrep(countyn2,'COUNTY','');
    end
    for tt1 = 1:LT1
        if augmented(tt1)
            continue
        end
        % one flag per check; a check that cannot be made stays true
        matchvec = true(5,1);
        cdate1 = T1.IncidentDate{tt1};
        matchvec(1) = strcmp(cdate1, cdate2);
        strtaddr1 = upper(T1.AddressStreet{tt1});
        if ~isempty(strtaddr2) && ~isempty(strtaddr1)
            strtaddr1 = strrep(strtaddr1,'ROAD','RD');
            strtaddr1 = strtaddr1(isletter(strtaddr1));
            matchvec(2) = strcmp(strtaddr1,strtaddr2);
        end
        pcityn1 = upper(T1.AddressCityIncident{tt1});
        pcityn1 = pcityn1(isletter(pcityn1));
        if ~isempty(pcityn2) && ~isempty(pcityn1)
            matchvec(3) = strcmp(pcityn1,pcityn2);
        end
        countyn1 = upper(T1.AddressCountyIncident{tt1});
        countyn1 = countyn1(isletter(countyn1));
        % strip the suffix exactly as done for countyn2; otherwise
        % 'FAIRFAXCOUNTY' is compared against 'FAIRFAX' and never matches
        countyn1 = strrep(countyn1,'COUNTY','');
        if ~isempty(countyn2) && ~isempty(countyn1)
            matchvec(4) = strcmp(countyn1,countyn2);
        end
        crashdt1u = T1.UnitNotified{tt1};
        % NOTE(review): assumed to use the same text format as UnitNotified
        % (dtstr) -- confirm against the data.
        crashdt1d = T1.Date12_DispatchNotified{tt1};
        % tmatch(1): unit notified in range, tmatch(2): dispatch notified in
        % range; a missing time leaves its entry true
        tmatch = true(2,1);
        if ~isempty(crashdt1u)
            difcrdt1u = datetime(crashdt1u,'InputFormat',dtstr) - crashdt2;
            tmatch(1) = difcrdt1u >= trange(1) && difcrdt1u <= trange(2);
        end
        if ~isempty(crashdt1d)
            difcrdt1d = datetime(crashdt1d,'InputFormat',dtstr) - crashdt2;
            tmatch(2) = difcrdt1d >= trange(1) && difcrdt1d <= trange(2);
        end
        matchvec(5) = all(tmatch);
        if all(matchvec)
            T1{tt1,newvars} = table2cell( T2(tt2,newvars) );
            augmented(tt1)=true;
        elseif showmismatch
            % show which of the five checks failed for this pair
            T1(tt1,:)
            T2(tt2,:)
            matchvec
        end
    end
end
T1
Edit: Optimized code for performance; anticipating large amount of data.
Note to OP: Your raw data has many defects. Commas should not be allowed anywhere inside the actual data in the csv file. Some strings (I found one 'unit notified' time) do not follow the predefined format. The `try` block deals with that one particular case; if all fields are subject to defective data, then `try` should be implemented for all fields. All of these should be addressed prior to merging.
% Load both datasets and set up the shared state used by the merge:
% the widened T1, the per-row "already matched" mask, the date format,
% the allowed notification delay and the street-name comparison helper.
clear;clc;close all
T1 = readtable('dataset1.csv');
T2 = readtable('dataset2.csv');
% Work on a leading subset only (at most 1000 / 900 rows); the limits are
% clamped so that smaller files do not raise an index-out-of-range error.
T1 = T1(1:min(1000,size(T1,1)),:);
T2 = T2(1:min(900,size(T2,1)),:);
LT1 = size(T1,1);
LT2 = size(T2,1);
% widen T1 with blank placeholders for the columns copied over from T2
T1 = [T1, cell2table(repmat({''}, LT1, 7), ...
    'VariableNames', {'County_Name', 'City_Name', 'Town_Name', ...
    'CrashTime', 'SecondaryLocation', 'RouteName', 'PostalCityName'})];
augmented = false(LT1,1); % true once a T1 row has received T2 data
dtstr = 'MM/dd/yyyy HH:mm';
% Accept notifications from 0 to 1 hour after the crash. The lower bound was
% -1 hour, which contradicted the "0 to 1 hour" intent and let notifications
% that precede the crash count as matches.
trange = duration([0,0,0;1,0,0]);
% strtaddrcmpf(c1,c2): c1 and c2 are cell arrays of normalized street-name
% segments. Returns one logical per element of c2 that is true when that
% element and some element of c1 each contain the other (i.e. they are the
% same non-empty string). The inner any() keeps the result scalar per element
% of c2 even when c1 holds several segments.
strtaddrcmpf = @(c1,c2) cellfun(@(s2) ...
    any(cellfun(@(s1) ...
    ~(isempty(strfind(s1,s2)) | isempty(strfind(s2,s1))), ...
    c1)), ...
    c2);
% Pre-processing: normalize every field that takes part in the comparison
% once per row and cache it in T1B / T2B, so the pairwise matching loop does
% not redo string clean-up and date parsing for every (T1,T2) pair.
%   T2B: CrashDT (datetime), StrtAdd (cell of street segments), PoCityN,
%        CountyN, plus a *Flg column per field that is false when the source
%        field is blank.
%   T1B: CrashDTU / CrashDTD (parsed unit / dispatch notification times),
%        StrtAdd, PoCityN, CountyN, plus the corresponding *Flg columns.
fprintf('Pre-processing started at %s \n', datestr(datetime('now')))
T1B = cell2table([repmat({''}, LT1, 5), repmat({true}, LT1, 4)], ...
    'VariableNames', {'CrashDTU','CrashDTD', ...
    'StrtAdd','PoCityN', 'CountyN', ...
    'CrashDTFlg', 'StrtAddFlg', 'PoCityNFlg', 'CountyNFlg'});
T2B = cell2table([repmat({''}, LT2, 4), repmat({true}, LT2, 3)], ...
    'VariableNames', {'CrashDT', 'StrtAdd', 'PoCityN', 'CountyN', ...
    'StrtAddFlg', 'PoCityNFlg', 'CountyNFlg'});
% The progress text has a fixed width; erase exactly that many characters
% before each redraw so the line is overwritten instead of growing.
progw = length(sprintf('Progress: %6.2f%%', 0));
erasebar = repmat(sprintf('\b'),1,progw);
fprintf('Progress: %6.2f%%', 0)
% first half of the progress range: rows of T2
for tt2 = 1:LT2
    fprintf('%s',erasebar)
    fprintf('Progress: %6.2f%%', tt2/LT2*50);
    cdate2 = T2.CrashDate{tt2};
    crasht2 = T2.CrashTime{tt2};
    assert(~isempty(cdate2) && ~isempty(crasht2),'Major data missing')
    crashdt2 = [cdate2, ' ', crasht2];
    T2B.CrashDT{tt2} = datetime(crashdt2,'InputFormat',dtstr);
    strtaddr2 = T2.RouteName{tt2};
    if ~isempty(strtaddr2)
        strtaddr2 = upper(strtaddr2);
        strtaddr2 = strrep(strtaddr2,'ROAD','RD'); % repeat for HWY ST etc
        % an intersection 'A/B' is kept as separate segments
        strtaddr2 = strsplit(strtaddr2,'/');
        % Interstates carry digits in their name, so they are recognized
        % (segment starting with the token) before digits are stripped.
        if any(strncmp(strtaddr2,'I95',3))
            strtaddr2 = {'I95'};
        elseif any(strncmp(strtaddr2,'I495',4))
            strtaddr2 = {'I495'};
        else
            % drop street numbers, blanks and punctuation
            strtaddr2 = cellfun(@(s) s(isletter(s)), ...
                strtaddr2, 'UniformOutput',false);
        end
        T2B.StrtAdd{tt2} = strtaddr2;
    else
        T2B.StrtAddFlg(tt2) = false;
    end
    pcityn2 = T2.PostalCityName{tt2};
    if ~isempty(pcityn2)
        pcityn2 = upper(pcityn2);
        pcityn2 = pcityn2(isletter(pcityn2));
        T2B.PoCityN{tt2} = pcityn2;
    else
        T2B.PoCityNFlg(tt2) = false;
    end
    countyn2 = T2.County_Name{tt2};
    if ~isempty(countyn2)
        countyn2 = upper(countyn2);
        countyn2 = countyn2(isletter(countyn2));
        countyn2 = strrep(countyn2,'COUNTY','');
        T2B.CountyN{tt2} = countyn2;
    else
        T2B.CountyNFlg(tt2) = false;
    end
end
% second half of the progress range: rows of T1
for tt1 = 1:LT1
    fprintf('%s',erasebar)
    fprintf('Progress: %6.2f%%', tt1/LT1*50+50);
    strtaddr1 = upper(T1.AddressStreet{tt1});
    if ~isempty(strtaddr1)
        strtaddr1 = strrep(strtaddr1,'ROAD','RD');
        strtaddr1 = strsplit(strtaddr1,'/');
        % same normalization as applied to the T2 street names above
        if any(strncmp(strtaddr1,'I95',3))
            strtaddr1 = {'I95'};
        elseif any(strncmp(strtaddr1,'I495',4))
            strtaddr1 = {'I495'};
        else
            strtaddr1 = cellfun(@(s) s(isletter(s)), ...
                strtaddr1, 'UniformOutput',false);
        end
        T1B.StrtAdd{tt1} = strtaddr1;
    else
        T1B.StrtAddFlg(tt1) = false;
    end
    pcityn1 = upper(T1.AddressCityIncident{tt1});
    if ~isempty(pcityn1)
        pcityn1 = pcityn1(isletter(pcityn1));
        T1B.PoCityN{tt1} = pcityn1;
    else
        T1B.PoCityNFlg(tt1) = false;
    end
    countyn1 = upper(T1.AddressCountyIncident{tt1});
    if ~isempty(countyn1)
        countyn1 = countyn1(isletter(countyn1));
        countyn1 = strrep(countyn1,'COUNTY','');
        T1B.CountyN{tt1} = countyn1;
    else
        T1B.CountyNFlg(tt1) = false;
    end
    crashdt1u = T1.UnitNotified{tt1};
    crashdt1d = T1.DispatchNotified{tt1};
    if ~isempty(crashdt1u) || ~isempty(crashdt1d)
        % a little dirty here, need both date and time
        % A value that does not follow dtstr makes datetime() throw; the row
        % is then flagged as having no usable notification time instead of
        % aborting the whole run.
        try
            if ~isempty(crashdt1u)
                crashdt1u = datetime(crashdt1u,'InputFormat',dtstr);
                T1B.CrashDTU{tt1} = crashdt1u;
            end
            if ~isempty(crashdt1d)
                crashdt1d = datetime(crashdt1d,'InputFormat',dtstr);
                T1B.CrashDTD{tt1} = crashdt1d;
            end
        catch
            T1B.CrashDTFlg(tt1) = false;
        end
    else
        T1B.CrashDTFlg(tt1) = false;
    end
end
fprintf('%s',erasebar)
fprintf('Pre-processing finished at %s \n', ...
    datestr(datetime('now')))
% Matching: for every T2 row, scan the T1 rows that have not been matched yet
% and copy the T2 fields onto each T1 row that agrees on incident date,
% street, postal city, county and notification time. A check whose data is
% missing on either side (flag false) counts as a match. Uses the
% pre-processed tables T1B / T2B and updates T1 and augmented in place.
fprintf('Matching started at %s \n', datestr(datetime('now')))
% process data
% The progress text has a fixed width; erase exactly that many characters
% before each redraw so the line is overwritten instead of growing.
progw = length(sprintf('Progress: %6.2f%%', 0));
erasebar = repmat(sprintf('\b'),1,progw);
fprintf('Progress: %6.2f%%', 0)
for tt2 = 1:LT2
    fprintf('%s',erasebar)
    fprintf('Progress: %6.2f%%', tt2/LT2*100);
    % extract a row for comparison
    % the crash date must be re-read for every T2 row; otherwise the value
    % left over from an earlier loop is compared against every T1 row
    cdate2 = T2.CrashDate{tt2};
    crashdt2 = T2B.CrashDT{tt2};
    strtaddr2 = T2B.StrtAdd{tt2};
    pcityn2 = T2B.PoCityN{tt2};
    countyn2 = T2B.CountyN{tt2};
    for tt1 = 1:LT1
        if augmented(tt1) % match already found, skip
            continue
        end
        % Boolean comparison: treat missing data as identical
        cdate1 = T1.IncidentDate{tt1};
        match1 = strcmp(cdate1, cdate2); % incident date
        if ~match1
            continue
        end
        % Every remaining check starts out true for this pair so that a
        % skipped check neither reads an undefined variable nor reuses the
        % verdict of a previous pair.
        match2 = true;
        if T2B.StrtAddFlg(tt2) && T1B.StrtAddFlg(tt1) % put 2 first: faster
            strtaddr1 = T1B.StrtAdd{tt1};
            strtaddr_cmp = strtaddrcmpf(strtaddr2,strtaddr1);
            match2 = any(strtaddr_cmp); % street name match
        end
        if ~match2
            continue
        end
        match3 = true;
        if T2B.PoCityNFlg(tt2) && T1B.PoCityNFlg(tt1)
            pcityn1 = T1B.PoCityN{tt1};
            match3 = strcmp(pcityn1,pcityn2); % postal city name match
        end
        if ~match3
            continue
        end
        match4 = true;
        if T2B.CountyNFlg(tt2) && T1B.CountyNFlg(tt1)
            % already upper-cased, letter-only and without 'COUNTY'
            countyn1 = T1B.CountyN{tt1};
            match4 = strcmp(countyn1,countyn2); % county name match
        end
        if ~match4
            continue
        end
        match5 = true;
        if T1B.CrashDTFlg(tt1)
            crashdt1u = T1B.CrashDTU{tt1};
            crashdt1d = T1B.CrashDTD{tt1};
            % a little dirty here, need both date and time
            % a notification time that is absent does not veto the match
            tmatch1 = true;
            tmatch2 = true;
            if ~isempty(crashdt1u)
                difcrdt1u = crashdt1u-crashdt2;
                tmatch1 = difcrdt1u >= trange(1) && difcrdt1u <= trange(2);
            end
            if ~isempty(crashdt1d)
                difcrdt1d = crashdt1d-crashdt2;
                tmatch2 = difcrdt1d >= trange(1) && difcrdt1d <= trange(2);
            end
            match5 = tmatch1 && tmatch2;
        end
        if ~match5
            continue
        end
        % append row in T2 to T1
        T1{tt1,{'County_Name', 'City_Name', 'Town_Name', ...
            'CrashTime', 'SecondaryLocation', 'RouteName', ...
            'PostalCityName'}} = ...
            table2cell( T2(tt2,{'County_Name', 'City_Name', ...
            'Town_Name', 'CrashTime', 'SecondaryLocation', ...
            'RouteName', 'PostalCityName'}) );
        augmented(tt1) = true;
        % break % assume unique matching
    end
end
fprintf('%s',erasebar)
fprintf('Matching finished at %s \nTotalling %d matches. \n', ...
    datestr(datetime('now')), sum(augmented))
Edit: With the newly uploaded data set by OP, more cases are covered. `'GEORGETOWN PIKE/CENTRILLION DR'` should be matched with either `'GEORGETOWN PIKE'` or `'CENTRILLION DR'`. `'I95'` has numerics in its name that should be differentiated from street numbers. Added progress display.
Edit: I forgot to use the `augmented` record to speed things up. Also, added a debugging part at the end in order to see which conditions are not met during a match.
Here is a solution using the `table` class in Matlab. Since it's a pretty new feature, programming in different versions of Matlab may vary. I'm using R2015b.
Key points:
Example code with comments:
(obsolete)
I got this message from Matlab
Warning: Variable names were modified to make them valid MATLAB identifiers.
So you may have to change the column names in the tables to your desire.
These are the original datasets imported from your csv files
(obsolete)
Example output:
(obsolete)
New data set and output:
>> T1
T1 =
IncidentDate AddressStreet AddressCityIncident AddressCountyIncident AddressState IncidentPostalCode DispatchNotified UnitNotified
____________ ___________________________________________ ___________________ _____________________ ____________ __________________ ________________ ________________
'1/1/2014' 'BURKE LAKE RD/BURKE RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 1:33' '1/1/2014 1:33'
'1/1/2014' 'BURKE LAKE RD/BURKE RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 1:33' '1/1/2014 1:33'
'1/1/2014' 'I95 SB TO OLD KEENE MILL RD' 'SPRINGFIELD' 'Fairfax County' 'VA' 22150 '1/1/2014 2:00' '1/1/2014 2:00'
'1/1/2014' 'SYDENSTRICKER RD/OLD KEENE MILL RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 4:54' '1/1/2014 4:54'
'1/1/2014' 'RT28 SB THRU RAMP/RT28 SB RAMP TO RT50 WB' 'CHANTILLY' 'Fairfax County' 'VA' 20151 '1/1/2014 12:28' '1/1/2014 12:28'
'1/1/2014' '11700 SWARTS DR' 'FAIRFAX' 'Fairfax County' 'VA' 22030 '1/1/2014 13:07' '1/1/2014 13:07'
'1/1/2014' '11700 SWARTS DR' 'FAIRFAX' 'Fairfax County' 'VA' 22030 '1/1/2014 13:07' '1/1/2014 13:07'
'1/1/2014' 'CENTREVILLE RD/BRADENTON DR' 'CENTREVILLE' 'Fairfax County' 'VA' 20121 '1/1/2014 13:41' '1/1/2014 13:41'
'1/1/2014' 'GEORGETOWN PIKE/CENTRILLION DR' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:45' '1/1/2014 16:45'
'1/1/2014' 'GEORGETOWN PIKE/CENTRILLION DR' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:42' '1/1/2014 16:42'
'1/1/2014' '8526 GEORGETOWN PIKE' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:49' '1/1/2014 16:49'
'1/1/2014' 'OX RD/BRADDOCK RD' 'FAIRFAX' 'Fairfax County' 'VA' 22032 '1/1/2014 22:32' '1/1/2014 22:32'
>> T2
T2 =
CrashDate County_Name City_Name Town_Name CrashTime SecondaryLocation RouteName PostalCityName
__________ ________________ _________ _________ _________ __________________________ ___________________ ______________
'1/1/2014' 'Fairfax County' NaN NaN '6:35' '' 'I95' 'LORTON'
'1/1/2014' 'Fairfax County' NaN NaN '5:19' '' 'I95 RAMP' 'SPRINGFIELD'
'1/1/2014' 'Fairfax County' NaN NaN '10:23' '' 'I495' 'ANNANDALE'
'1/1/2014' 'Fairfax County' NaN NaN '2:08' '' 'BUILDERS RD' 'HERNDON'
'1/1/2014' 'Fairfax County' NaN NaN '16:42' '' 'GEORGETOWN PIKE' 'MCLEAN'
'1/1/2014' 'Fairfax County' NaN NaN '20:55' 'LEESBURG PIKE' 'WILSON BLVD' 'FALLS CHURCH'
'1/1/2014' 'Fairfax County' NaN NaN '4:54' '' 'SYDENSTRICKER RD' 'BURKE'
'1/1/2014' 'Fairfax County' NaN NaN '2:34' 'BEACON HILL RD' 'RICHMOND HWY' 'ALEXANDRIA'
'1/1/2014' 'Fairfax County' NaN NaN '2:00' '' 'COAT RIDGE RD' 'HERNDON'
'1/1/2014' 'Fairfax County' NaN NaN '13:17' '' 'OLD KEENE MILL RD' 'BURKE'
'1/1/2014' 'Fairfax County' NaN NaN '5:19' 'MCLEAREN RD' 'CENTREVILLE RD' 'HERNDON'
'1/1/2014' 'Fairfax County' NaN NaN '21:48' 'VIRGINIA CENTER BLVD' 'VADEN DR' 'VIENNA'
'1/1/2014' 'Fairfax County' NaN NaN '19:59' 'FAIRFAX COUNTY PKWY RAMP' 'LEE HWY RAMP' 'FAIRFAX'
'1/1/2014' 'Fairfax County' NaN NaN '2:36' '' 'I95' 'SPRINGFIELD'
'1/1/2014' 'Fairfax County' NaN NaN '20:36' 'MOUNT GILEAD RD' 'BRADDOCK RD' 'CENTREVILLE'
'1/1/2014' 'Fairfax County' NaN NaN '1:46' '' 'I95' 'LORTON'
'1/1/2014' 'Fairfax County' NaN NaN '18:45' '' 'I495' 'HAMPTON'
'1/1/2014' 'Fairfax County' NaN NaN '13:40' 'BRADENTON DR' 'CENTREVILLE RD' 'CENTREVILLE'
'1/1/2014' 'Fairfax County' NaN NaN '17:24' 'SHREVE HILL RD' 'IDYLWOOD RD' 'DUNN LORING'
'1/1/2014' 'Fairfax County' NaN NaN '17:46' 'SACRAMENTO DR' 'RICHMOND HWY' 'ALEXANDRIA'
'1/1/2014' 'Fairfax County' NaN NaN '1:40' '' 'WINBOURNE RD' 'BURKE'
'1/1/2014' 'Fairfax County' NaN NaN '1:33' '' 'BURKE LAKE RD' 'BURKE'
'1/1/2014' 'Fairfax County' NaN NaN '15:44' 'TELEGRAPH RD' 'FRANCONIA RD' 'ALEXANDRIA'
'1/1/2014' 'Fairfax County' NaN NaN '22:19' 'OX RD' 'BRADDOCK RD' 'FAIRFAX'
'1/1/2014' 'Fairfax County' NaN NaN '12:27' '' 'SULLY RD' 'HERNDON'
'1/1/2014' 'Fairfax County' NaN NaN '11:25' 'MONUMENT DR' 'LEE HWY' 'FAIRFAX'
T1 =
IncidentDate AddressStreet AddressCityIncident AddressCountyIncident AddressState IncidentPostalCode DispatchNotified UnitNotified County_Name City_Name Town_Name CrashTime SecondaryLocation RouteName PostalCityName
____________ ___________________________________________ ___________________ _____________________ ____________ __________________ ________________ ________________ ________________ _________ _________ _________ _________________ __________________ ______________
'1/1/2014' 'BURKE LAKE RD/BURKE RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 1:33' '1/1/2014 1:33' 'Fairfax County' [NaN] [NaN] '1:33' '' 'BURKE LAKE RD' 'BURKE'
'1/1/2014' 'BURKE LAKE RD/BURKE RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 1:33' '1/1/2014 1:33' 'Fairfax County' [NaN] [NaN] '1:33' '' 'BURKE LAKE RD' 'BURKE'
'1/1/2014' 'I95 SB TO OLD KEENE MILL RD' 'SPRINGFIELD' 'Fairfax County' 'VA' 22150 '1/1/2014 2:00' '1/1/2014 2:00' '' '' '' '' '' '' ''
'1/1/2014' 'SYDENSTRICKER RD/OLD KEENE MILL RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 4:54' '1/1/2014 4:54' 'Fairfax County' [NaN] [NaN] '4:54' '' 'SYDENSTRICKER RD' 'BURKE'
'1/1/2014' 'RT28 SB THRU RAMP/RT28 SB RAMP TO RT50 WB' 'CHANTILLY' 'Fairfax County' 'VA' 20151 '1/1/2014 12:28' '1/1/2014 12:28' '' '' '' '' '' '' ''
'1/1/2014' '11700 SWARTS DR' 'FAIRFAX' 'Fairfax County' 'VA' 22030 '1/1/2014 13:07' '1/1/2014 13:07' '' '' '' '' '' '' ''
'1/1/2014' '11700 SWARTS DR' 'FAIRFAX' 'Fairfax County' 'VA' 22030 '1/1/2014 13:07' '1/1/2014 13:07' '' '' '' '' '' '' ''
'1/1/2014' 'CENTREVILLE RD/BRADENTON DR' 'CENTREVILLE' 'Fairfax County' 'VA' 20121 '1/1/2014 13:41' '1/1/2014 13:41' 'Fairfax County' [NaN] [NaN] '13:40' 'BRADENTON DR' 'CENTREVILLE RD' 'CENTREVILLE'
'1/1/2014' 'GEORGETOWN PIKE/CENTRILLION DR' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:45' '1/1/2014 16:45' 'Fairfax County' [NaN] [NaN] '16:42' '' 'GEORGETOWN PIKE' 'MCLEAN'
'1/1/2014' 'GEORGETOWN PIKE/CENTRILLION DR' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:42' '1/1/2014 16:42' 'Fairfax County' [NaN] [NaN] '16:42' '' 'GEORGETOWN PIKE' 'MCLEAN'
'1/1/2014' '8526 GEORGETOWN PIKE' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:49' '1/1/2014 16:49' 'Fairfax County' [NaN] [NaN] '16:42' '' 'GEORGETOWN PIKE' 'MCLEAN'
'1/1/2014' 'OX RD/BRADDOCK RD' 'FAIRFAX' 'Fairfax County' 'VA' 22032 '1/1/2014 22:32' '1/1/2014 22:32' 'Fairfax County' [NaN] [NaN] '22:19' 'OX RD' 'BRADDOCK RD' 'FAIRFAX'