Search code examples
phpcsvphp-7array-mergefputcsv

PHP CSV merge two CSV files when there is same data in both files' rows


I have csv01 like that:

A    |  B   |   C     |  D 
-----------------------------
905  |  bla |   meh?  |  na
16   |  meh |   meh?  |  ni
4    |  bla |   meh?  |  put

I have csv02 like that:

A    |  Z   |   Y     
---------------------
4    |  info |  meh   
16   |  info |  meh   
905  |  info |  meh  

How could I get the info based on column A numbers , which are in both files ?

These numbers are in both files , not in the same order. Sometimes the data is blank so it could out put blank or "null" result for that case.

A    |  B   |   C     |  D     |  Z   
-------------------------------------
905  |  bla |   meh?  |  na   |  info
16   |  meh |   meh?  |  ni   |  info
4    |  bla |   meh?  |  put  |  info

What I'm trying to do is to merge both CSV's ( values of each column when the value is the same ) based on matching number values:905 ,16 ,4...

I tried but not getting any result, it only output some incorrect data on one line:

<?php
   $fh = fopen('csv1.csv', 'r');
        $fhg = fopen('csv2.csv', 'r');
         while (($data = fgetcsv($fh, 0, ",")) !== FALSE) {
            $csv1[3]=$data;
        }
        while (($data = fgetcsv($fhg, 0, ",")) !== FALSE) {
                $csv2[0]=$data;
        }

        for($x=0;$x< count($csv2);$x++)
        {
            if($x==0){
                unset($csv1[0][3]);
                $line[$x]=array_merge($csv2[0],$csv1[3]); //header
            }
            else{
                $deadlook=0;
                for($y=0;$y <= count($csv1);$y++)
                {
                    if($csv1[$y][3] == $csv2[$x][0]){
                        unset($csv1[$y][3]);
                        $line[$x]=array_merge($csv2[$x],$csv1[$y]);
                        $deadlook=1;
                    }
                }
                if($deadlook==0)
                    $line[$x]=$csv2[$x];
            }
        }

        $fp = fopen('final.csv', 'w');//output file set here

        foreach ($line as $fields) {
            fputcsv($fp, $fields);
        }
        fclose($fp);
?>

Have been trying to use fgetcsv and used keys. My code is not working at all I give up . Any ideas on how I could achieve this ?


Solution

  • This can be done relatively easy in PHP, no need for Python's Pandas.

    a.csv:

    905,bla,meh?,na3
    16,meh,meh?,ni2
    4,bla,meh?,put1
    

    b.csv:

    4,info,meh1
    16,info,meh2
    905,info,meh3
    
    <?php
    
    // load both files to array
    $a = array_map('str_getcsv', file('a.csv'));
    $b = array_map('str_getcsv', file('b.csv'));
    
    var_dump($a, $b);
    
    // index both arrays by the column you want to merge by
    $mergeColumn = 0;
    $a = array_combine(array_column($a, $mergeColumn), $a);
    $b = array_combine(array_column($b, $mergeColumn), $b);
    
    var_dump($a, $b);
    
    // construct combined array by combining same indexes from both arrays
    // and merging the values (skip merge column from second file so it is not doubled)
    $c = [];
    foreach ($a as $k => $dataA) {
      unset($b[$k][$mergeColumn]);
      $c[$k] = array_merge($dataA, $b[$k]);
    }
    
    var_dump($c);
    
    // put it to output csv file
    $fp = fopen('c.csv', 'w');
    foreach ($c as $row) {
        fputcsv($fp, $row);
    }
    fclose($fp);
    

    Output:

    // a.csv as array
    array(3) {
      [0]=>
      array(4) {
        [0]=>
        string(3) "905"
        [1]=>
        string(3) "bla"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(3) "na3"
      }
      [1]=>
      array(4) {
        [0]=>
        string(2) "16"
        [1]=>
        string(3) "meh"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(3) "ni2"
      }
      [2]=>
      array(4) {
        [0]=>
        string(1) "4"
        [1]=>
        string(3) "bla"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(4) "put1"
      }
    }
    
    // b.csv as array
    array(3) {
      [0]=>
      array(3) {
        [0]=>
        string(1) "4"
        [1]=>
        string(4) "info"
        [2]=>
        string(4) "meh1"
      }
      [1]=>
      array(3) {
        [0]=>
        string(2) "16"
        [1]=>
        string(4) "info"
        [2]=>
        string(4) "meh2"
      }
      [2]=>
      array(3) {
        [0]=>
        string(3) "905"
        [1]=>
        string(4) "info"
        [2]=>
        string(4) "meh3"
      }
    }
    
    // a.csv keyed by merge column
    array(3) {
      [905]=>
      array(4) {
        [0]=>
        string(3) "905"
        [1]=>
        string(3) "bla"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(3) "na3"
      }
      [16]=>
      array(4) {
        [0]=>
        string(2) "16"
        [1]=>
        string(3) "meh"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(3) "ni2"
      }
      [4]=>
      array(4) {
        [0]=>
        string(1) "4"
        [1]=>
        string(3) "bla"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(4) "put1"
      }
    }
    
    // b.csv keyed by merge column
    array(3) {
      [4]=>
      array(3) {
        [0]=>
        string(1) "4"
        [1]=>
        string(4) "info"
        [2]=>
        string(4) "meh1"
      }
      [16]=>
      array(3) {
        [0]=>
        string(2) "16"
        [1]=>
        string(4) "info"
        [2]=>
        string(4) "meh2"
      }
      [905]=>
      array(3) {
        [0]=>
        string(3) "905"
        [1]=>
        string(4) "info"
        [2]=>
        string(4) "meh3"
      }
    }
    
    // combined array
    array(3) {
      [905]=>
      array(6) {
        [0]=>
        string(3) "905"
        [1]=>
        string(3) "bla"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(3) "na3"
        [4]=>
        string(4) "info"
        [5]=>
        string(4) "meh3"
      }
      [16]=>
      array(6) {
        [0]=>
        string(2) "16"
        [1]=>
        string(3) "meh"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(3) "ni2"
        [4]=>
        string(4) "info"
        [5]=>
        string(4) "meh2"
      }
      [4]=>
      array(6) {
        [0]=>
        string(1) "4"
        [1]=>
        string(3) "bla"
        [2]=>
        string(4) "meh?"
        [3]=>
        string(4) "put1"
        [4]=>
        string(4) "info"
        [5]=>
        string(4) "meh1"
      }
    }
    

    and c.csv (outcome):

    905,bla,meh?,na3,info,meh3
    16,meh,meh?,ni2,info,meh2
    4,bla,meh?,put1,info,meh1
    

    Above solution works if both files have the same identifiers that merging is performed on (first column) If there can be a row that doesnt have a match in other csv file then another approach is needed.

    a.csv:

    A,B,C,D
    905,bla,meh?,na3
    16,meh,meh?,ni2
    4,bla,meh?,put1
    1,a,b,c
    

    b.csv:

    A,Z,Y
    4,info,meh1
    16,info,meh2
    905,info,meh3
    2,d,e
    

    a.csv contains a row with A: 1, there is no match for that in b.csv. b.csv contains a row with A: 2, there is no match for that in a.csv. We expect result csv to have 5 rows (4, 16, 905 - common + 1 with empty Z,Y + 2 with empty B,C,D).

    code:

    <?php
    
    $a = parseCsv('a.csv');
    $b = parseCsv('b.csv');
    
    $allHeaders = array_unique(array_merge($a['header'], $b['header']));
    
    $mergeColumn = 'A';
    $a['rows'] = array_combine(array_column($a['rows'], $mergeColumn), $a['rows']);
    $b['rows'] = array_combine(array_column($b['rows'], $mergeColumn), $b['rows']);
    
    $allIndexes = array_unique(array_merge(array_column($a['rows'], $mergeColumn), array_column($b['rows'], $mergeColumn)));
    
    $c = [];
    foreach ($allIndexes as $index) {
      $row = [];
      foreach ($allHeaders as $header) {
        $row[$header] = '';
        if (isset($a['rows'][$index][$header])) {
          $row[$header] = $a['rows'][$index][$header];
        } elseif (isset($b['rows'][$index][$header])) {
          $row[$header] = $b['rows'][$index][$header];
        }
      }
    
      $c[$index] = $row;
    }
    
    var_dump($c);
    
    $fp = fopen('c.csv', 'w');
    fputcsv($fp, $allHeaders);
    foreach ($c as $row) {
        fputcsv($fp, $row);
    }
    fclose($fp);
    
    
    function parseCsv(string $file): array {
      $rows = array_map('str_getcsv', file($file));
      $header = array_shift($rows);
      $csv = [];
      foreach($rows as $row) {
        $csv[] = array_combine($header, $row);
      }
    
      return ['header' => $header, 'rows' => $csv];
    }
    

    Output:

    array(5) {
      [905]=>
      array(6) {
        ["A"]=>
        string(3) "905"
        ["B"]=>
        string(3) "bla"
        ["C"]=>
        string(4) "meh?"
        ["D"]=>
        string(3) "na3"
        ["Z"]=>
        string(4) "info"
        ["Y"]=>
        string(4) "meh3"
      }
      [16]=>
      array(6) {
        ["A"]=>
        string(2) "16"
        ["B"]=>
        string(3) "meh"
        ["C"]=>
        string(4) "meh?"
        ["D"]=>
        string(3) "ni2"
        ["Z"]=>
        string(4) "info"
        ["Y"]=>
        string(4) "meh2"
      }
      [4]=>
      array(6) {
        ["A"]=>
        string(1) "4"
        ["B"]=>
        string(3) "bla"
        ["C"]=>
        string(4) "meh?"
        ["D"]=>
        string(4) "put1"
        ["Z"]=>
        string(4) "info"
        ["Y"]=>
        string(4) "meh1"
      }
      [1]=>
      array(6) {
        ["A"]=>
        string(1) "1"
        ["B"]=>
        string(1) "a"
        ["C"]=>
        string(1) "b"
        ["D"]=>
        string(1) "c"
        ["Z"]=>
        string(0) ""
        ["Y"]=>
        string(0) ""
      }
      [2]=>
      array(6) {
        ["A"]=>
        string(1) "2"
        ["B"]=>
        string(0) ""
        ["C"]=>
        string(0) ""
        ["D"]=>
        string(0) ""
        ["Z"]=>
        string(1) "d"
        ["Y"]=>
        string(1) "e"
      }
    }
    

    c.csv:

    905,bla,meh?,na3,info,meh3
    16,meh,meh?,ni2,info,meh2
    4,bla,meh?,put1,info,meh1
    1,a,b,c,,
    2,,,,d,e