Search code examples
excelcsvperlexport-to-csv

How to automatically change the format or encode the excel cell value while writing it to CSV file


My script is breaking when excel cell has double quotes in their value and quotes. I had to explicitly write a function to handle commas in OUTFILE. Is there any way I can provide cell value and automatically it can be encoded to CSV format. example-

  1. cell->value - Student GOT 8 MARKS in math, 7 in Spanish

    Desired Correct CSV format-> "Student GOT 8 MARKS in math, 7 in Spanish".

  2. cell->value - Student GOT 8 MARKS in "math", 7 in "Spanish"

    Desired Correct CSV format-> "Student GOT 8 MARKS in ""math"", 7 in ""Spanish""".

I wrote my function to find COMMAS in cell value and if it exits then put the string in double-quotes. I wanted to avoid it in case there are any built functions of CSV writer.

   #!/home/utils/perl-5.08
   use Text::CSV_XS;
   use Text::CSV;
   use Excel::Writer::XLSX;
   use Spreadsheet::ParseXLSX;
   use CGI qw(:standard);
   use DBI;
   use DBD::CSV;
   
   my $student_excel_file = "";
   my $csv = "";
   
   $student_excel_file='ABC.xlsm';
   
      $csv = $student_excel_file;
      $csv =~ s/.xlsx$/_22june_intermediate_xlsxtocsv.csv/;
      $csv =~ s/.xlsm$/_22june_intermediate_xlsmtocsv.csv/;
      my $parser_1   = Spreadsheet::ParseXLSX->new();
      my $workbook_1 = $parser_1->parse($student_excel_file);
      printf "$csv\n";
      print "writing out the new csv file $csv given prvs xlsm file\n";
      
      my $csv_1 = Text::CSV_XS->new ({ binary => 1, auto_diag => 1, eol => "\r\n", sep_char  => ',' });
      
      open my $fh, ">:encoding(utf-8)", $csv or die "failed to create $csv: $!";
      #open OUTFILE, "> $student_excel_out_csv_file" or die "ERROR: can't the student;'s CSV file:- $student_excel_out_csv_file.\n";
      
      if ( !defined $workbook_1 ) 
      {
         die $parser_1->error(), ".\n";
      }
    
      my $worksheet_1=$workbook_1->worksheet(0);
      my ( $row_min, $row_max ) = $worksheet_1->row_range();
      my ( $col_min, $col_max ) = $worksheet_1->col_range();
      printf("Copyig Sheet: %s from the provided PRVS \n", $worksheet_1->{Name});
      my $reached_end_of_sheet = 0;
      my $concurentEmptyLineCount = 0;
      $col_max=65;
      #$row_max=2;
   my(@heading) = ("CodeA", "CodeB", "Name", "Count", "Pos", "Orientation");
   $csv_1->print($fh, \@heading);
   my(@datarow) = ("A", "B", "Abelone", 3, "(6,9)", "NW");
   $csv_1->print($fh, \@datarow); 
   my(@datarow_1) = ("A", "B", "Abelone", 3, "WORKS - ""what"" - lets", "_2NW");
   $csv_1->print($fh, \@datarow_1);
   for my $worksheet ( $workbook->worksheets() ) {
    
       my ( $row_min, $row_max ) = $worksheet->row_range();
       my ( $col_min, $col_max ) = $worksheet->col_range();
       printf("Sheet: %s\n", $worksheet->{Name});
   
       my $sheet_write = $excel_2->add_worksheet($worksheet->{Name});
       # my $format = $sheet_write->add_format();
   
          for my $row_1 ( 1 .. $row_max ) 
      {
         if($reached_end_of_sheet)
         {
            last;
         }
         for my $col_1 ( $col_min .. $col_max ) 
            {
               my $cell_1 = $worksheet_1->get_cell( $row_1, 0 );
               next unless $cell_1;
               $concurentEmptyLineCount=0;
               my $cell_2 = $worksheet_1->get_cell( $row_1, $col_1);
               my $cell2_value =$cell_2 -> {Val};
               print $cell_2 -> {Val};
               $csv_1->print ($fh, \$cell2_value );
               # if(defined $cell2_value)
               # {
                  # if($cell2_value=~ m/,/)
                  # {
                      # $cell2_value=qq("$cell2_value");
                  # }
                  
                  # printf OUTFILE "%s,", $cell2_value;
               # }
               # else
               # {
                  # printf OUTFILE ",";}
               # }
               my $cell_3 = $worksheet_1->get_cell( $row_1, 0 );
               $concurentEmptyLineCount++;
               if($concurentEmptyLineCount > 20)
               {
                  $reached_end_of_sheet = 1;
               }
               next unless $cell_3;
               #printf OUTFILE "\n";
               $csv_1->print ($fh, "\n" );
       }
   
   
   #close OUTFILE;
   
   close $fh;
   
   
   exit;


Solution

  • You can use combine() to quote the fields. For example:

    use feature qw(say);
    use strict;
    use warnings;
    use Text::CSV;
    
    my $csv = Text::CSV->new();
    my @fields = (
        q[Student GOT 8 MARKS in math, 7 in Spanish],
        q[Student GOT 8 MARKS in "math", 7 in "Spanish"],
    );
    for my $field (@fields) {
        my $success = $csv->combine($field);
        if (!$success) {
            die "Failed to quote field: " . $field;
        }
        say "Result: ", $csv->string();
    }
    

    Output:

    Result: "Student GOT 8 MARKS in math, 7 in Spanish"
    Result: "Student GOT 8 MARKS in ""math"", 7 in ""Spanish"""