Search code examples
c#csvexport-to-csv

Unwanted extra new row in CSV when reading data


I am trying to read data from an excel spreadsheet and put it in a csv file. I am using the ToCSV() extension(https://extensionmethod.net/csharp/list-string/datareader-to-csv). To remove any newline characters I added the removal of "\n" to get rid of all extra newline characters but this only fixed the issue for 90% of the data. Is there anything else I should be removing from the original strings to make sure there are no extra rows?

public static List<string> ToCSV(this IDataReader dataReader, bool includeHeaderAsFirstRow, string separator)
        {
            List<string> csvRows = new List<string>();
            StringBuilder sb = null;

            if (includeHeaderAsFirstRow)
            {
                sb = new StringBuilder();
                for (int index = 0; index < dataReader.FieldCount; index++)
                {
                    if (dataReader.GetName(index) != null)
                        sb.Append(dataReader.GetName(index));

                    if (index < dataReader.FieldCount - 1)
                        sb.Append(separator);
                }
                csvRows.Add(sb.ToString());
            }

            while (dataReader.Read())
            {
                sb = new StringBuilder();
                for (int index = 0; index < dataReader.FieldCount - 1; index++)
                {
                    if (!dataReader.IsDBNull(index))
                    {
                        string value = dataReader.GetValue(index).ToString();
                        if (dataReader.GetFieldType(index) == typeof(String))
                        {
                            //if newline character is used in value, ensure each are replaced.
                            if (value.IndexOf("\n") >= 0)
                                value = value.Replace("\n", "");
                            
                            //If double quotes are used in value, ensure each are replaced but 2.
                            if (value.IndexOf("\"") >= 0)
                                value = value.Replace("\"", "\"\"");

                            //If separtor are is in value, ensure it is put in double quotes.
                            if (value.IndexOf(separator) >= 0)
                                value = "\"" + value + "\"";

                            if (value.IndexOf("CR LF") >= 0)
                                value = value.Replace("CR LF", "");

                            if (value.IndexOf("LF") >= 0)
                                value = value.Replace("LF", "");

                        }
                        sb.Append(value);
                    }

                    if (index < dataReader.FieldCount - 1)
                        sb.Append(separator);
                }

                if (!dataReader.IsDBNull(dataReader.FieldCount - 1))
                    sb.Append(dataReader.GetValue(dataReader.FieldCount - 1).ToString().Replace(separator, " "));

                csvRows.Add(sb.ToString());
            }
            dataReader.Close();
            sb = null;
            return csvRows;
        }
    }

Solution

  • replacing \r and \n with empty string would take care of eliminating newlines. However, I see you have replaced "CR LF" and "LF" with empty strings. This would actually replace any text with "LF" or "CR LF" in it and not the special characters (Carriage Return and Line Feed).