Search code examples
xmlexcelvbaencodingfrench

Excel to XML encoding problems with French words


I already found partial solution here for my problem with encoding with some French words...

However! Few characters are doing problems and I cant figure out why. I have tried to do separate VBA script for directly copying this problematic word with those characters and it was OK, which is real mystery to me!

With my complex translation code (see old post), in excel sheet I have Français and in XML then wrong representation Français

CODE which works OK

Sub EncodingRepair()

Dim strLine As String
Dim strPath As String

Dim fso As Object
Set fso = CreateObject("Scripting.FileSystemObject")

Dim strFolderPath As String

strFolderPath = "C:\Users\zema\Documents\"

Set fOutputFile = fso.CreateTextFile(strFolderPath & "EncodingRepair.xml", True)

strLine = ThisWorkbook.Worksheets("wording").Range("G16").Text

fOutputFile.WriteLine (strLine & vbCrLn)

End Sub

Only difference here is loading string... In this small code I am loading Text from direct Cell (just for try) and in my complex code, there is loading from .Range object where I put finded .Row

Complex CODE where I have problems with last few words

If intChoice <> 0 Then

strPath = Application.FileDialog( _
msoFileDialogOpen).SelectedItems(1)

Dim strFolderPath As String

strFolderPath = Left(strPath, Len(strPath) - 4)
Set fGermanOutputFile = fso.CreateTextFile((strFolderPath & "_German.xml"), True, True)
Set fItalianOutputFile = fso.CreateTextFile((strFolderPath & "_Italian.xml"), True, True)
Set fFrenchOutputFile = fso.CreateTextFile((strFolderPath & "_French.xml"), True, True)

Open strPath For Input As #1

AlarmString = "RESETNoTranslation"

Do Until EOF(1)
    Line Input #1, strLine

    AllLine = strLine

    Alarm = InStr(1, strLine, AlarmString)

    intLastFoundChar = 0

    strGermanLine = ""
    strFrenchLine = ""
    strItalianLine = ""

    For intI = 0 To (UBound(ArrStrOpeningTags, 1) - 1)

        intFoundString = InStr(strLine, ArrStrOpeningTags(intI))

        If intFoundString <> 0 Then
            intI = 4
        End If

    Next intI

    If ((intFoundString <> 0) And (Alarm = 0)) Then

        For intJ = 0 To (UBound(ArrStrParamsToReplace) - 1)


            strLine = Right(strLine, Len(strLine) - intLastFoundChar)

            strStringToLookFor = (ArrStrParamsToReplace(intJ) & "=""")

            intFoundString = InStr(1, strLine, strStringToLookFor, vbBinaryCompare)

            If intFoundString <> 0 Then
                intStringSplitIndex = (intFoundString + Len(strStringToLookFor))

                strStringToLookFor = Right(strLine, Len(strLine) - intStringSplitIndex + 1)

                strDummyString = Left(strLine, intStringSplitIndex - 1)
                strGermanLine = strGermanLine & strDummyString
                strFrenchLine = strFrenchLine & strDummyString
                strItalianLine = strItalianLine & strDummyString

                intLastFoundChar = intLastFoundChar + intStringSplitIndex

                intFoundString = InStr(strStringToLookFor, """")

                If intFoundString <> 0  strStringToLookFor = Left(strStringToLookFor, intFoundString - 1)

                    Set rngFoundString = rngEnglishDictionary.Find(strStringToLookFor)


                    If (rngFoundString Is Nothing) Then
                        Debug.Print "String " & strStringToLookFor & " not found!"

                        strGermanLine = strGermanLine & strStringToLookFor & """"
                        strFrenchLine = strFrenchLine & strStringToLookFor & """"
                        strItalianLine = strItalianLine & strStringToLookFor & """"
                    Else

                        intWordToReplaceIndex = rngEnglishDictionary.Find(strStringToLookFor).Row - rngEnglishDictionary.Row + 1


                        strGermanLine = strGermanLine & rngGermanDictionary(intWordToReplaceIndex) & """"
                        strFrenchLine = strFrenchLine & rngFrenchDictionary(intWordToReplaceIndex) & """"
                        strItalianLine = strItalianLine & rngItalianDictionary(intWordToReplaceIndex) & """"
                    End If

                    intLastFoundChar = intLastFoundChar + Len(strStringToLookFor)

                End If
            End If

        Next intJ

        If intJ = 2 Then
            strEndOfLine = Right(AllLine, Len(AllLine) - intLastFoundChar)
            strGermanLine = strGermanLine & strEndOfLine
            strFrenchLine = strFrenchLine & strEndOfLine
            strItalianLine = strItalianLine & strEndOfLine
        End If

    Else

    strGermanLine = strLine
    strFrenchLine = strLine
    strItalianLine = strLine

    End If

    fGermanOutputFile.WriteLine (strGermanLine & vbCrLn)
    fFrenchOutputFile.WriteLine (strFrenchLine & vbCrLn)
    fItalianOutputFile.WriteLine (strItalianLine & vbCrLn)

    strGermanLine = ""
    strFrenchLine = ""
    strItalianLine = ""

Loop

End If   
End Sub

Solution

  • Your input file is not Unicode but utf-8, so the fso TextStream approach will not work for reading, as the FileSystemObject knows only ASCII and Unicode, not Utf-8. For the latter you need a reference to Microsoft ActiveX Data Objects and an ADODB.Stream.

    Here an example that you can build around your code which uses UTF-8 as input encoding and writes Unicode to the "EncodingRepair.xml" file:

    Sub EncodingRepair()
    
    Dim strPath As String
    
    Dim fso As Object, inFile As Object
    Dim fOutputFile As Object, AllLine As String
    Dim LineArray As Variant
    Dim strFolderPath As String
    
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set inFile = CreateObject("ADODB.Stream")
    
    strFolderPath = "C:\Users\zema\Documents\"
    strPath = "C:\00_Tools\test\test.txt"
    
    Set fOutputFile = fso.CreateTextFile("C:\00_Tools\test\EncodingRepair.xml", True, True)
    
    Set inFile = CreateObject("ADODB.Stream")
    inFile.Charset = "utf-8"
    inFile.Open
    inFile.LoadFromFile (strPath)
    
    AlarmString = "RESETNoTranslation"
    
    While Not inFile.EOS
        alltext = inFile.ReadText
        LineArray = Split(alltext, vbCrLf)
        For i = 0 To UBound(LineArray)
            AllLine = LineArray(i)
            'do your magic
            fOutputFile.WriteLine AllLine
        Next i
    Wend
    
    End Sub
    

    Make sure to always use the proper encoding both when reading and writing.