Search code examples
excelvbautf-8shift-jis

Extracting data from a UTF-8 file format and importing it into a utf-8 file format in VBA


I would like to extract a Japanese letters data from a UTF-8 file format and copying it into another utf-8 file format.

Expected output: カカカカカカカカカ字字字字字字字

Actual output: ・カ・カ・カ・カ・カ・'蟄怜ュ怜ュ怜ュ怜ュ怜ュ怜 (garbage data)

Sub ReadTextFileDataInExcel()
    Dim TblNum As String
    TblNum = Worksheets("data").Range("A2").Value
    Dim RowNumber   As Long
    Dim TextFile    As String
    Dim LineData    As String
    Dim stemp() As Collection
    Dim Test As Variant
    
    TextFile = ThisWorkbook.path & "\" & TblNum & "\" & "CA003" & "\10_RunScript\20.ExpectResult.sql"
    
    MyDir = ThisWorkbook.path & "\" & TblNum & "\" & "CA003" & "\10_RunScript"
        MyFileName = MyDir & "\40.ExpectResult.sql"
      
    RowNumber = 1
     
    Dim newTxt As String
    Open TextFile For Input As #1
        Do While Not EOF(1)
            Line Input #1, LineData
            If LineData Like "insert*" Then
                Worksheets("40.ExpectResult_TEMP").Range("A" & RowNumber).Value = LineData
                LineData = Replace(LineData, "_DUMMY", "")
                newTxt = newTxt & vbCr & LineData
                RowNumber = RowNumber + 1
            End If
            WriteIfFile_utf8 MyFileName, Mid(newTxt, 2)
        Loop
    Close #1
    
            
End Sub

Function WriteIfFile_utf8(strPath As Variant, str As Variant)
    Dim objStream As Object
    Dim utfStr As Variant
    Set objStream = CreateObject("adodb.stream")

    With objStream
        .Type = 2 'adTypeText
        .Charset = "SHIFT-JIS"
        .Open
        .writetext str
        .savetofile strPath, 2
        .Close
    End With
    Set objStream = Nothing
End Function

Solution

  • This worked for me using the sample text you posted:

    Sub ReadWriteUTF8()
        
        Dim inFile As String, outFile As String
        Dim LineData As String, inStream As Object, outStream As Object
        
        inFile = "C:\Temp\inputSample.txt"
        outFile = "C:\Temp\outputSample.txt"
        
        Set inStream = GetUTF8Stream(inFile)
        Set outStream = GetUTF8Stream()
        
        Do While Not inStream.EOS
            LineData = inStream.ReadText(-2) 'adReadLine
            Debug.Print LineData Like "*" & vbCr, LineData
            If LineData Like "insert*" Then
                outStream.WriteText LineData, 1 'adWriteLine
            End If
        Loop
        inStream.Close
        outStream.SaveToFile outFile, 2 'adSaveCreateOverWrite
        outStream.Close
    End Sub
    
    'return an ADODB.Stream set up for UTF-8 text
    Function GetUTF8Stream(Optional strPath As String = "") As Object
        Set GetUTF8Stream = CreateObject("adodb.stream")
        With GetUTF8Stream
            .Type = 2           'adTypeText
            .Charset = "utf-8"
            .LineSeparator = -1 'adCRLF=-1, adCR=13, adLF=10
            .Open
            If Len(strPath) > 0 Then .LoadFromFile strPath
        End With
    End Function