Search code examples
mysqldatabaseutf-8goiso-8859-1

GoLang - Persist using ISO-8859-1 charset


I'm developing a project where we need to persist our information in a legacy database that has ISO-8859-1 tables. So before writing something to the database I need to convert it from UTF-8 to ISO-8859-1, and every time I retrieve it from the database, I need to convert it back to UTF-8.

I was trying to use the library code.google.com/p/go-charset/ as the following for each text field that I need to persist.

import (
  "bytes"
  "code.google.com/p/go-charset/charset"
  _ "code.google.com/p/go-charset/data"
  "fmt"
  "io/ioutil"
  "strings"
)

func toISO88591(utf8 string) string {
    buf := new(bytes.Buffer)

    w, err := charset.NewWriter("latin1", buf)
    if err != nil {
        panic(err)
    }
    defer w.Close()

    fmt.Fprintf(w, utf8)
    return buf.String()
}

func fromISO88591(iso88591 string) string {
    r, err := charset.NewReader("latin1", strings.NewReader(iso88591))
    if err != nil {
        panic(err)
    }

    buf, err := ioutil.ReadAll(r)
    if err != nil {
        panic(err)
    }

    return string(buf)
}

The problem is that the data is still persisted in UTF-8 even if I use the function toISO88591. I am doing something wrong in this conversion?

My database is a MySQL, and I'm using the github.com/go-sql-driver/mysql driver with the following connection parameters:

<user>:<password>@tcp(<host>:<port>)/<database>?collation=latin1_general_ci

Best regards!


Solution

  • package charset

    import "code.google.com/p/go-charset/charset" 
    

    func NewWriter

    func NewWriter(charset string, w io.Writer) (io.WriteCloser, error)
    

    NewWriter returns a new WriteCloser writing to w. It converts writes of UTF-8 text into writes on w of text in the named character set. The Close is necessary to flush any remaining partially translated characters to the output.


    I would follow the instructions: "The Close is necessary to flush any remaining partially translated characters to the output." For example,

    package main
    
    import (
        "bytes"
        "code.google.com/p/go-charset/charset"
        _ "code.google.com/p/go-charset/data"
        "fmt"
        "io/ioutil"
        "strings"
    )
    
    func toISO88591(utf8 string) (string, error) {
        buf := new(bytes.Buffer)
        w, err := charset.NewWriter("latin1", buf)
        if err != nil {
            return "", err
        }
        fmt.Fprintf(w, utf8)
        w.Close()
        return buf.String(), nil
    }
    
    func fromISO88591(iso88591 string) (string, error) {
        r, err := charset.NewReader("latin1", strings.NewReader(iso88591))
        if err != nil {
            return "", err
        }
        buf, err := ioutil.ReadAll(r)
        if err != nil {
            return "", err
        }
        return string(buf), nil
    }
    
    func main() {
        utfi := "£5 for Peppé"
        fmt.Printf("%q\n", utfi)
        iso, err := toISO88591(utfi)
        if err != nil {
            fmt.Println(err)
        }
        fmt.Printf("%q\n", iso)
        utfo, err := fromISO88591(iso)
        if err != nil {
            fmt.Println(err)
        }
        fmt.Printf("%q\n", utfo)
        fmt.Println(utfi == utfo)
    }
    

    Output:

    "£5 for Peppé"
    "\xa35 for Pepp\xe9"
    "£5 for Peppé"
    true