Search code examples

How to handle rendering utf-8 characters with size >= 2B properly?

I want to render characters whose UTF-8 encoding is 2 bytes or longer. Almost everything already works, but there is one problem: when such a character is drawn, something extra is also drawn right after it (see image).

To get the glyph data I use FreeType. This is a very minimal implementation; the actual code contains kerning, SDF, etc.

What I think needs explanation is the atlas. The method "TextureAtlas::PackTexture(data, w, h)" packs the texture data and returns its position (the origin, i.e. the top-left corner) within the atlas's w and h range. So the first char has an origin of [0, 0], and the next char, with a width of, let's say, 50, will have its origin at [50, 0]. That is the short version.

        // Resolution in dots per inch. Passed to FT_Set_Char_Size as the vertical
        // resolution; the horizontal resolution passed there is DPI * HIGHRES.
        DPI = 72,
        // Horizontal oversampling factor. InitFreeType multiplies the horizontal
        // resolution by it and scales x back by 1 / HIGHRES through the transform
        // matrix; LoadGlyph divides the slot advance by it.
        HIGHRES = 64

    // Metrics and atlas location of one rasterized glyph, as filled in by
    // TextureFont::LoadGlyph.
    struct Glyph
    {
        // Decoded Unicode code point. Defaults to -1 converted to uint32,
        // presumably meaning "not assigned yet" - confirm against callers.
        uint32 codepoint = -1;
        // Size of the glyph bitmap as stored in the atlas.
        uint32 width = 0;
        uint32 height = 0;

        // FreeType bitmap_left / bitmap_top of the rendered glyph.
        Vector2<int> bearing = 0;
        // FreeType slot advance divided by HIGHRES.
        Vector2<float> advance = 0.0f;

        // Normalized texture coordinates of the glyph inside the atlas:
        // (s0, t0) is the top-left corner, (s1, t1) the bottom-right one.
        // Zero-initialized so that a default-constructed Glyph never exposes
        // indeterminate values.
        float s0 = 0.0f;
        float t0 = 0.0f;
        float s1 = 0.0f;
        float t1 = 0.0f;
    };

    // A font face rasterized on demand with FreeType into a texture atlas.
    //
    // Usage as shown by the surrounding code: LoadFromFile() records the file
    // and size, Initialize() reads the vertical metrics, and getGlyph() returns
    // (loading if necessary) the glyph for a UTF-8 encoded character.
    class TextureFont
    {
    public:
        // Everything is public because free functions such as DrawString call
        // getGlyph() and read ascender/descender directly.
        TextureFont() = default;

        // Opens the face and fills ascender, descender and height.
        // Returns false when FreeType could not be set up.
        bool Initialize();
        // Creates the atlas and remembers the font file path and size.
        void LoadFromFile(const std::string& filePath, float fontSize);

        // Both take a pointer to the first byte of a UTF-8 sequence.
        // getGlyph loads the glyph when it is not cached; FindGlyph only
        // searches the cache. Both return nullptr when no glyph is available.
        Glyph* getGlyph(const char8_t* codepoint);
        Glyph* FindGlyph(const char8_t* codepoint);

        // Rasterizes the glyph for the given UTF-8 sequence.
        // Returns 1 on success and 0 on failure.
        uint32 LoadGlyph(const char8_t* codepoint);

        // Creates the FreeType library/face for `filename` at the given size.
        // Returns 1 on success and 0 on failure.
        int InitFreeType(float size);

        // Heap copy of the font path made with strdup in LoadFromFile.
        // NOTE(review): nothing visible here frees it.
        char* filename = nullptr;

        vector<Glyph> glyphs;
        TextureAtlas atlas;

        // FreeType handles; null until InitFreeType succeeds.
        FT_Library library = nullptr;
        FT_Face face = nullptr;

        float fontSize = 0.0f;
        float ascender = 0.0f;
        float descender = 0.0f;
        float height = 0.0f;
    };
// Decodes the UTF-8 sequence that starts at `in_text` into a Unicode code point.
//
// out_char     Receives the decoded code point. It receives U+FFFD when the
//              sequence is malformed (overlong form, surrogate, value above
//              U+10FFFF, bad continuation byte, or truncated by in_text_end),
//              and 0 when nothing could be decoded at all.
// in_text      First byte of the sequence. Must not be null.
// in_text_end  One past the last readable byte, or null when the text is
//              NUL-terminated and no explicit bound is known.
//
// Returns the number of bytes the caller should advance by (1..4).
// Returns 0 when the byte at `in_text` cannot start a sequence (a stray
// continuation byte or 0xF8..0xFF) or when the bounded range is empty.
// Callers that loop over a string must treat 0 specially, otherwise they
// never advance.
int CharFromUtf8(unsigned int* out_char, const char* in_text, const char* in_text_end)
{
    const unsigned int kInvalid = 0xFFFD;
    const unsigned char* str = reinterpret_cast<const unsigned char*>(in_text);

    // An empty bounded range has no byte that may be read.
    if (in_text_end && in_text_end <= in_text) {
        *out_char = 0;
        return 0;
    }

    // True when an explicit end is given and fewer than `needed` bytes remain.
    const auto fewerThan = [&](int needed) -> bool {
        return in_text_end && (in_text_end - in_text) < needed;
    };
    const auto isContinuation = [](unsigned char byte) -> bool {
        return (byte & 0xc0) == 0x80;
    };

    // 1 byte: 0xxxxxxx
    if (!(str[0] & 0x80)) {
        *out_char = static_cast<unsigned int>(str[0]);
        return 1;
    }

    // 2 bytes: 110xxxxx 10xxxxxx
    if ((str[0] & 0xe0) == 0xc0) {
        *out_char = kInvalid;
        if (fewerThan(2)) return 1;
        if (str[0] < 0xc2) return 2;              // overlong encoding of an ASCII value
        if (!isContinuation(str[1])) return 2;
        *out_char = static_cast<unsigned int>(((str[0] & 0x1f) << 6) + (str[1] & 0x3f));
        return 2;
    }

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    // The continuation checks run left to right and stop at the first failure,
    // so an unbounded read never goes past a terminating NUL.
    if ((str[0] & 0xf0) == 0xe0) {
        *out_char = kInvalid;
        if (fewerThan(3)) return 1;
        if (str[0] == 0xe0 && (str[1] < 0xa0 || str[1] > 0xbf)) return 3;  // overlong
        if (str[0] == 0xed && str[1] > 0x9f) return 3;                     // UTF-16 surrogate
        if (!isContinuation(str[1])) return 3;
        if (!isContinuation(str[2])) return 3;
        *out_char = static_cast<unsigned int>(((str[0] & 0x0f) << 12) +
                                              ((str[1] & 0x3f) << 6) +
                                              (str[2] & 0x3f));
        return 3;
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if ((str[0] & 0xf8) == 0xf0) {
        *out_char = kInvalid;
        if (fewerThan(4)) return 1;
        if (str[0] > 0xf4) return 4;                                       // above U+10FFFF
        if (str[0] == 0xf0 && (str[1] < 0x90 || str[1] > 0xbf)) return 4;  // overlong
        if (str[0] == 0xf4 && str[1] > 0x8f) return 4;                     // above U+10FFFF
        if (!isContinuation(str[1])) return 4;
        if (!isContinuation(str[2])) return 4;
        if (!isContinuation(str[3])) return 4;
        // No surrogate check is needed here: after the range checks above the
        // value is always at least 0x10000.
        *out_char = static_cast<unsigned int>(((str[0] & 0x07) << 18) +
                                              ((str[1] & 0x3f) << 12) +
                                              ((str[2] & 0x3f) << 6) +
                                              (str[3] & 0x3f));
        return 4;
    }

    // Stray continuation byte or 0xF8..0xFF: not a valid lead byte.
    *out_char = 0;
    return 0;
}

    bool TextureFont::Initialize()
        FT_Size_Metrics metrics;

        if (!InitFreeType(fontSize * 100.0f)) {
            return false;

        metrics = face->size->metrics;
        ascender = (metrics.ascender >> 6) / 100.0f;
        descender = (metrics.descender >> 6) / 100.0f;
        height = (metrics.height >> 6) / 100.0f;


        return true;

    int TextureFont::InitFreeType(float size)
        FT_Matrix matrix = {
            static_cast<int>((1.0 / HIGHRES) * 0x10000L),
            static_cast<int>((0.0)           * 0x10000L),
            static_cast<int>((0.0)           * 0x10000L),
            static_cast<int>((1.0)           * 0x10000L)};
        FT_Error error;
        error = FT_Init_FreeType(&library);
        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not Init FreeType!\n");
            return 0;

        error = FT_New_Face(library, filename, 0, &face);

        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not create a new face!\n");
            return 0;

        error = FT_Select_Charmap(face, FT_ENCODING_UNICODE);
        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not select charmap!\n");
            return 0;

        error = FT_Set_Char_Size(face, static_cast<ulong>(size * HIGHRES), 0, DPI * HIGHRES, DPI);
        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not set char size!\n");
            return 0;

        FT_Set_Transform(face, &matrix, NULL);

        return 1;

    void TextureFont::LoadFromFile(const std::string& filePath, float fontSize)
        atlas.Create(512, 1);
        std::fill(atlas.buffer.begin(), atlas.buffer.end(), 0);
        this->fontSize = fontSize;  
        this->filename = strdup(filePath.c_str());


    // Returns the glyph for the UTF-8 sequence starting at `codepoint`,
    // loading it first when it is not cached yet.
    // Returns nullptr when the glyph is neither cached nor loadable.
    Glyph* TextureFont::getGlyph(const char8_t* codepoint)
    {
        Glyph* cached = FindGlyph(codepoint);
        if (cached != nullptr) {
            return cached;
        }

        if (LoadGlyph(codepoint) == 0) {
            return nullptr;
        }

        // Look the glyph up again after a successful load.
        return FindGlyph(codepoint);
    }

    // Searches the cached glyphs for the code point encoded by the UTF-8
    // sequence starting at `codepoint`.
    // Returns a pointer into `glyphs`, or nullptr when there is no match.
    Glyph* TextureFont::FindGlyph(const char8_t* codepoint)
    {
        // Decode without an explicit end bound, as the original lookup does.
        uint32 wanted;
        CharFromUtf8(&wanted, (char*)codepoint, NULL);

        for (Glyph& candidate : glyphs) {
            if (candidate.codepoint == wanted) {
                return &candidate;
            }
        }

        return nullptr;
    }

    // Rasterizes the glyph for the UTF-8 sequence starting at `codepoint`,
    // packs its bitmap into the atlas and stores the resulting Glyph in
    // `glyphs` so that FindGlyph/getGlyph can return it.
    //
    // Returns 1 when the glyph is available (already cached or just loaded)
    // and 0 when FreeType could not be initialized or the glyph failed to load.
    //
    // FreeType is opened for the duration of the call and closed again before
    // returning, so repeated calls do not accumulate library/face objects.
    // `library` and `face` are null afterwards.
    //
    // Appending to `glyphs` may reallocate it; Glyph pointers handed out
    // earlier must not be kept across a call to this function.
    uint32 TextureFont::LoadGlyph(const char8_t* codepoint)
    {
        // Check the cache first: a hit needs no FreeType objects at all.
        if (FindGlyph(codepoint)) {
            return 1;
        }

        if (!InitFreeType(fontSize)) {
            return 0;
        }

        // Releases the handles opened by InitFreeType above.
        const auto closeFreeType = [this]() {
            FT_Done_Face(face);
            FT_Done_FreeType(library);
            face = nullptr;
            library = nullptr;
        };

        unsigned int cp = 0;
        CharFromUtf8(&cp, reinterpret_cast<const char*>(codepoint), nullptr);
        const FT_UInt glyphIndex = FT_Get_Char_Index(face, cp);

        const FT_Int32 flag = FT_LOAD_RENDER | FT_LOAD_FORCE_AUTOHINT;

        const FT_Error error = FT_Load_Glyph(face, glyphIndex, flag);
        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not load the glyph (line {})!\n", __LINE__);
            closeFreeType();
            return 0;
        }

        // The slot and its bitmap are owned by the face; everything needed is
        // copied out before the face is released below.
        const FT_GlyphSlot slot = face->glyph;
        const FT_Bitmap& bitmap = slot->bitmap;
        const int glyphTop = slot->bitmap_top;
        const int glyphLeft = slot->bitmap_left;

        const uint32 srcWidth = bitmap.width / atlas.bytesPerPixel;
        const uint32 srcHeight = bitmap.rows;

        const uint32 tgtWidth = srcWidth;
        const uint32 tgtHeight = srcHeight;

        // Bytes per destination row. Copying exactly this many bytes keeps
        // the copy inside the buffer even when bitmap.width is not a multiple
        // of bytesPerPixel.
        const uint32 rowBytes = tgtWidth * atlas.bytesPerPixel;
        auto buffer = std::make_unique<uchar[]>(rowBytes * tgtHeight);

        uchar* destPointer = buffer.get();
        const uchar* srcPointer = bitmap.buffer;

        for (uint32 i = 0; i < srcHeight; ++i) {
            memcpy(destPointer, srcPointer, rowBytes);
            destPointer += rowBytes;
            srcPointer += bitmap.pitch;  // pitch is the source row stride and may differ from width
        }

        auto origin = atlas.PackTexture(buffer.get(), { tgtWidth, tgtHeight });

        const float x = origin.x;
        const float y = origin.y;

        Glyph current;
        current.codepoint = cp;
        current.width = tgtWidth;
        current.height = tgtHeight;
        current.bearing.x = glyphLeft;
        current.bearing.y = glyphTop;
        // Normalized atlas coordinates of the packed bitmap.
        current.s0 = x / (float)atlas.textureSize.w;
        current.t0 = y / (float)atlas.textureSize.h;
        current.s1 = (x + tgtWidth) / (float)atlas.textureSize.w;
        current.t1 = (y + tgtHeight) / (float)atlas.textureSize.h;

        current.advance.x = slot->advance.x / (float)HIGHRES;
        current.advance.y = slot->advance.y / (float)HIGHRES;

        // Store the glyph; without this the lookup after a successful load
        // would still find nothing.
        glyphs.push_back(current);

        closeFreeType();
        return 1;
    }

To render a string (a single char in this case), I loop over the string's size, get a glyph, update the atlas, and set up the render data.

The text is a simple quad with a texture on it and the proper UVs. I do not think it is necessary to explain what is inside AddVertexData, because it does not cause the problem.

void DrawString(const std::u8string& string, float x, float y)
        for (const auto& c : string) {
            auto glyph = textureFont.getGlyph(&c);

            auto& t = *(Texture2D*)texture.get();

            float x0 = x + static_cast<float>(glyph->bearing.x);
            float y0 = y + (textureFont.ascender + textureFont.descender - static_cast<float>(glyph->bearing.y));
            float x1 = x0 + static_cast<float>(glyph->width);
            float y1 = y0 + static_cast<float>(glyph->height);

            float u0 = glyph->s0;
            float v0 = glyph->t0;
            float u1 = glyph->s1;
            float v1 = glyph->t1;

            //            position                uv                      color
            AddVertexData(Vector2<float>(x0, y0), Vector2<float>(u0, v0), 0xff0000ff);
            AddVertexData(Vector2<float>(x0, y1), Vector2<float>(u0, v1), 0xff0000ff);
            AddVertexData(Vector2<float>(x1, y1), Vector2<float>(u1, v1), 0xff0000ff);
            AddVertexData(Vector2<float>(x1, y0), Vector2<float>(u1, v0), 0xff0000ff);

            // indices for DrawElements() call
            // 0, 1, 2, 2, 3, 0

            x += glyph->advance.x;

ę occupies 2 bytes in UTF-8, so the loop runs twice. It renders only 1 character, and on the second iteration it does not recognize a second character (because there is no second character), so it renders an empty quad.

How to get rid of the quad that follows the character I want to render?


  • In your DrawString function you have the loop

    for (const auto& c : string)

    That loop will iterate byte by byte over the string. So if the string contains the two-byte "ę" character, then the first iteration will get the first byte, and the second iteration will get the second byte.

    You can't use a range-based for loop here, since you need to skip bytes in the string. Use either an iterator-based loop or an index-based loop.

    For example

    // Index-based loop skeleton: the index is advanced inside the body by the
    // byte length of the current UTF-8 character, not by one.
    for (size_t i = 0; i < string.size(); /* nothing */) {
        // Here you need to get the number of bytes for the current character
        // (in UTF-8 the lead byte string[i] tells how long the sequence is).
        // Then you should increment the index by that amount
        i += byte_count_for_current_character;
        // ... rest of code
        // Note: after the increment i refers to the next character, so the
        // rest of the code must use the start position saved beforehand.