python image-processing text image-size wand

How to make auto-sizing boxes in images for text to remain same font size in Python with Wand

I'm using Wand in Python to build an image by looping over pieces of text and drawing a black-bordered box populated with each one. Here is my current code:

import re
from unicodedata import normalize

from docx import Document
from wand.image import Image
from wand.drawing import Drawing
from wand.font import Font

# Read every paragraph of the Word document and flatten the paragraphs into a
# single newline-separated string.
doc = Document("P.docx")
docText = [para.text for para in doc.paragraphs]
fullText = "\n".join(docText)

# Running numeric label; it is incremented before each use further down.
ct = 242


def get(source, begin, end):
    """Return the part of *source* located between *begin* and *end*.

    The search for *end* starts immediately after the first occurrence of
    *begin*. An empty string is returned when either delimiter is missing.
    """
    try:
        # str.index expects the substring itself (not its length) and raises
        # ValueError when the substring is absent.
        start = source.index(begin) + len(begin)
        finish = source.index(end, start)
    except ValueError:
        return ""
    return source[start:finish]


def capitalize(string):
    """Lazily upper-case the first character of every item in *string*.

    Despite its name, *string* is an iterable of strings. The remainder of
    each item is left untouched (unlike ``str.capitalize``, which lower-cases
    it), and empty items are passed through unchanged.

    Returns a generator, so the result can be iterated only once.
    """
    # Slicing with [:1] is safe on an empty string, where [0] raises
    # IndexError.
    return (item[:1].upper() + item[1:] for item in string)


def find_matches(text):
    """Return the capitalized regex captures found in *text*.

    The text is NFKD-normalized before searching. Because the pattern holds a
    single capture group, ``re.findall`` returns that group's text for each
    match rather than the whole match.
    """
    normalized = normalize("NFKD", text)
    captures = re.findall(
        r"^[^0-9]\s+([^.;]+\s*)+[.;]+", normalized, re.MULTILINE
    )
    return capitalize(captures)


# Draw one fixed-size bordered box per match, stacked vertically on a white
# 400x1000 canvas, then crop the canvas to the rows that were used.
with Image(width=400, height=1000, pseudo='xc:white') as canvas:
    # Geometry of the first box; every box shares the same width and height.
    left, top, width, height = 2, 2, 395, 131
    for match in find_matches(text=fullText):
        ct += 1  # numeric label printed on the last line of this box
        with Drawing() as context:
            # A black rectangle with a white rectangle inset by 2px on every
            # side leaves a 2px black border.
            context.fill_color = 'black'
            context.rectangle(left=left, top=top, width=width, height=height)
            context.fill_color = 'white'
            context.rectangle(left=(left+2), top=(top+2), width=(width-4), height=(height-4))
            canvas.font = Font('/System/Library/Fonts/timesnewroman.ttf')
            context(canvas)  # render the two rectangles onto the canvas
            # NOTE(review): no font size is set and caption() is given a fixed
            # bounding box, so the rendered text size appears to vary with the
            # amount of text -- the non-uniform sizing described in the
            # question. Confirm against the Wand caption documentation.
            canvas.caption(match + '\n' + str(ct), left=(left+5), top=top, width=(width-10), height=height,
                           gravity='center')
        top += 135  # box height (131) plus a 4px gap before the next box
    canvas.crop(bottom=top)
    canvas.save(filename='patdrawTest.png')

With this code, I'm able to produce the following output:

The problem with this is obviously the lack of uniformity when it comes to font size. The following is what my output is supposed to look like (ignoring the text within the boxes):

The only thing that I'm concerned with is making sure that all the text is the same font size (size 12 is great), with those bordered boxes resizing to the text. Thanks for any help!

EDIT

From the provided answer, here is my current code:

import re
from unicodedata import normalize

from docx import Document
from wand.image import Image
from wand.drawing import Drawing

# Load the Word document and merge the text of all of its paragraphs into one
# newline-separated string.
doc = Document("P.docx")
docText = [paragraph.text for paragraph in doc.paragraphs]
fullText = "\n".join(docText)

# Running numeric label; it is incremented before each use further down.
ct = 242


def get(source, begin, end):
    """Return the substring of *source* enclosed by *begin* and *end*.

    *end* is searched for only after the first occurrence of *begin*. When
    either delimiter cannot be found, an empty string is returned.
    """
    try:
        # Search for the delimiter strings themselves; passing their lengths
        # to str.index raises TypeError instead of locating anything.
        start = source.index(begin) + len(begin)
        finish = source.index(end, start)
    except ValueError:
        return ""
    return source[start:finish]


def capitalize(string):
    """Lazily upper-case the first character of each string in *string*.

    *string* is an iterable of strings, not a single string. Only the first
    character of each item is changed; the rest is kept as-is, and empty
    items are yielded unchanged.

    Returns a single-use generator.
    """
    # entry[:1] yields "" for an empty entry, whereas entry[0] would raise
    # IndexError.
    return (entry[:1].upper() + entry[1:] for entry in string)


def find_matches(text):
    """Search *text* line by line and return the capitalized captures.

    *text* is NFKD-normalized first. The pattern contains one capture group,
    so ``re.findall`` produces that group's text for every match instead of
    the full matched line.
    """
    hits = re.findall(
        r"^[^0-9]\s+([^.;]+\s*)+[.;]+",
        normalize("NFKD", text),
        re.MULTILINE,
    )
    return capitalize(hits)


def to_chunks(words, size):
    """Yield consecutive slices of *words* holding at most *size* items each.

    The last slice is shorter when ``len(words)`` is not a multiple of
    *size*.
    """
    starts = range(0, len(words), size)
    yield from (words[begin:begin + size] for begin in starts)


def rebuild_text(words, size):
    """Lay *words* out as text with at most *size* words on each line.

    Words on a line are separated by one space; lines are separated by a
    newline.
    """
    lines = (" ".join(chunk) for chunk in to_chunks(words, size))
    return "\n".join(lines)


target_width = 396   # maximum rendered text width, in pixels
target_height = 0    # rendered text height of the box being drawn
y_offset = 0         # top of the vertical slot reserved for the current box
y_padding = 10
x_padding = 5
stroke_width = 4
# The stroke is centred on the rectangle outline, so a box starting at x=0
# loses half of its left border off-canvas. Insetting the box by half the
# stroke width keeps the left and right borders the same thickness.
box_left = stroke_width // 2

# NOTE: the canvas is a fixed 1000x1000; boxes that extend past the bottom
# edge are not drawn in full.
with Image(width=1000, height=1000, pseudo='xc:white') as img:
    for match in find_matches(text=fullText):
        ct += 1
        # The counter always goes on its own final line. Wrapping it as an
        # ordinary word could leave a trailing space or insert a blank line.
        label = "\n" + str(ct)
        with Drawing() as ctx:
            ctx.font_size = 16
            ctx.text_alignment = 'center'
            words = match.split(" ")
            word_count = len(words)
            # Reduce the number of words per line until the rendered text
            # fits. Stop at one word per line: going to zero would make
            # range() inside to_chunks raise ValueError.
            while True:
                temp_text = rebuild_text(words, word_count) + label
                metrics = ctx.get_font_metrics(img, temp_text, multiline=True)
                if metrics.text_width <= target_width or word_count <= 1:
                    break
                word_count -= 1
            text = temp_text
            target_height = int(metrics.text_height + 0.5)
            # push/pop keeps the box's fill and stroke settings from leaking
            # into the text drawn afterwards.
            ctx.push()
            ctx.fill_color = 'white'
            ctx.stroke_width = stroke_width
            ctx.stroke_color = 'black'
            ctx.rectangle(box_left, y_offset + y_padding,
                          width=2*x_padding+target_width,
                          height=2*y_padding+target_height)
            ctx.pop()
            # With centre alignment the x coordinate is the horizontal middle
            # of the box.
            ctx.text(box_left + x_padding + (target_width // 2),
                     4*y_padding+y_offset, text)
            ctx(img)
            # Accumulate the offset so each box is placed below the previous
            # one; assigning (instead of adding) made later boxes overlap.
            y_offset += target_height + 4*y_padding
    img.trim()
    img.save(filename='patdrawdemoTest.png')

From this code, I get the following output:

I'm not really sure how to fix the strange spacing. The first and third boxes seem to be fine; however, there's a weird white space between the first and second boxes, the borders on the sides aren't even, and there are also two more boxes that are supposed to be there, as shown in the original post. Any help would be greatly appreciated!

Solution

The only thing that I'm concerned with is making sure that all the text is the same font size (size 12 is great), with those bordered boxes resizing to the text. Thanks for any help!

Drop the Image.caption method (it does the opposite of what you want) and use the Drawing.text method. The Drawing class also has a get_font_metrics method to calculate the final rendered size. This allows you to format the text, check whether it fits, and then draw (or re-format and repeat).

Take this rough example...

# Sample box contents: filler text followed by a newline and the numeric label
# that ends each entry.
content = [
    """Donec pretium vulputate sapien nec sagittis aliquam malesuada. Neque aliquam vestibulum morbi blandit cursus risus at ultrices mi.\n111""",
    """Adipiscing elit ut aliquam purus sit amet luctus venenatis. Eget mauris pharetra et ultrices neque ornare aenean. Viverra orci sagittis eu volutpat odio facilisis mauris. Vitae proin sagittis nisl rhoncus mattis rhoncus. Sapien nec sagittis aliquam malesuada bibendum arcu vitae.\n222"""
]

def to_chunks(words, size):
    """Split *words* into successive groups of up to *size* items.

    Groups are yielded lazily and in order; only the final group may contain
    fewer than *size* items.
    """
    for lower in range(0, len(words), size):
        upper = lower + size
        yield words[lower:upper]

def rebuild_text(words, size):
    """Return *words* as multi-line text with up to *size* words per line.

    Each group produced by ``to_chunks`` becomes one space-separated line,
    and the lines are joined with newlines.
    """
    return "\n".join(map(" ".join, to_chunks(words, size)))
# Layout settings. TARGET_HEIGHT and BOX_Y_OFFSET are updated while drawing.
TARGET_WIDTH = 395    # maximum rendered text width, in pixels
TARGET_HEIGHT = 0     # rendered text height of the box being drawn
BOX_Y_OFFSET = 0      # top of the vertical slot reserved for the current box
BOX_Y_PADDING = 10
BOX_X_PADDING = 5
BOX_STROKE_WIDTH = 4
# The stroke is centred on the rectangle outline, so a box starting at x=0
# loses half of its left border off-canvas. Insetting by half the stroke width
# keeps the side borders even.
BOX_LEFT = BOX_STROKE_WIDTH // 2

with Image(width=1000, height=1000, background='white') as img:
    for text in content:
        with Drawing() as ctx:
            ctx.font_size = 16
            ctx.text_alignment = 'center'
            words = text.split(" ")
            word_count = len(words)
            # Reduce the number of words per line until the rendered text
            # fits. Stop at one word per line: going to zero would make
            # range() inside to_chunks raise ValueError.
            while True:
                tmp_text = rebuild_text(words, word_count)
                metrics = ctx.get_font_metrics(img, tmp_text, multiline=True)
                if metrics.text_width <= TARGET_WIDTH or word_count <= 1:
                    break
                word_count -= 1
            text = tmp_text
            TARGET_HEIGHT = int(metrics.text_height + 0.5)
            # push/pop keeps the box's fill and stroke settings from leaking
            # into the text drawn afterwards.
            ctx.push()
            ctx.fill_color = 'white'
            ctx.stroke_width = BOX_STROKE_WIDTH
            ctx.stroke_color = 'black'
            ctx.rectangle(BOX_LEFT, BOX_Y_OFFSET + BOX_Y_PADDING,
                          width=2*BOX_X_PADDING+TARGET_WIDTH,
                          height=2*BOX_Y_PADDING+TARGET_HEIGHT)
            ctx.pop()
            # With centre alignment the x coordinate is the horizontal middle
            # of the box.
            ctx.text(BOX_LEFT + BOX_X_PADDING + (TARGET_WIDTH // 2),
                     4*BOX_Y_PADDING+BOX_Y_OFFSET, text)
            ctx(img)
            # Accumulate the offset so every box lands below the previous one;
            # plain assignment only works for the first two boxes.
            BOX_Y_OFFSET += TARGET_HEIGHT + 4*BOX_Y_PADDING

    img.trim()
    img.save(filename='output.png')

All this does is:

Define a target width.
While the to-be-rendered text is wider than the target width, decrease the number of words per line.
Draw box & text.

Please remember.

ImageMagick is not a type-setter. You'll have to build a word-hyphenation / line-break algorithm.
Special UTF-8 characters can become a challenge.
The code posted above is an example, and doesn't handle errors or protect against recursive loops.