I use this approach to convert an ordinary picture into ASCII art, but processing even small images is extremely slow on weak processors. Is it possible to optimize this? I tried to do something with numpy, but I didn't get anywhere. Any help with optimization would be appreciated, thanks. My failed attempt is shown below.
Algorithm taken from here: https://github.com/Akascape/Ascify-Art
My code:
from PIL import Image, ImageDraw, ImageFont
import math
def make_magic_old(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as coloured ASCII art and return it as a new RGB image.

    The picture is shrunk with nearest-neighbour resampling, then every
    remaining pixel is replaced by one character cell of
    ``char_width`` x ``char_height`` output pixels. The character is chosen
    from *chars* by the pixel's average brightness and drawn in the pixel's
    own colour on a black background.

    Args:
        photo: Source PIL image; it is converted to RGB and left unmodified.
        chars: Characters to draw, indexed from darkest to brightest pixel.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width in output pixels of one character cell.
        char_height: Height in output pixels of one character cell.
        scale: Shrink factor applied to the source (rounded to 3 decimals).

    Returns:
        A new RGB ``Image`` of size
        ``(char_width * cols, char_height * rows)``.
    """
    scale_factor = round(scale, 3)
    # The 256 possible brightness values are spread evenly over the characters.
    interval = len(chars) / 256
    photo = photo.convert("RGB")
    fnt = ImageFont.truetype("assets/fonts/FiraCode-Bold.ttf", char_size)
    width, height = photo.size
    # The height gets the extra char_width / char_height factor because a
    # character cell is taller than it is wide.
    photo = photo.resize(
        (int(scale_factor * width), int(scale_factor * height * (char_width / char_height))),
        Image.Resampling.NEAREST,
    )
    width, height = photo.size
    pix = photo.load()
    output_image = Image.new("RGB", (char_width * width, char_height * height), color="black")
    draw = ImageDraw.Draw(output_image)
    for i in range(height):
        y = i * char_height
        for j in range(width):
            r, g, b = pix[j, i]
            # Average brightness of the pixel, 0..255.
            h = int(r / 3 + g / 3 + b / 3)
            # The resized picture is discarded afterwards, so the grayscale
            # value is only used to pick the character and is not written back.
            draw.text((j * char_width, y), chars[math.floor(h * interval)], font=fnt, fill=(r, g, b))
    return output_image
def main():
    """Convert ``test.png`` to ASCII art and save it as ``result.jpg``."""
    # The context manager closes the underlying file as soon as the
    # conversion no longer needs the source picture.
    with Image.open("test.png") as photo:
        result_photo = make_magic(photo)
    result_photo.save("result.jpg")
    print("Done!")


if __name__ == "__main__":
    main()
Attempt to optimize with numpy:
import numpy as np
def make_magic(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as coloured ASCII art, using NumPy for the per-pixel maths.

    The picture is shrunk with nearest-neighbour resampling, the brightness
    of every remaining pixel is mapped to an index into *chars*, and that
    character is drawn in the pixel's colour on a black background.

    Args:
        photo: Source PIL image; it is converted to RGB and left unmodified.
        chars: Characters to draw, indexed from darkest to brightest pixel.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width in output pixels of one character cell.
        char_height: Height in output pixels of one character cell.
        scale: Shrink factor applied to the source (rounded to 3 decimals).

    Returns:
        A new RGB ``Image`` of size
        ``(char_width * cols, char_height * rows)``.
    """
    scale_factor = round(scale, 3)
    char_length = len(chars)
    photo = photo.convert("RGB")
    fnt = ImageFont.truetype("assets/fonts/FiraCode-Bold.ttf", char_size)
    width, height = photo.size
    # Resize while the data is still a PIL image; only the small result is
    # turned into an array, so the full-size picture is never copied to NumPy
    # and back.
    small = photo.resize(
        (int(scale_factor * width), int(scale_factor * height * (char_width / char_height))),
        Image.NEAREST,
    )
    pixels = np.array(small)
    height, width, _ = pixels.shape
    # Average the three channels, then spread 0..255 over the characters.
    grayscale = np.mean(pixels, axis=2).astype(np.uint8)
    indices = (grayscale * (char_length / 256)).astype(int)
    output_image = Image.new("RGB", (char_width * width, char_height * height), color="black")
    draw = ImageDraw.Draw(output_image)
    # Convert to nested Python lists once: the loop below then works on plain
    # ints instead of creating a NumPy scalar for every pixel access.
    index_rows = indices.tolist()
    colour_rows = pixels.tolist()
    for i, (index_row, colour_row) in enumerate(zip(index_rows, colour_rows)):
        y = i * char_height
        for j, (index, rgb) in enumerate(zip(index_row, colour_row)):
            draw.text((j * char_width, y), chars[index], font=fnt, fill=tuple(rgb))
    return output_image
The original algorithm really isn't that bad, so getting a lot of speedup is going to be pretty challenging. For very small image outputs, the solution presented here is about 2x slower, and for very large outputs, this solution is about 30x faster.
Testing on my machine, even with an SSD, PIL.Image.open and PIL.Image.save were very impactful on the runtime, especially on the smaller files. That is unavoidable, so I focused on the image creation component.
The basic concept of this solution is to pre-generate all of the letters, then tile them into the overall image, taking advantage of numpy and numba's amazing synergy for basic matrix operations like that.
The font step is not particularly amenable to being numba compiled, so that is left in python-land. If you needed to process many files, you could call the sub function in a loop, and spare the repeated font step.
Here is the solution I came up with, with a little extra testing boilerplate code:
import time
from PIL import Image, ImageDraw, ImageFont
import math
import numba
import numpy as np
def make_magic_old(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Reference implementation: draw one character per pixel with ``draw.text``.

    This is the baseline that ``main`` times against ``make_magic``; it does
    the same work as the original per-pixel algorithm, including the
    grayscale write-back into the resized picture.

    Args:
        photo: Source PIL image; it is converted to RGB and left unmodified.
        chars: Characters to draw, indexed from darkest to brightest pixel.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width in output pixels of one character cell.
        char_height: Height in output pixels of one character cell.
        scale: Shrink factor applied to the source (rounded to 3 decimals).

    Returns:
        A new RGB ``Image`` of size
        ``(char_width * cols, char_height * rows)``.
    """
    factor = round(scale, 3)
    # Each brightness level 0..255 maps to ``level * step`` in the char list.
    step = len(chars) / 256

    rgb_photo = photo.convert("RGB")
    font = ImageFont.truetype("font.ttf", char_size)

    src_w, src_h = rgb_photo.size
    target = (int(factor * src_w), int(factor * src_h * (char_width / char_height)))
    small = rgb_photo.resize(target, Image.Resampling.NEAREST)
    cols, rows = small.size
    pixels = small.load()

    canvas = Image.new("RGB", (char_width * cols, char_height * rows), color="black")
    pen = ImageDraw.Draw(canvas)

    for row in range(rows):
        for col in range(cols):
            red, green, blue = pixels[col, row]
            level = int(red / 3 + green / 3 + blue / 3)
            # Kept so the baseline does the same work as the code being measured.
            pixels[col, row] = (level, level, level)
            glyph = chars[math.floor(level * step)]
            pen.text((col * char_width, row * char_height), glyph, font=font, fill=(red, green, blue))
    return canvas
def make_magic(photo, chars="01", char_size=15, char_width=10, char_height=18, scale=0.09):
    """Render *photo* as coloured ASCII art by tiling pre-rendered glyphs.

    Each character of *chars* is drawn once, in white on black, into a
    ``char_width`` x ``char_height`` tile. The compiled helper
    ``_make_magic_sub`` then assembles the output from those tiles.

    Args:
        photo: Source PIL image; it is converted to RGB and left unmodified.
        chars: Characters to draw, indexed from darkest to brightest pixel.
        char_size: Font size passed to ``ImageFont.truetype``.
        char_width: Width in output pixels of one character cell.
        char_height: Height in output pixels of one character cell.
        scale: Shrink factor applied to the source.

    Returns:
        A new RGB ``Image`` built from the array returned by the helper.
    """
    rgb_photo = photo.convert("RGB")
    font = ImageFont.truetype("font.ttf", char_size)

    # One white-on-black tile per character, stacked along the first axis.
    tiles = np.empty((len(chars), char_height, char_width, 3), np.ubyte)
    for slot, glyph in enumerate(chars):
        tile = Image.new('RGB', (char_width, char_height), color='black')
        ImageDraw.Draw(tile).text((0, 0), glyph, font=font, fill=(255, 255, 255))
        tiles[slot, :] = np.array(tile)

    # The pixel loop itself runs in the numba-compiled helper.
    pixels = np.array(rgb_photo)
    rendered = _make_magic_sub(pixels, tiles, char_width, char_height, scale)
    return Image.fromarray(rendered, 'RGB')
@numba.njit(cache=True, parallel=True)
def _make_magic_sub(photo, char_masks, char_width, char_height, scale):
    """Assemble the ASCII-art pixel array from pre-rendered character tiles.

    Args:
        photo: Source pixels; indexed as ``photo[row, col]`` to get the
            three colour channels (0..255) of one pixel.
        char_masks: One tile per character along the first axis; each tile
            fills a ``char_height`` x ``char_width`` x 3 region of the output.
        char_width: Width in output pixels of one character cell.
        char_height: Height in output pixels of one character cell.
        scale: Shrink factor applied to the source before tiling.

    Returns:
        An unsigned-byte array of shape
        ``(rows * char_height, cols * char_width, 3)``.
    """
    n_chars = char_masks.shape[0]
    interval = 1 / n_chars
    new_size = (int(photo.shape[0] * scale * char_width / char_height), int(photo.shape[1] * scale), 3)
    outimage = np.empty((new_size[0] * char_height, new_size[1] * char_width, 3), np.ubyte)
    for i in numba.prange(new_size[0]):
        for j in range(new_size[1]):
            # Sample the source pixel for this cell and normalise it to 0..1.
            rgb = photo[int(i / new_size[0] * photo.shape[0]), int(j / new_size[1] * photo.shape[1])] / 255
            # A pure white pixel has mean brightness exactly 1.0, which maps
            # to n_chars -- one past the last tile. Clamp it to the last
            # valid tile so the lookup never reads outside char_masks.
            char_num = min(int(np.floor(np.sum(rgb) / 3 / interval)), n_chars - 1)
            # Tint the white tile with the pixel colour while copying it in.
            outimage[i * char_height: (i + 1) * char_height, j * char_width: (j + 1) * char_width, :] = char_masks[char_num] * rgb
    return outimage
def _gt(s=0.0):
return time.perf_counter() - s
def main():
    """Benchmark both converters on ``test.png`` and save one sample of each.

    For every scale, each function is called once untimed and then timed
    over several runs; the mean time per call is printed in milliseconds.
    Finally both results at scale 0.2 are written to ``result_old.png`` and
    ``result_new.png`` for visual comparison.
    """
    repeats = 10
    # The context manager closes the source file once all conversions are done.
    with Image.open("test.png") as photo:
        for scale in [0.01, 0.05, 0.1, 0.2, 0.5, 1.0]:
            for fun in [make_magic_old, make_magic]:
                fun(photo)  # To skip any caching / compilation times
                start = _gt()
                for _ in range(repeats):
                    fun(photo, scale=scale)
                elapsed = _gt(start)
                print(f'{fun.__name__:16}{scale:4.2f} : {elapsed / repeats * 1000:7.1f} ms')
            print()
        res_old = make_magic_old(photo, scale=0.2)
        res_new = make_magic(photo, scale=0.2)
    res_old.save('result_old.png')
    res_new.save('result_new.png')


if __name__ == "__main__":
    main()
Which gives the following results:
make_magic_old 0.01 : 2.2 ms
make_magic 0.01 : 4.7 ms
make_magic_old 0.05 : 18.6 ms
make_magic 0.05 : 5.1 ms
make_magic_old 0.10 : 65.9 ms
make_magic 0.10 : 6.9 ms
make_magic_old 0.20 : 256.1 ms
make_magic 0.20 : 13.1 ms
make_magic_old 0.50 : 1601.3 ms
make_magic 0.50 : 58.7 ms
make_magic_old 1.00 : 6379.3 ms
make_magic 1.00 : 194.2 ms
Tested on Windows 10, i9-10900K, Python 3.11.4
Your results may vary significantly, I'm running on a far from "not strong processor", but I think this will help you on most multithreaded processors, and you can see that we got about a 32x speed up on the largest output image.
And output from the old code for comparison:
Let me know if you have any questions.