I have a binary file containing image pixel data consisting only of bytes equal to 0 or 1 (0x00 or 0x01). I want to create a black and white image from this data.
A reproducible example of my code so far using Pillow in Python (note I would normally be loading the data from file rather than creating it in place):
import numpy as np
from PIL import Image, ImageOps
# Minimal reproduction: build a random 0/1 byte buffer and view it as an image.
w = 128  # image width; could be much bigger, keeping it small for the example
h = 128  # image height
# One uint8 per pixel, each either 0 or 1 (stand-in for data loaded from file).
data = np.random.randint(2, size=w*h, dtype=np.uint8).tobytes()
# Pillow takes size as (width, height). Mode 'L' treats each byte as a
# grayscale level, so the value 1 shows up as almost black -- this is the
# behaviour the question is about and is left as-is on purpose.
img = Image.frombuffer('L', (w, h), data)
img.show()
The problem is, the image is interpreting pixel data as grayscale, so a value of 1 is almost black. I want 1 to be white (i.e. 255 in grayscale) and 0 to be black.
Is there functionality within Pillow where it "just knows" that I want 1=white & 0=black (rather than 255=white & 0=black)?
With help of answers below, I have found a number of solutions where I can modify my data to replace 1's with 255's... I have posted an answer with results of these solutions. Some of them are very fast as they are, so probably not much more performance to be gained.
But IF there is a nice solution which avoids this overhead altogether, i.e. to tell Pillow directly to just treat 1's as white and 0's as black, that would be ideal.
With thanks to the comment from @MarkSetchell pointing me to https://stackoverflow.com/a/64682849/2836621, I have used a palette to tell Pillow directly to treat 0 as black and 1 as white.
Here is the code:
def create_images_palette():
    """Load back-to-back w*h-byte frames from `filename` as palettised images.

    The pixel bytes are used unchanged; a palette whose entry 0 is black and
    entry 1 is white is attached to each image instead of rewriting the data.

    Returns:
        A list with one image per w*h-byte chunk read from the file.
    """
    # RGB triplets: index 0 -> (0, 0, 0), index 1 -> (255, 255, 255).
    palette = [0, 0, 0,
               255, 255, 255]
    # Zero-pad to 768 values, i.e. 256 RGB triplets.
    palette = palette + [0]*(768-len(palette))
    imgs = []
    with open(filename, 'rb') as ifile:
        # iter(callable, sentinel): read w*h bytes at a time until b'' (EOF).
        for data in iter(partial(ifile.read, w*h), b''):
            # Pillow takes size as (width, height).
            img = Image.frombuffer('L', (w, h), data)
            img.putpalette(palette)
            imgs.append(img)
    return imgs
Results compared to the winners from tests below, but this time I used w=1024, h=1024, N=1000 (more realistic for my usage):
create_images3 0.42854620320013054
create_images6 0.32936501539988966
create_images7 0.31196588300008443
create_images_palette 0.21011565389999304
So the palette solution wins.
With the help of the answers, I have tested a number of solutions where I can modify my data to replace 1's with 255's. Here are the results of those tests.
I will accept an answer over this one that, as per the question, tells Pillow to directly treat 1's as white and 0's as black. Failing that, some of these solutions work and perform well for my needs.
Note that in my real world application, I can have data for numerous images back-to-back in one binary file. These solutions reflect that.
import numpy as np
import os
from functools import partial
from PIL import Image, ImageOps
# Benchmark fixture: N random 0/1 images stored back-to-back in one binary file.
w = 128  # image width; could be much bigger, keeping it small for the example
h = 128  # image height
N = 100  # number of images written to the file
filename = 'byte_imgs.dat'
# One uint8 per pixel, each either 0 or 1.
data = np.random.randint(2, size=w*h*N, dtype=np.uint8).tobytes()
# Context manager guarantees the handle is closed even if the write fails.
with open(filename, 'wb') as f:
    f.write(data)
print("image data written to file")
def create_images1():
    """Load each w*h-byte frame from `filename` and run it through autocontrast.

    Variant 1 of the benchmark: the raw 0/1 bytes are wrapped as a grayscale
    image and ImageOps.autocontrast is applied to each one.

    Returns:
        A list with one image per w*h-byte chunk read from the file.
    """
    imgs = []
    with open(filename, 'rb') as ifile:
        # iter(callable, sentinel): read w*h bytes at a time until b'' (EOF).
        for data in iter(partial(ifile.read, w*h), b''):
            # Pillow takes size as (width, height).
            img = Image.frombuffer('L', (w, h), data)
            img = ImageOps.autocontrast(img)
            imgs.append(img)
    return imgs
def create_images2():
    """Load each w*h-byte frame from `filename`, mapping bytes in pure Python.

    Variant 2 of the benchmark: every byte is rewritten with a list
    comprehension (0 stays 0, anything else becomes 255) before the buffer is
    handed to Pillow.

    Returns:
        A list with one image per w*h-byte chunk read from the file.
    """
    imgs = []
    with open(filename, 'rb') as ifile:
        # iter(callable, sentinel): read w*h bytes at a time until b'' (EOF).
        for data in iter(partial(ifile.read, w*h), b''):
            data = bytes([0 if b == 0 else 255 for b in data])
            # Pillow takes size as (width, height).
            img = Image.frombuffer('L', (w, h), data)
            imgs.append(img)
    return imgs
def create_images3():
    """Load each w*h-byte frame from `filename` via memoryview + NumPy scaling.

    Variant 3 of the benchmark: the chunk is viewed as a 2-D unsigned-byte
    buffer, wrapped as a NumPy array and multiplied by 255.

    Returns:
        A list with one image per w*h-byte chunk read from the file.
    """
    imgs = []
    with open(filename, 'rb') as ifile:
        # iter(callable, sentinel): read w*h bytes at a time until b'' (EOF).
        for data in iter(partial(ifile.read, w*h), b''):
            # Array shape is (rows, columns) = (height, width).
            mem = memoryview(data).cast('B', shape=[h, w])
            arr = np.asarray(mem)
            img = Image.fromarray(arr*255)
            imgs.append(img)
    return imgs
def create_images4():
    """Load frames from `filename` one byte at a time into a reused buffer.

    Variant 4 of the benchmark: each byte is read individually, converted to
    an int and multiplied by 0xFF. If the file ends part-way through a frame,
    the remaining positions are written as 0 because int.from_bytes(b'') is 0.

    Returns:
        A list with one image per frame started in the file.
    """
    data = bytearray(w*h)
    imgs = []
    with open(filename, "rb") as f:
        byte = f.read(1)
        while byte != b'':
            for i in range(w*h):
                data[i] = int.from_bytes(byte, "big") * 0xFF
                byte = f.read(1)
            # bytes(data) snapshots the buffer, which is overwritten by the
            # next frame. Pillow takes size as (width, height).
            img = Image.frombuffer('L', (w, h), bytes(data))
            imgs.append(img)
    return imgs
def create_images5():
    """Load frames from `filename` one byte at a time, appending to a buffer.

    Variant 5 of the benchmark: each byte read is appended as 0xff when it is
    non-zero and 0x00 otherwise; an image is emitted every w*h bytes. Bytes
    left over after the last complete frame are not turned into an image.

    Returns:
        A list with one image per complete w*h-byte frame in the file.
    """
    imgs = []
    with open(filename, "rb") as f:
        i = 0  # number of bytes collected for the current frame
        data = bytearray()
        byte = f.read(1)
        while byte != b'':
            if byte != b'\x00':
                data.append(0xff)
            else:
                data.append(0x00)
            byte = f.read(1)
            i += 1
            if i == w*h:
                # Pillow takes size as (width, height).
                img = Image.frombuffer('L', (w, h), bytes(data))
                imgs.append(img)
                # Start a fresh frame.
                i = 0
                data = bytearray()
    return imgs
def create_images6():
    """Load each w*h-byte frame from `filename` with np.fromfile.

    Variant 6 of the benchmark: frames are read straight into uint8 arrays and
    multiplied by 255. Reading stops at the first chunk shorter than w*h.

    Returns:
        A list with one image per complete w*h-byte frame in the file.
    """
    imgs = []
    with open(filename, 'rb') as ifile:
        while True:
            arr = np.fromfile(ifile, dtype=np.uint8, count=w*h)
            if arr.size < w*h:
                # End of file (or an incomplete trailing frame).
                break
            # Array shape is (rows, columns) = (height, width).
            img = Image.fromarray(arr.reshape(h, w)*255)
            imgs.append(img)
    return imgs
def create_images7():
    """Load each w*h-byte frame from `filename` with np.frombuffer.

    Variant 7 of the benchmark: each chunk read from the file is viewed as a
    uint8 array, reshaped to 2-D and multiplied by 255.

    Returns:
        A list with one image per w*h-byte chunk read from the file.
    """
    imgs = []
    with open(filename, 'rb') as ifile:
        # iter(callable, sentinel): read w*h bytes at a time until b'' (EOF).
        for dat in iter(partial(ifile.read, w*h), b''):
            # Array shape is (rows, columns) = (height, width).
            arr = np.frombuffer(dat, dtype=np.uint8).reshape((h, w))
            img = Image.fromarray(arr*255)
            imgs.append(img)
    return imgs
def create_images8():
    """Load the whole of `filename` at once and slice it into w*h-byte frames.

    Variant 8 of the benchmark: the file is read in a single np.fromfile call,
    then each frame is sliced out, reshaped to 2-D and multiplied by 255.

    Returns:
        A list with one image per complete w*h-byte frame in the file.
    """
    imgs = []
    # uint8, not int8: 255 does not fit in a signed byte, so scaling an int8
    # array by 255 either changes the dtype or overflows, depending on the
    # NumPy version.
    data = np.fromfile(filename, dtype=np.uint8)
    # Integer division: number of complete frames, without a float round-trip.
    n = data.size // (w*h)
    for i in range(n):
        offset = i*w*h
        # Array shape is (rows, columns) = (height, width).
        state = np.reshape(data[offset:offset+w*h], (h, w))
        img = Image.fromarray(state*255)
        imgs.append(img)
    return imgs
def create_images9():
    """Rewrite 0x01 bytes to 0xff with the external `bbe` tool, then load frames.

    Variant 9 of the benchmark: a shell command runs `bbe` over byte_imgs.dat
    and writes byte_imgs_new.dat, which is then read in w*h-byte chunks.

    Returns:
        A list with one image per w*h-byte chunk read from the converted file.

    Raises:
        RuntimeError: if the shell command reports a non-zero status.
    """
    status = os.system(r"bbe -e 's/\x01/\xff/g' byte_imgs.dat > byte_imgs_new.dat")
    if status != 0:
        # Without this check a failed command still leaves an output file
        # behind (created by the redirect), and the function would quietly
        # return images built from empty or stale data.
        raise RuntimeError("bbe command failed with status %r" % (status,))
    imgs = []
    with open('byte_imgs_new.dat', 'rb') as ifile:
        # iter(callable, sentinel): read w*h bytes at a time until b'' (EOF).
        for data in iter(partial(ifile.read, w*h), b''):
            # Pillow takes size as (width, height).
            img = Image.frombuffer('L', (w, h), data)
            imgs.append(img)
    return imgs
import timeit
number = 10
# Time create_images1 .. create_images9 in order; each is executed `number`
# times and the mean wall-clock seconds per execution is printed next to its name.
for variant in range(1, 10):
    label = f"create_images{variant}"
    stmt = f'[func() for func in ({label},)]'
    elapsed = timeit.timeit(stmt, number=number, globals=globals())
    print(label, elapsed / number)
Average runtime for each function, reported in seconds. create_images3() and create_images7() are the clear winners in this test.
create_images1 0.012226119600018136
create_images2 0.09197459420001905
create_images3 0.0021811368000271615
create_images4 0.30249598119999066
create_images5 0.3393335546000344
create_images6 0.0033311289999801374
create_images7 0.0021913534999839614
create_images8 0.015457254699958867
create_images9 0.044248268000046664