I'm trying to read some binary files with python for my analysis generated with Zemax OpticStudio. The structure of the file is supposed to be the following:
Each chunk is made by
The structures' definition is the following:
typedef struct
{
unsigned int status;
int level;
int hit_object;
int hit_face;
int unused;
int in_object;
int parent;
int storage;
int xybin, lmbin;
double index, starting_phase;
double x, y, z;
double l, m, n;
double nx, ny, nz;
double path_to, intensity;
double phase_of, phase_at;
double exr, exi, eyr, eyi, ezr, ezi;
}
which has a size of 208 bytes, for your convenience.
Here is the code that I wrote with some research and a couple of brilliant answers from here.
from pathlib import Path
from functools import partial
from io import DEFAULT_BUFFER_SIZE
import struct
def little_endian_int(x):
return int.from_bytes(x,'little')
def file_byte_iterator(path):
"""iterator over lazily loaded file
"""
path = Path(path)
with path.open('rb') as file:
reader = partial(file.read1, DEFAULT_BUFFER_SIZE)
file_iterator = iter(reader, bytes())
for chunk in file_iterator:
yield from chunk
def ray_tell(rays_idcs:list,ray_idx:int,seg_idx:int):
idx = rays_idcs[ray_idx][0]
idx += 4 + 208*seg_idx
return idx
def read_header(bytearr:bytearray):
version = int.from_bytes(bytearr[0:4],'little')
zrd_format = version//10000
version = version%10000
num_seg_max = int.from_bytes(bytearr[4:8],'little')
return zrd_format,version,num_seg_max
def rays_indices(bytearr:bytearray):
index=8
rays=[]
while index <len(bytearr):
num_seg = int.from_bytes(bytearr[index:index+4],'little')
rays.append((index,num_seg))
index = index+4 + 208*num_seg
return rays
def read_ray(bytearr:bytearray,ray):
ray_idx,num_seg = ray
data = []
ray_idx = ray_idx + 4
seg_idx=0
for ray_idx in range(8,8+num_seg*208,208):
offsets = [0,4,8,12,16,20,24,28,32,36,40,48,56,64,72,80,88,96,104,112,120,128,136,144,152,160,168,176,184,192,200]
int_vars = offsets[0:11]
doubl_vars = offsets[11:]
data_integ = [bytearr[ray_idx+offset:ray_idx+offset+4] for offset in int_vars]
data_doubl = [bytearr[ray_idx+offset:ray_idx+offset+8] for offset in doubl_vars]
data.append([seg_idx,data_integ,data_doubl])
seg_idx += 1
return data
file="test_uncompressed.ZRD"
raypath = {}
filebin = bytearray(file_byte_iterator(file))
header = read_header(filebin)
print(header)
rays_idcs = rays_indices(filebin)
rays = []
for ray in rays_idcs:
rays.append(read_ray(filebin,ray))
ray = rays[1] #Random ray
segm = ray[2] #Random segm
ints = segm[1]
doub = segm[2]
print("integer vars:")
for x in ints:
print(x,little_endian_int(x))
print("double vars:")
for x in doub:
print(x,struct.unpack('<d',x))
I have verified that all of the structures have the right size and number of chunks and structures (my reading matches the number of segments and rays that I read with Zemax, ) , and thanks to the header, I verified the endianness of the file (little endian). My output is the following:
(0, 2002)
bytearray(b'\x1f\xd8\x9c?') 1067243551
bytearray(b'\x06\x80\x00\x00') 32774
bytearray(b'\x02\x00\x00\x00') 2
bytearray(b'\x11\x00\x00\x00') 17
bytearray(b'\x02\x00\x00\x00') 2
bytearray(b'\x00\x00\x00\x00') 0
bytearray(b'\x11\x00\x00\x00') 17
bytearray(b'\x01\x00\x00\x00') 1
bytearray(b'\x00\x00\x00\x00') 0
bytearray(b'\x00\x00\x00\x00') 0
double vars:
bytearray(b'\x00\x00\x00\x00# \xac\xe8') (-1.6425098109028998e+196,)
bytearray(b'\xe8\xe3\xf9?\x00\x00\x00\x00') (5.3030112e-315,)
bytearray(b'\x00\x00\x00\x00\x00\x00\x00\x00') (0.0,)
bytearray(b'\x00\x00\x00\x00p_\xb4\xec') (-4.389425605765071e+215,)
bytearray(b'5\xe3\x9d\xbf\xf0\xbd"\xa2') (-3.001836066957746e-144,)
bytearray(b'z"\xc0?\x00\x00\x00\x00') (5.28431047e-315,)
bytearray(b'\x00\x00\x00\x00 \xc9+\xa3') (-2.9165705864036956e-139,)
bytearray(b'g\xd4\xcd?\x9ch{ ') (3.2707669223572687e-152,)
bytearray(b'q\x1e\xef?\x00\x00\x00\x00') (5.299523535e-315,)
bytearray(b'\x00\x00\x00\x00%\x0c\xb4A') (336340224.0,)
bytearray(b'\t\xf2u\xbf\\3L\xe6') (-5.991371249309652e+184,)
bytearray(b'\xe1\xff\xef\xbf1\x8dV\x1e') (1.5664573023148095e-162,)
bytearray(b'\xa1\xe9\xe8?\x9c\x9a6\xfc') (-2.202825582975923e+290,)
bytearray(b'qV\xb9?\x00\x00\x00\x00') (5.28210966e-315,)
bytearray(b'\x00\x00\x00\x00\x00\x00\x00\x00') (0.0,)
bytearray(b'\x00\x00\x00\x00\xc6\xfd\x0c\xa1') (-1.7713316840526727e-149,)
bytearray(b'\x96\x94\x8d?\xad\xf9(\xcc') (-7.838624888507203e+58,)
bytearray(b'yN\xb2\xbff.\\\x1a') (1.0611651097687064e-181,)
bytearray(b'\xb9*\xae?\xac\xaf\xe5\xe1') (-3.90257774261585e+163,)
bytearray(b'c\xab\xd2\xbf\xccQ\x8bj') (1.7130904564012918e+205,)
bytearray(b'\xc8\xea\x8c\xbf\xdf\xdc\xe49') (8.22891935818188e-30,)
I'm reading correctly just the int values. I don't understand why I get those binaries for all the other variables
EDIT I want to highlight that bytearrays contain non-hexadecimal digits, and I'm sure that binary files are not corrupted, since I can read those in zemax
Solved. It was just an error in my pointer arithmetic in the read_ray function. Thanks to Mad Physicist for his suggestion to unpack the whole structure which put me in the right direction.
def read_ray(bytearr:bytearray,ray):
ray_idx,num_seg = ray
data = []
assert num_seg==little_endian_int(bytearr[ray_idx:ray_idx+4])
ray_idx = ray_idx + 4
for seg_ptr in range(ray_idx,ray_idx + num_seg*208,208):
...
data_integ = [bytearr[seg_ptr+offset:seg_ptr+offset+4] for offset in int_vars]
data_doubl = [bytearr[seg_ptr+offset:seg_ptr+offset+8] for offset in doubl_vars]
...
return data