I have this test code which does the following:
Write a test message to a file > Barrier > Read the test message > Assert equal > Repeat.
from __future__ import print_function
import os
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
loop = True
def main():
global loop
txt_write = 'buhahaha'
with open('test', 'w') as f1:
if rank == 0:
f1.write(txt_write)
f1.flush()
os.fsync(f1.fileno())
comm.barrier()
with open('test') as f2:
txt_read = f2.read()
try:
assert txt_read == txt_write
except:
print("Assertion error", txt_read, "!=", txt_write, 'rank=', rank)
loop = False
finally:
comm.barrier()
if rank == 0:
os.remove('test')
if __name__ == '__main__':
i = 0
while loop:
main()
if i % 1000 == 0 and rank == 0:
print("Iterations:", i)
i += 1
It works for a few 100 or 1000 iterations, but then at one point it reads an empty file and the assertion fails. Other answers had recommended use of flush
and os.fsync
, but that does not seem to help - it just makes the execution slower. Any idea how to fix this?
Maybe you can try something like this, instead:
if rank == 0:
with open('test', 'w') as f1:
f1.write(txt_write)
# as @jschultz410 correctly pointed out,
# we remove f1.flush() and f1.close()
comm.barrier()
with open('test') as f2:
txt_read = f2.read()