Search code examples
python python-3.x multithreading python-multithreading

Multithreading issue: I want to add an extra thread that writes what 4 threads have already read from 4 text files into a new text file


I have 4 threads that each read from one of 4 text files and print the contents on the screen. I want to add an extra thread that writes and saves what those 4 threads read into a new text file. Current code:

import pandas as pd
import threading
from datetime import datetime 

# Wall-clock reference taken at module load; the elapsed time reported at the
# end of the script is measured from this point.
start_time = datetime.now() 


def print_text(filename):
    """Parse ``filename`` as tab-delimited text with pandas and print the result."""
    print(pd.read_csv(filename, delimiter="\t"))

if __name__ == "__main__":
    filenames = ['file1.txt', "file2.txt", "file3.txt", "file4.txt"]

    # One thread per input file; nothing runs until start() is called.
    threads = [
        threading.Thread(target=print_text, args=(name,))
        for name in filenames
    ]

    # Start every thread before waiting on any of them so they can run
    # concurrently.
    for thread in threads:
        thread.start()

    # Block until each thread has finished.
    for thread in threads:
        thread.join()

# Computed at module level, outside the __main__ guard.
time_elapsed = datetime.now() - start_time

print('Time elapsed (hh:mm:ss.ms) {}'.format(time_elapsed))

Solution

  • This shows how to use a queue to route the output of your four "writer" threads to a "reader" thread. The astute reader will realize that there is no point in spawning a 5th thread for this. We could call print_result in the mainline code and get the same result with lower overhead.

    import queue
    import threading
    from datetime import datetime 
    
    # Wall-clock reference taken at module load; the elapsed time reported at
    # the end of the script is measured from this point.
    start_time = datetime.now() 
    
    def print_text(q, filename):
        """Feed every line of ``filename`` into the queue ``q``.

        Each line is stripped of leading and trailing whitespace before being
        queued. The ``'--end--'`` sentinel is always queued last, even when the
        file cannot be opened or read, so a consumer that counts sentinels is
        never left blocked on ``q.get()``. Any I/O error still propagates to
        the caller.
        """
        try:
            # The context manager closes the file deterministically instead of
            # leaving the handle to the garbage collector.
            with open(filename) as source:
                for line in source:
                    q.put(line.strip())
        finally:
            # Signal "this producer is done" no matter how the read ended.
            q.put('--end--')
    
    def print_result(q, count=0):
        """Print items taken from ``q`` until ``count`` sentinels have arrived.

        Every ``'--end--'`` item decrements the number of sentinels still
        expected; any other item is printed. With the default ``count`` of 0
        nothing is read from the queue.
        """
        remaining = count
        while remaining:
            item = q.get()
            if item != '--end--':
                print(item)
                continue
            # A sentinel arrived: one fewer still expected.
            remaining -= 1
    
    if __name__ == "__main__":
        filenames = ['file1.txt', "file2.txt", "file3.txt", "file4.txt"]
        q = queue.Queue()

        # One thread per input file, each pushing its lines onto the shared
        # queue...
        threads = [
            threading.Thread(target=print_text, args=(q, name))
            for name in filenames
        ]
        # ...plus a single thread that drains the queue until it has seen one
        # end marker per input file.
        threads.append(
            threading.Thread(target=print_result, args=(q, len(filenames)))
        )

        # Start every thread before waiting on any of them.
        for thread in threads:
            thread.start()

        # Block until each thread has finished.
        for thread in threads:
            thread.join()

    # Computed at module level, outside the __main__ guard.
    time_elapsed = datetime.now() - start_time

    print('Time elapsed (hh:mm:ss.ms) {}'.format(time_elapsed))