Search code examples
pythonpython-3.xsockets

Issue with Python Socket Programming - Incomplete Data Reception on the Server-side


Thanks in advance.

I am working on a simple file information transfer application using Python sockets. The client-side code recursively gathers file information (SHA256 hash, size, timestamps, and path) for all files within a specified directory and sends this data to the server. Each file info is sent as a JSON object followed by a newline character.

I am searching for a long time on net. But no use. Please help or try to give some ideas how to achieve this.

The client first sends the total number of files prepended with @p1, then iterates through each file's info, and finally sends a special termination marker @p2 to indicate that all file info has been sent.

However, I'm encountering an issue where the server does not receive all the file info when it is started after the client begins sending data. Instead, the server only receives part of the data. Here are the simplified snippets of my client and server-side codes: Client Code Snippet:

import os
import hashlib
import json
import time
from datetime import datetime
import math
import socket
import socket as s
def format_file_size(size_in_bytes):

    size_units = ['B', 'KB', 'MB', 'GB']

    size_info = {}

    for unit in size_units:

        divisor = 1024 ** (size_units.index(unit) + 1)

        value = size_in_bytes / divisor

        if value >= 1 or unit == 'B':

            size_info[unit] = f"{value:.2f}"

    return size_info

def get_file_info(directory):
    file_info_list = []
    file_count = 0
    for root, dirs, files in os.walk(directory):
        for file in files:
            filepath = os.path.join(root, file)
            
            # Calculate SHA256 
            with open(filepath, 'rb') as f:
                sha256 = hashlib.sha256(f.read()).hexdigest()

            # Get file size
            size_in_bytes = os.path.getsize(filepath)
            size_info = format_file_size(size_in_bytes)
            
            # Obtain time information
            mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).strftime('%Y-%m-%d %H:%M:%S')
            ctime = datetime.fromtimestamp(os.path.getctime(filepath)).strftime('%Y-%m-%d %H:%M:%S')

            file_info = {
                "sha256": sha256,
                "size": size_info,
                "directory": root,
                "modified_time": mtime,
                "created_time": ctime,
                "filename": file
            }
            file_info_list.append(file_info)
            file_count += 1
    
    return file_info_list, file_count
    
    return file_info_list, file_count
def getip(domain): #Obtain the IPv6 address corresponding to the domain name
    address = socket.getaddrinfo(domain, 'http')
    return address[0][4][0]
def client():
    directories = [r'G:/PCB']  #The list of traversed directories currently has only one path
    total_file_info_list = []
    total_files_count = 0
    
    for directory in directories:
        file_info_list, files_in_directory = get_file_info(directory)
        total_file_info_list.extend(file_info_list)
        total_files_count += files_in_directory

    #print(f"Total files count: {total_files_count}")

    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
        ip = getip(r"XXXX.com")
        s.connect((ip, 12345))
        s.sendall(str(total_files_count).encode('utf-8') + b'@p1')
        
        print(f"Sending {total_files_count} files info...")
        for file_info in total_file_info_list:
            data = json.dumps(file_info, ensure_ascii=False) + '\n'
            s.sendall(data.encode('utf-8'))
            print(f"Sent file info: {file_info['filename']}")
            #print(f"Sent file info: {file_info['filename']}")
        s.sendall(b'@p2')

            
client()

Server Code Snippet:

import socket
import json

def server():
    host = ''  
    port = 12345

    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
        s.bind((host, port))
        s.listen()
        conn, addr = s.accept()

        # Receive "total number of files" (using '@ p1' as delimiter)
        total_files_str = b''
        while not b'@p1' in total_files_str:
            chunk = conn.recv(1024)
            if not chunk:
                raise IOError("Connection closed before file count received.")
            total_files_str += chunk

        total_files_end = total_files_str.find(b'@p1')
        total_files = int(total_files_str[:total_files_end].decode('utf-8'))
        print(f"Received total files count: {total_files}")

        file_info_list = []
        received_files_count = 0
        end_marker_received = False
        while not end_marker_received or received_files_count < total_files:
            file_info_json_parts = []

            while True:
                chunk = conn.recv(1024)
                if not chunk:
                    raise IOError("Connection closed before all files received.")
                
                file_info_json_parts.append(chunk)
                
                try:
                    file_info_json = b''.join(file_info_json_parts).decode('utf-8')

                    if file_info_json == '@p2':
                        end_marker_received = True
                        break

                    file_info = json.loads(file_info_json)
                    file_info_list.append(file_info)
                    received_files_count += 1
                    break
                except ValueError:
                  
                    pass

            if end_marker_received and received_files_count == total_files:
                print("All file info received.")
                break

            if received_files_count > 0:
                print(f"Received file info: {file_info['filename']}")

        # Save the received file information to a JSON file
        with open('received_file_info.json', 'w', encoding='utf-8') as f:
            for file_info in file_info_list:
                f.write(json.dumps(file_info, ensure_ascii=False) + '\n')

        # Ensure that all file information has been received before closing the connection
        conn.shutdown(socket.SHUT_RDWR)
        conn.close()

server()

At first, I thought it was because I didn't traverse subdirectories, and even after adding the function of traversing subdirectories, it still wasn't available. I tried to start the client first, and during the process of sending messages, I started the server. I found that the server could receive some information (about only one file's information), but soon the following error still occurred

Traceback (most recent call last):
  File "C:\Users\Administrator\Desktop\23122.py", line 69, in <module>
    server()
  File "C:\Users\Administrator\Desktop\23122.py", line 34, in server
    raise IOError("Connection closed before all files received.")
OSError: Connection closed before all files received.

thank you


Solution

  • Skip the @p1/@p2 and send the file size newline-terminated as well. Then on the server use socket.makefile to wrap the socket in a file-like object and use .readlines() to read the list size and exactly that many JSON lines, e.g.:

    client (truncated to the sending part):

    with socket.socket() as s:
        s.connect(('localhost', 12345))
        s.sendall(f'{total_files_count}'.encode() + b'\n')
        print(f'Sending {total_files_count} files info...')
        for file_info in total_file_info_list:
            data = json.dumps(file_info, ensure_ascii=False) + '\n'
            s.sendall(data.encode())
            print(f'Sent file info: {file_info['filename']}')
    

    Server (complete):

    import socket
    import json
    
    with socket.socket() as s:
        s.bind(('', 12345))
        s.listen()
        conn, addr = s.accept()
        with conn, conn.makefile('r', encoding='utf8') as infile:
            header = infile.readline()
            total_files = int(header)
            print(f'Received total files count: {total_files}')
            file_info_list = [json.loads(infile.readline()) for _ in range(total_files)]
            if len(file_info_list) == total_files:
                print('All file info received.')
    
            with open('received_file_info.json', 'w', encoding='utf-8') as f:
                for file_info in file_info_list:
                    f.write(json.dumps(file_info, ensure_ascii=True) + '\n')