Thanks in advance.
I am working on a simple file information transfer application using Python sockets. The client-side code recursively gathers file information (SHA256 hash, size, timestamps, and path) for all files within a specified directory and sends this data to the server. Each file info is sent as a JSON object followed by a newline character.
I am searching for a long time on net. But no use. Please help or try to give some ideas how to achieve this.
The client first sends the total number of files prepended with @p1, then iterates through each file's info, and finally sends a special termination marker @p2 to indicate that all file info has been sent.
However, I'm encountering an issue where the server does not receive all the file info when it is started after the client begins sending data. Instead, the server only receives part of the data. Here are the simplified snippets of my client and server-side codes: Client Code Snippet:
import os
import hashlib
import json
import time
from datetime import datetime
import math
import socket
import socket as s
def format_file_size(size_in_bytes):
size_units = ['B', 'KB', 'MB', 'GB']
size_info = {}
for unit in size_units:
divisor = 1024 ** (size_units.index(unit) + 1)
value = size_in_bytes / divisor
if value >= 1 or unit == 'B':
size_info[unit] = f"{value:.2f}"
return size_info
def get_file_info(directory):
file_info_list = []
file_count = 0
for root, dirs, files in os.walk(directory):
for file in files:
filepath = os.path.join(root, file)
# Calculate SHA256
with open(filepath, 'rb') as f:
sha256 = hashlib.sha256(f.read()).hexdigest()
# Get file size
size_in_bytes = os.path.getsize(filepath)
size_info = format_file_size(size_in_bytes)
# Obtain time information
mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).strftime('%Y-%m-%d %H:%M:%S')
ctime = datetime.fromtimestamp(os.path.getctime(filepath)).strftime('%Y-%m-%d %H:%M:%S')
file_info = {
"sha256": sha256,
"size": size_info,
"directory": root,
"modified_time": mtime,
"created_time": ctime,
"filename": file
}
file_info_list.append(file_info)
file_count += 1
return file_info_list, file_count
return file_info_list, file_count
def getip(domain): #Obtain the IPv6 address corresponding to the domain name
address = socket.getaddrinfo(domain, 'http')
return address[0][4][0]
def client():
directories = [r'G:/PCB'] #The list of traversed directories currently has only one path
total_file_info_list = []
total_files_count = 0
for directory in directories:
file_info_list, files_in_directory = get_file_info(directory)
total_file_info_list.extend(file_info_list)
total_files_count += files_in_directory
#print(f"Total files count: {total_files_count}")
with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
ip = getip(r"XXXX.com")
s.connect((ip, 12345))
s.sendall(str(total_files_count).encode('utf-8') + b'@p1')
print(f"Sending {total_files_count} files info...")
for file_info in total_file_info_list:
data = json.dumps(file_info, ensure_ascii=False) + '\n'
s.sendall(data.encode('utf-8'))
print(f"Sent file info: {file_info['filename']}")
#print(f"Sent file info: {file_info['filename']}")
s.sendall(b'@p2')
client()
Server Code Snippet:
import socket
import json
def server():
host = ''
port = 12345
with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
s.bind((host, port))
s.listen()
conn, addr = s.accept()
# Receive "total number of files" (using '@ p1' as delimiter)
total_files_str = b''
while not b'@p1' in total_files_str:
chunk = conn.recv(1024)
if not chunk:
raise IOError("Connection closed before file count received.")
total_files_str += chunk
total_files_end = total_files_str.find(b'@p1')
total_files = int(total_files_str[:total_files_end].decode('utf-8'))
print(f"Received total files count: {total_files}")
file_info_list = []
received_files_count = 0
end_marker_received = False
while not end_marker_received or received_files_count < total_files:
file_info_json_parts = []
while True:
chunk = conn.recv(1024)
if not chunk:
raise IOError("Connection closed before all files received.")
file_info_json_parts.append(chunk)
try:
file_info_json = b''.join(file_info_json_parts).decode('utf-8')
if file_info_json == '@p2':
end_marker_received = True
break
file_info = json.loads(file_info_json)
file_info_list.append(file_info)
received_files_count += 1
break
except ValueError:
pass
if end_marker_received and received_files_count == total_files:
print("All file info received.")
break
if received_files_count > 0:
print(f"Received file info: {file_info['filename']}")
# Save the received file information to a JSON file
with open('received_file_info.json', 'w', encoding='utf-8') as f:
for file_info in file_info_list:
f.write(json.dumps(file_info, ensure_ascii=False) + '\n')
# Ensure that all file information has been received before closing the connection
conn.shutdown(socket.SHUT_RDWR)
conn.close()
server()
At first, I thought it was because I didn't traverse subdirectories, and even after adding the function of traversing subdirectories, it still wasn't available. I tried to start the client first, and during the process of sending messages, I started the server. I found that the server could receive some information (about only one file's information), but soon the following error still occurred
Traceback (most recent call last):
File "C:\Users\Administrator\Desktop\23122.py", line 69, in <module>
server()
File "C:\Users\Administrator\Desktop\23122.py", line 34, in server
raise IOError("Connection closed before all files received.")
OSError: Connection closed before all files received.
thank you
Skip the @p1/@p2 and send the file size newline-terminated as well. Then on the server use socket.makefile
to wrap the socket in a file-like object and use .readlines()
to read the list size and exactly that many JSON lines, e.g.:
client (truncated to the sending part):
with socket.socket() as s:
s.connect(('localhost', 12345))
s.sendall(f'{total_files_count}'.encode() + b'\n')
print(f'Sending {total_files_count} files info...')
for file_info in total_file_info_list:
data = json.dumps(file_info, ensure_ascii=False) + '\n'
s.sendall(data.encode())
print(f'Sent file info: {file_info['filename']}')
Server (complete):
import socket
import json
with socket.socket() as s:
s.bind(('', 12345))
s.listen()
conn, addr = s.accept()
with conn, conn.makefile('r', encoding='utf8') as infile:
header = infile.readline()
total_files = int(header)
print(f'Received total files count: {total_files}')
file_info_list = [json.loads(infile.readline()) for _ in range(total_files)]
if len(file_info_list) == total_files:
print('All file info received.')
with open('received_file_info.json', 'w', encoding='utf-8') as f:
for file_info in file_info_list:
f.write(json.dumps(file_info, ensure_ascii=True) + '\n')