Search code examples
python aspose aspose.pdf

How do I fix this aspose-pdf-cloud-python script to successfully download a processed PDF file from aspose storage?


How do I fix this aspose-pdf-cloud-python script to successfully download a processed PDF file from aspose storage?
Environment:

  • Windows 11 23H2;
  • Python 3.11.4;
import os
from asposepdfcloud import PdfApi, models
from asposepdfcloud.api_client import ApiClient

# Replace with your Aspose Cloud App key and App SID
# (the two values below are placeholders to be substituted before running)
app_key = 'app_key personal'
app_sid = 'app_sid personal'

# Initialize the PdfApi client; `pdf_api` is the module-level client that
# process_pdf_files_in_folder uses for every cloud call
pdf_api_client = ApiClient(app_key=app_key, app_sid=app_sid)
pdf_api = PdfApi(pdf_api_client)

def process_pdf_files_in_folder(input_folder, output_folder):
    """Upload each ".pdf" file in a folder, replace text in a cloud copy, and try to save it locally.

    For every file name in ``input_folder`` that ends with ".pdf", the file is
    uploaded through the module-level ``pdf_api`` client, copied in cloud
    storage to ``processed_<name>``, a text replacement is run on the copy,
    and the result is then written into ``output_folder``.

    NOTE: as written, the final write fails with
    ``TypeError: a bytes-like object is required, not 'str'`` because the
    value returned by ``pdf_api.download_file`` is a ``str``, not bytes.

    Parameters:
    input_folder (str): Folder that is scanned for PDF files.
    output_folder (str): Folder for the results; created if it does not exist.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Iterate over all PDF files in the input folder
    for filename in os.listdir(input_folder):
        # The extension check is case-sensitive: names ending in ".PDF" are skipped
        if filename.endswith(".pdf"):
            input_file_path = os.path.join(input_folder, filename)
            remote_name = filename
            copied_file = f'processed_{filename}'
            
            # Upload PDF file to cloud storage
            pdf_api.upload_file(remote_name, input_file_path)

            # Copy the file, so the replacement below runs on the copy
            pdf_api.copy_file(remote_name, copied_file)

            # Replace text: every match of old_value is replaced with an empty string
            text_replace = models.TextReplace(old_value='Watermark instead', new_value='', regex=True)
            text_replace_list = models.TextReplaceListRequest(text_replaces=[text_replace])
            pdf_api.post_document_text_replace(copied_file, text_replace_list)
            
            # Download the processed file to the local system
            output_file_path = os.path.join(output_folder, copied_file)
            
            # Retrieve the file content from the cloud
            # NOTE(review): the returned value is a str (see the TypeError raised
            # by file.write below); presumably it is the path of a locally
            # downloaded file rather than the file's bytes - confirm against the SDK
            response = pdf_api.download_file(copied_file)
            
            # Open a file stream to write the downloaded content
            with open(output_file_path, 'wb') as file:
                # Write the content to the file
                # This is the failing line: a binary-mode file rejects a str argument
                file.write(response)

            print(f'Processed and saved: {output_file_path}')

# Use specific folder paths: PDFs are read from D:\input and the processed
# copies are meant to be written to D:\output (runs on import/execution of the script)
process_pdf_files_in_folder(r'D:\input', r'D:\output')

The error when running the code is as follows:

D:\>python rm.py
host: https://api.aspose.cloud/v3.0
tokenUrl: https://api.aspose.cloud/connect/token
Traceback (most recent call last):
  File "D:\rm.py", line 49, in <module>
    process_pdf_files_in_folder(r'D:\input', r'D:\output')
  File "D:\rm.py", line 44, in process_pdf_files_in_folder
    file.write(response)
TypeError: a bytes-like object is required, not 'str'

I have modified the code several times and it still doesn't work; I need runnable code.


Solution

  • The problem has been resolved, and this is the successful and executable code I obtained after seeking help through other channels:

    # Import the operating system interface module
    import os
    # Import the module for file and directory operations
    import shutil
    # Import Aspose.PDF cloud API and related models
    from asposepdfcloud import PdfApi, models
    from asposepdfcloud.api_client import ApiClient
    
    # Replace with your Aspose Cloud App key and App SID
    # (the two values below are placeholders to be substituted before running)
    app_key = 'app_key personal'
    app_sid = 'app_sid personal'
    
    # Initialize the PdfApi client; `pdf_api` is the module-level client that
    # process_pdf_files_in_folder uses for every cloud call
    pdf_api_client = ApiClient(app_key=app_key, app_sid=app_sid)
    pdf_api = PdfApi(pdf_api_client)
    
    def process_pdf_files_in_folder(input_folder, output_folder):
        """
        Process all PDF files in the input folder, replace text, and save the processed files to the output folder.

        Each PDF is uploaded through the module-level ``pdf_api`` client, copied in
        cloud storage to ``processed_<name>``, has the text replacement applied to
        that copy, and is then downloaded and moved into ``output_folder``.

        Only regular files are processed, and the ".pdf" extension is matched
        case-insensitively, so "SCAN.PDF" is handled while a sub-folder that
        happens to be named "archive.pdf" is skipped.

        Parameters:
        input_folder (str): The path to the input folder containing PDF files to be processed.
        output_folder (str): The path to the output folder where processed PDF files will be saved.
        """
        # Ensure the output folder exists, create if it does not
        os.makedirs(output_folder, exist_ok=True)

        # Iterate over all entries in the input folder
        for filename in os.listdir(input_folder):
            # Get the full path of the candidate input file
            input_file_path = os.path.join(input_folder, filename)

            # Guard clause: skip anything that is not a regular file with a PDF
            # extension. The comparison is lower-cased because extensions are not
            # case-sensitive on Windows, and directories cannot be uploaded.
            if not filename.lower().endswith(".pdf") or not os.path.isfile(input_file_path):
                continue

            # Set the remote file name for upload
            remote_name = filename
            # Set the name for the processed file
            copied_file = f'processed_{filename}'

            # Upload the PDF file to cloud storage
            pdf_api.upload_file(remote_name, input_file_path)

            # Copy the file in cloud storage so the uploaded original stays untouched
            pdf_api.copy_file(remote_name, copied_file)

            # Create a text replacement object
            text_replace = models.TextReplace(old_value='Watermark instead', new_value='', regex=True)
            # Create a text replacement list request
            text_replace_list = models.TextReplaceListRequest(text_replaces=[text_replace])
            # Perform text replacement in the copied file
            pdf_api.post_document_text_replace(copied_file, text_replace_list)

            # Set the full path for the output file
            output_file_path = os.path.join(output_folder, copied_file)

            # download_file hands back the path of a locally downloaded file (a str),
            # not the file's bytes, so the file is moved instead of being written out
            download_filepath = pdf_api.download_file(copied_file)
            # Move the downloaded file to the output folder
            shutil.move(download_filepath, output_file_path)

            # Print the path of the processed and saved file
            print(f'Processed and saved: {output_file_path}')
    
    # Use specific folder paths: PDFs are read from D:\input and the processed
    # copies are written to D:\output (runs as soon as the script is executed)
    process_pdf_files_in_folder(r'D:\input', r'D:\output')