Tags: python, django, amazon-web-services, amazon-s3, heroku

How to upload file to AWS S3 with Django running on Heroku?


I have an app deployed on Heroku. I followed the linked manual to set up static file uploads to S3, and that works. Now I also need to upload a CSV file that is created by a Celery task to S3. The problem is that the Heroku file system is read-only, so I cannot save the file to disk first, and I get the error FileNotFoundError: [Errno 2] No such file or directory: 'tmp/a30113c5-bbbc-4432-9826-3918e547d407.csv'. How do I solve this?

import csv
import logging
import os
from typing import Any, List

import boto3
from botocore.exceptions import ClientError

# `app` is the Celery application instance and `fake` is a Faker instance,
# both defined elsewhere in the project.


@app.task
def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket."""

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client("s3")
    try:
        response = s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True


@app.task
def make_csv(data: List[Any], task_id: str):
    """Produce csv file with generated fake data and name it as task id."""
    headers: List[str] = ["name", "phone", "email"]

    file_path = os.path.normpath(f"tmp/{task_id}.csv")
    with open(file=file_path, mode="w", encoding="UTF-8", newline="") as csv_file:
        writer = csv.writer(
            csv_file, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        writer.writerow(headers)
        writer.writerows(data)
    return csv_file


@app.task(bind=True)
def generate_fake_data(self, total: int):
    """Generate fake data function."""
    fake_data: List[Any] = []
    for _ in range(total):
        name = fake.name()
        phone = fake.phone_number()
        email = fake.email()
        fake_data.append([name, phone, email])

    csv_file = make_csv(data=fake_data, task_id=self.request.id)
    upload_file(
        file_name=csv_file,
        bucket=os.getenv("AWS_STORAGE_BUCKET_NAME"),
    )
    return f"{total} random data rows created."

Following Ersain's answer, I ended up with this:

from io import StringIO


@app.task
def upload_file(file_name: str, bucket: str, data: StringIO) -> bool:
    """Upload a file to an S3 bucket."""
    s3_client = boto3.client("s3")
    try:
        response = s3_client.put_object(
            Body=data.getvalue(), Bucket=bucket, Key=f"upload/{file_name}.csv"
        )
    except ClientError as e:
        logging.error(e)
        return False
    return True


@app.task
def make_csv(data: List[Any]) -> StringIO:
    """Generate a fake data and store it in memory."""
    headers: List[str] = ["name", "phone", "email"]

    # Create a StringIO object to store the data in memory
    file_buffer = StringIO()

    writer = csv.writer(
        file_buffer, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL
    )
    writer.writerow(headers)
    writer.writerows(data)
    return file_buffer
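
Putting the pieces together, the calling task can pass the in-memory buffer straight to upload_file. Here is a sketch with the updated signatures (it mirrors the original generate_fake_data task and assumes AWS_STORAGE_BUCKET_NAME is set in the environment):

@app.task(bind=True)
def generate_fake_data(self, total: int):
    """Generate fake data and upload it to S3 without touching the file system."""
    fake_data: List[Any] = []
    for _ in range(total):
        fake_data.append([fake.name(), fake.phone_number(), fake.email()])

    # Build the CSV entirely in memory ...
    csv_buffer = make_csv(data=fake_data)

    # ... and hand the buffer over; the object key becomes upload/<task id>.csv.
    upload_file(
        file_name=self.request.id,
        bucket=os.getenv("AWS_STORAGE_BUCKET_NAME"),
        data=csv_buffer,
    )
    return f"{total} random data rows created."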

Solution

  • If the file system is read-only, build the file in memory (using StringIO) and upload it to S3 from there:

    from io import StringIO
    
    
    @app.task
    def make_csv(data: List[Any], task_id: str):
        """Produce csv file with generated fake data and name it as task id."""
        file_path = os.path.normpath(f"tmp/{task_id}.csv")
        csv_file = StringIO()
        
        writer = csv.writer(
            csv_file, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        writer.writerow(headers)
        writer.writerows(data)
        return csv_file
    

    And in your upload_file task, use the put_object method instead of upload_file and send the content as bytes:

    @app.task
    def upload_file(file, bucket, object_name=None):
        ...
    
        s3_client = boto3.client("s3")
        raw_file = file.getvalue().encode("utf-8")  # StringIO holds text, so encode it to bytes
        try:
            response = s3_client.put_object(Body=raw_file, Bucket=bucket, Key=object_name)
        except ClientError as e:
            logging.error(e)
            return False
        return True
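
    Alternatively, if you prefer boto3's managed transfer over put_object, you can wrap the encoded bytes in a BytesIO and pass it to upload_fileobj. A minimal sketch (the helper name upload_buffer is just illustrative, not part of the original code):

    from io import BytesIO

    import boto3


    def upload_buffer(text: str, bucket: str, key: str) -> None:
        """Upload in-memory text to S3 using boto3's managed transfer."""
        s3_client = boto3.client("s3")
        # upload_fileobj expects a binary file-like object, so encode the text first.
        s3_client.upload_fileobj(BytesIO(text.encode("utf-8")), bucket, key)

    For example, call upload_buffer(csv_file.getvalue(), bucket, f"upload/{task_id}.csv") to store the buffer under the same upload/ prefix.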