Search code examples
pythondockergoogle-cloud-storagefastapi

Invalid bucket name: gcsfs.retry.HttpError: Invalid bucket name: 'None', 400


I have built an API using FastAPI to retrieve data from a bucket in Google Cloud Storage. I tested the API locally with Swagger and Postman (to double-check), and it worked fine, but when I containerize it with Docker, I get an Invalid bucket name: "None", 400 error.

Here's the code...

import os
import gcsfs
from typing import Any, List

import pandas as pd
from dotenv import load_dotenv
from fastapi import APIRouter

from app import schemas 
from app.config import settings

# Load variables from a .env file into the process environment.
# NOTE(review): this only helps where a .env file is actually present —
# confirm it exists (or the variables are injected) in every deployment.
load_dotenv()

# os.getenv() returns None when the variable is not set; neither value is
# validated at this point.
# GOOGLE_SERVICE_ACCOUNT is read here but not referenced elsewhere in the
# visible code.
GOOGLE_SERVICE_ACCOUNT = os.getenv("GOOGLE_SERVICE_ACCOUNT")
# Path prefix used to build every parquet location read by the handlers.
GCP_FILE_PATH = os.getenv("GCP_FILE_PATH")
# One filesystem client, created at import time and shared by all handlers.
# NOTE(review): constructed without arguments, so credentials presumably come
# from gcsfs's default lookup — confirm.
fs = gcsfs.GCSFileSystem()

# Router on which the endpoints of this module are registered.
api_router = APIRouter()

@api_router.get("/health", response_model=schemas.Health, status_code=200)
def health() -> dict:
    """
    Health check: report the project name and the API version.
    """
    payload = schemas.Health(name=settings.PROJECT_NAME, api_version="1.0.0")
    return payload.dict()

@api_router.get("/consumer_type_values", response_model=schemas.UniqueConsumerType, status_code=200)
def consumer_type_values() -> dict:
    """
    Return the distinct consumer types found in the feature dataset.

    Reads ``X.parquet`` from the location given by ``GCP_FILE_PATH`` and
    collects the unique values of the ``consumer_type`` index level.

    Returns:
        A dict of the form ``{"values": [...]}``.

    Raises:
        RuntimeError: If ``GCP_FILE_PATH`` is unset or empty.
    """
    # os.getenv() yields None for a missing variable; formatted into the path
    # below it would become the literal bucket name "None", so fail early with
    # a message that names the real cause.
    if not GCP_FILE_PATH:
        raise RuntimeError("GCP_FILE_PATH environment variable is not set")

    X = pd.read_parquet(f"{GCP_FILE_PATH}/X.parquet", filesystem=fs)

    return {"values": list(X.index.unique(level="consumer_type"))}

@api_router.get("/area_values", response_model=schemas.UniqueArea, status_code=200)
def area_values() -> dict:
    """
    Return the distinct areas found in the feature dataset.

    Reads ``X.parquet`` from the location given by ``GCP_FILE_PATH`` and
    collects the unique values of the ``area`` index level.

    Returns:
        A dict of the form ``{"values": [...]}``.

    Raises:
        RuntimeError: If ``GCP_FILE_PATH`` is unset or empty.
    """
    # os.getenv() yields None for a missing variable; formatted into the path
    # below it would become the literal bucket name "None", so fail early with
    # a message that names the real cause.
    if not GCP_FILE_PATH:
        raise RuntimeError("GCP_FILE_PATH environment variable is not set")

    X = pd.read_parquet(f"{GCP_FILE_PATH}/X.parquet", filesystem=fs)

    return {"values": list(X.index.unique(level="area"))}

@api_router.get("/predictions", response_model=schemas.PredictionResults, status_code=200)
def get_predictions() -> Any:
    """
    Get observed values and predictions from GCP.

    Reads ``y.parquet`` and ``predictions.parquet`` from the location given by
    ``GCP_FILE_PATH`` and flattens each frame into parallel lists: one per
    index level (``datetime_utc``, ``area``, ``consumer_type``) plus the
    ``energy_consumption`` column.

    Returns:
        A dict with the keys ``datetime_utc``, ``area``, ``consumer_type`` and
        ``energy_consumption`` for the observed data, followed by the same
        four keys prefixed with ``preds_`` for the predictions.

    Raises:
        RuntimeError: If ``GCP_FILE_PATH`` is unset or empty.
    """
    # os.getenv() yields None for a missing variable; formatted into the paths
    # below it would become the literal bucket name "None", so fail early with
    # a message that names the real cause.
    if not GCP_FILE_PATH:
        raise RuntimeError("GCP_FILE_PATH environment variable is not set")

    y_train = pd.read_parquet(f"{GCP_FILE_PATH}/y.parquet", filesystem=fs)
    preds = pd.read_parquet(f"{GCP_FILE_PATH}/predictions.parquet", filesystem=fs)

    index_levels = ("datetime_utc", "area", "consumer_type")

    results = {}
    # Observed data uses the bare key names, predictions the "preds_" prefix;
    # insertion order matches the original hand-written dict.
    for prefix, frame in (("", y_train), ("preds_", preds)):
        for level in index_levels:
            results[f"{prefix}{level}"] = frame.index.get_level_values(level).to_list()
        results[f"{prefix}energy_consumption"] = frame["energy_consumption"].to_list()

    return results

Here's the logs from my Docker container...

ERROR:    Exception in ASGI application
2023-04-07 16:38:03 energy-forecasting-api-1       | Traceback (most recent call last):
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/uvicorn/protocols/http/h11_impl.py", line 429, in run_asgi
2023-04-07 16:38:03 energy-forecasting-api-1       |     result = await app(  # type: ignore[func-returns-value]
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/uvicorn/middleware/proxy_headers.py", line 78, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     return await self.app(scope, receive, send)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/fastapi/applications.py", line 276, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     await super().__call__(scope, receive, send)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/applications.py", line 122, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     await self.middleware_stack(scope, receive, send)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/middleware/errors.py", line 184, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     raise exc
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/middleware/errors.py", line 162, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     await self.app(scope, receive, _send)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/middleware/exceptions.py", line 79, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     raise exc
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/middleware/exceptions.py", line 68, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     await self.app(scope, receive, sender)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/fastapi/middleware/asyncexitstack.py", line 21, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     raise e
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     await self.app(scope, receive, send)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/routing.py", line 718, in __call__
2023-04-07 16:38:03 energy-forecasting-api-1       |     await route.handle(scope, receive, send)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/routing.py", line 276, in handle
2023-04-07 16:38:03 energy-forecasting-api-1       |     await self.app(scope, receive, send)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/routing.py", line 66, in app
2023-04-07 16:38:03 energy-forecasting-api-1       |     response = await func(request)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/fastapi/routing.py", line 237, in app
2023-04-07 16:38:03 energy-forecasting-api-1       |     raw_response = await run_endpoint_function(
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/fastapi/routing.py", line 165, in run_endpoint_function
2023-04-07 16:38:03 energy-forecasting-api-1       |     return await run_in_threadpool(dependant.call, **values)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/starlette/concurrency.py", line 41, in run_in_threadpool
2023-04-07 16:38:03 energy-forecasting-api-1       |     return await anyio.to_thread.run_sync(func, *args)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/anyio/to_thread.py", line 31, in run_sync
2023-04-07 16:38:03 energy-forecasting-api-1       |     return await get_asynclib().run_sync_in_worker_thread(
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
2023-04-07 16:38:03 energy-forecasting-api-1       |     return await future
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 867, in run
2023-04-07 16:38:03 energy-forecasting-api-1       |     result = context.run(func, *args)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/opt/api/app/api.py", line 47, in area_values
2023-04-07 16:38:03 energy-forecasting-api-1       |     X = pd.read_parquet(f"{GCP_FILE_PATH}/X.parquet", filesystem=fs)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/pandas/io/parquet.py", line 503, in read_parquet
2023-04-07 16:38:03 energy-forecasting-api-1       |     return impl.read(
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/pandas/io/parquet.py", line 251, in read
2023-04-07 16:38:03 energy-forecasting-api-1       |     result = self.api.parquet.read_table(
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/pyarrow/parquet/core.py", line 2926, in read_table
2023-04-07 16:38:03 energy-forecasting-api-1       |     dataset = _ParquetDatasetV2(
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/pyarrow/parquet/core.py", line 2452, in __init__
2023-04-07 16:38:03 energy-forecasting-api-1       |     finfo = filesystem.get_file_info(path_or_paths)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "pyarrow/_fs.pyx", line 571, in pyarrow._fs.FileSystem.get_file_info
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "pyarrow/error.pxi", line 144, in pyarrow.lib.pyarrow_internal_check_status
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "pyarrow/_fs.pyx", line 1490, in pyarrow._fs._cb_get_file_info
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/pyarrow/fs.py", line 330, in get_file_info
2023-04-07 16:38:03 energy-forecasting-api-1       |     info = self.fs.info(path)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/fsspec/asyn.py", line 115, in wrapper
2023-04-07 16:38:03 energy-forecasting-api-1       |     return sync(self.loop, func, *args, **kwargs)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/fsspec/asyn.py", line 100, in sync
2023-04-07 16:38:03 energy-forecasting-api-1       |     raise return_result
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/fsspec/asyn.py", line 55, in _runner
2023-04-07 16:38:03 energy-forecasting-api-1       |     result[0] = await coro
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/gcsfs/core.py", line 790, in _info
2023-04-07 16:38:03 energy-forecasting-api-1       |     exact = await self._get_object(path)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/gcsfs/core.py", line 491, in _get_object
2023-04-07 16:38:03 energy-forecasting-api-1       |     res = await self._call(
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/gcsfs/core.py", line 418, in _call
2023-04-07 16:38:03 energy-forecasting-api-1       |     status, headers, info, contents = await self._request(
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/decorator.py", line 221, in fun
2023-04-07 16:38:03 energy-forecasting-api-1       |     return await caller(func, *(extras + args), **kw)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/gcsfs/retry.py", line 149, in retry_request
2023-04-07 16:38:03 energy-forecasting-api-1       |     raise e
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/gcsfs/retry.py", line 114, in retry_request
2023-04-07 16:38:03 energy-forecasting-api-1       |     return await func(*args, **kwargs)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/gcsfs/core.py", line 411, in _request
2023-04-07 16:38:03 energy-forecasting-api-1       |     validate_response(status, contents, path, args)
2023-04-07 16:38:03 energy-forecasting-api-1       |   File "/usr/local/lib/python3.9/site-packages/gcsfs/retry.py", line 101, in validate_response
2023-04-07 16:38:03 energy-forecasting-api-1       |     raise HttpError(error)
2023-04-07 16:38:03 energy-forecasting-api-1       | gcsfs.retry.HttpError: Invalid bucket name: 'None', 400

Any ideas why this may be happening?


Solution

  • GCP_FILE_PATH = os.getenv("GCP_FILE_PATH")
    

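    The variable was not found in the container's environment. os.getenv() returns None in that case, so GCP_FILE_PATH is None. (Locally, load_dotenv() most likely picked the value up from a .env file that is not available inside the Docker container.)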

    Then, when this code runs:

    X = pd.read_parquet(f"{GCP_FILE_PATH}/X.parquet", filesystem=fs)
    

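    The f-string f"{GCP_FILE_PATH}/X.parquet" therefore evaluates to "None/X.parquet", and gcsfs treats the first path component, "None", as the bucket name. The request fails because you don't have a bucket named "None". To fix it, make sure GCP_FILE_PATH is set inside the container, for example with docker run -e GCP_FILE_PATH=... or with an environment: / env_file: entry in docker-compose.yml.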