Search code examples
Tags: azure, azure-functions, azure-ai-translator

Azure AI Translation - No document found in source with the given path and filters


I am trying to use Azure AI Translation in my Azure web application, but for some reason the translator cannot access the documents in my blob storage. I want to take a single blob/document from blob storage, translate it, and write the result to a target container. Instead, I get an error saying:

'innerError': {'code': 'NoDocumentsFound', 'message': 'No document found in source with the given path and filters.'}}

However, if I copy the URL into my web browser, it finds the source document and downloads it without a problem.

# Job states after which the Document Translation service stops updating a job.
_TERMINAL_JOB_STATUSES = frozenset({"Succeeded", "Failed", "Cancelled", "ValidationFailed"})

# Upper bound, in seconds, for any single HTTP call to the Translator service,
# so a stalled connection cannot block the caller forever.
_REQUEST_TIMEOUT_SECONDS = 30


def _build_translation_payload(source_url, target_url, target_language):
    """Return the request body for a single-document batch translation job."""
    # NOTE(review): with "storageType": "File" the service documentation shows
    # a target URL that points at a file rather than a container -- confirm
    # that a container-level SAS URL is accepted here.
    return {
        "inputs": [
            {
                "storageType": "File",
                "source": {
                    "sourceUrl": source_url,
                    "storageSource": "AzureBlob",
                    "language": "en",
                },
                "targets": [
                    {
                        "targetUrl": target_url,
                        "storageSource": "AzureBlob",
                        "category": "general",
                        "language": target_language,
                    }
                ],
            }
        ]
    }


def _wait_for_translation(status_url, headers, poll_interval, max_polls):
    """Poll the job status URL until the job reaches a terminal state.

    Returns True only when the final status is "Succeeded". Returns False
    when the job ends in any other terminal state or does not finish within
    ``max_polls`` attempts.
    """
    for _ in range(max_polls):
        time.sleep(poll_interval)
        response = requests.get(status_url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)

        if response.status_code != 200:
            print(f"Failed to get translation status. HTTP Status Code: {response.status_code}")
            print(f"Response: {response.text}")
            continue

        result = response.json()
        status = result.get("status")
        # HTTP 200 only means the status document was fetched; the job itself
        # may still be running or may have failed (e.g. "ValidationFailed").
        if status in _TERMINAL_JOB_STATUSES:
            print("Translation Status:")
            print(result)
            return status == "Succeeded"

    print(f"Translation job did not reach a terminal state after {max_polls} status checks.")
    return False


def translate_single_blob(client, blob_ID, *, poll_interval=5, max_polls=24):
    """Translate the blob whose metadata "id" equals ``blob_ID`` into French.

    The first blob in ``client`` whose metadata "id" matches ``blob_ID``
    (case-insensitively) is submitted to the Azure Document Translation
    service, with the "translated-blobs" container as the target.

    Args:
        client: Container client exposing ``list_blobs`` and
            ``get_blob_client`` (presumably an azure-storage-blob
            ``ContainerClient`` -- confirm against the caller).
        blob_ID: Value to match against each blob's "id" metadata entry.
        poll_interval: Seconds to wait before each job status check.
        max_polls: Maximum number of status checks before giving up.

    Returns:
        True if the translation job finished with status "Succeeded".
        False if no blob matched, the job was not accepted, the job ended in
        a failed state or did not finish in time, or an exception occurred.
    """
    subscription_key = settings.AZURE_TRANSLATOR['SUBSCRIPTION_KEY']
    endpoint_document = settings.AZURE_TRANSLATOR['DOCUMENT_TRANSLATION_ENDPOINT']
    region = settings.AZURE_TRANSLATOR['REGION']
    target_language = "fr"  # You can change this to the desired target language (e.g., 'fr' for French)
    version = '2024-05-01'

    connection_string = os.getenv("STORAGE_CON_STRING")
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)

    target_container_name = "translated-blobs"
    create_storage_container(connection_string, target_container_name)

    try:
        wanted_id = blob_ID.lower()

        # Ask the listing call for metadata so that no extra properties
        # request is needed for every blob in the container.
        matching_blob = None
        for blob in client.list_blobs(include=["metadata"]):
            metadata = blob.metadata
            if metadata and metadata.get("id", "").lower() == wanted_id:
                matching_blob = blob
                break

        if matching_blob is None:
            print(f"No blob found with ID '{blob_ID}'.")
            return False

        blob_client = client.get_blob_client(matching_blob)

        # Blobs named without a file extension have been observed to be
        # rejected by the service with "NoDocumentsFound"; surface that early.
        if not os.path.splitext(blob_client.blob_name)[1]:
            print(
                f"Warning: blob '{blob_client.blob_name}' has no file extension; "
                "the translation service may report 'NoDocumentsFound'."
            )

        account_name = blob_service_client.account_name
        account_key = os.getenv("AZURE_ACCOUNT_KEY")
        expiry = datetime.utcnow() + timedelta(minutes=15)

        sas_token = generate_blob_sas(
            account_name=account_name,
            account_key=account_key,
            container_name=blob_client.container_name,
            blob_name=blob_client.blob_name,
            permission=BlobSasPermissions(read=True, list=True),
            expiry=expiry,
        )
        blob_url = f"{blob_client.url}?{sas_token}"

        sas_token_target = generate_container_sas(
            account_name=account_name,
            account_key=account_key,
            container_name=target_container_name,
            permission=ContainerSasPermissions(write=True, list=True),
            expiry=expiry,
        )
        container_target_sas_url = (
            f"https://{account_name}.blob.core.windows.net/{target_container_name}?{sas_token_target}"
        )

        # Document translation request body
        payload = _build_translation_payload(blob_url, container_target_sas_url, target_language)
        headers = {
            'Ocp-Apim-Subscription-Key': subscription_key,
            'Content-Type': 'application/json'
        }
        constructed_url = f'{endpoint_document}/translator/document/batches?api-version={version}'

        response = requests.post(
            constructed_url, headers=headers, json=payload, timeout=_REQUEST_TIMEOUT_SECONDS
        )
        print(f'response status code: {response.status_code}\nresponse status: {response.reason}\nresponse headers: {response.headers}')

        if response.status_code != 202:
            print(f"Translation initiation failed. Status Code: {response.status_code}, Response: {response.text}")
            return False

        # Prefer the status URL handed back by the service; fall back to
        # building it from the job id in the response body.
        status_url = response.headers.get("Operation-Location")
        if not status_url:
            job_id = response.json().get('id')
            status_url = f"{endpoint_document}/translator/document/batches/{job_id}?api-version={version}"

        status_headers = {
            "Ocp-Apim-Subscription-Key": subscription_key,
            "Ocp-Apim-Subscription-Region": region,
        }
        return _wait_for_translation(status_url, status_headers, poll_interval, max_polls)

    except Exception as e:
        # Top-level boundary: report the failure to the caller as False.
        print(f"An error occurred while translating blob with ID '{blob_ID}': {str(e)}")
        return False

As the code illustrates, I include a SAS token with read access in the source URL and a SAS token with write access in the target URL. I have also configured the translator's managed identity in the Azure portal so that the translator has the "Contributor" role on the blob storage. Nothing works. Has anyone faced a similar issue and resolved it?

EDIT:

The modules used for the whole app look like this:

azure-appconfiguration==1.1.1
azure-cli
azure-batch==14.2.0
azure-identity==1.12.0
azure-keyvault-administration==4.4.0b2
azure-keyvault-certificates==4.7.0
azure-keyvault-keys==4.9.0b3
azure-keyvault-secrets==4.7.0
azure-mgmt-cognitiveservices==13.5.0
azure-mgmt-compute==30.6.0
azure-mgmt-containerinstance==10.1.0
azure-mgmt-containerregistry==10.3.0
azure-mgmt-containerservice==30.0.0
azure-mgmt-core==1.4.0
azure-mgmt-sql==4.0.0b16
azure-mgmt-sqlvirtualmachine==1.0.0b5
azure-mgmt-storage==21.1.0
azure-search-documents==11.6.0b3
azure-storage-blob==12.19.0
azure-storage-common==1.4.2
langchain_core
Django
nltk==3.8.1
oauthlib==3.2.2
openai==1.58.1
python-dotenv==1.0.0
whitenoise
psycopg2-binary
django-extensions
pyotp
msal
pytest
pytest-django
azure-ai-translation-text
azure-ai-translation-document

I get the following error/log:

Response status code: 202
Response status: Accepted
Response headers: {'Transfer-Encoding': 'chunked', 'Content-Type': 'application/json; charset=utf-8', 'x-requestid': '2cff681b-719d-428e-8686-7b17115fd5ce', 'operation-location': 'https://<TRANSLATOR_NAME>.cognitiveservices.azure.com/translator/document/batches/8a7ebc85-5d21-4c77-90a3-ca******ea?api-version=2024-05-01', 'x-envoy-upstream-service-time': '29', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload', 'apim-request-id': '2cff681b-719d-428e-8686-7b*******5ce', 'x-content-type-options': 'nosniff', 'x-ms-region': 'North Europe', 'Date': 'Mon, 27 Jan 2025 10:53:56 GMT'}
Translation Status:
{'id': '8a7ebc85-****-4c77-90a3-**********', 'createdDateTimeUtc': '2025-01-27T10:53:57.4631194Z', 'lastActionDateTimeUtc': '2025-01-27T10:53:57.8320937Z', 'status': 'ValidationFailed', 'error': {'code': 'InvalidRequest', 'message': 'No document found in source with the given path and filters.', 'target': 'Operation', 'innerError': {'code': 'NoDocumentsFound', 'message': 'No document found in source with the given path and filters.'}}, 'summary': {'total': 0, 'failed': 0, 'success': 0, 'inProgress': 0, 'notYetStarted': 0, 'cancelled': 0, 'totalCharacterCharged': 0}}

Solution

  • I resolved the issue by renaming the blobs in my blob storage. Before, the blob names did not include the file extension, so they looked something like:

    10201_02
    2021_321
    file
    

    But renaming the blobs to include the extension like this:

    10201_02.pdf
    2021_321.docx
    file.pdf
    

    fixed the issue.