Search code examples
azure-cognitive-search, azure-cognitive-services

Translation Skill not writing fields to output


I have been unable to get my translated content fields to output in my results.

Here is a sample result from a query. I would expect my skillset to output 6 additional fields based on the two skills defined.

{
    "@odata.context": "https://text-skill-test.search.windows.net/indexes('translate-english1')/$metadata#docs(*)",
    "value": [
        {
            "@search.score": 1,
            "index_id": "1",
            "metadata_content_type": null,
            "language": null,
            "title": "3 Things to Know About Release Notes",
            "content": "\r\n<p>Hey there Office Insiders!</p>\r\n"
        }
    ]
}

My Indexer:


{
  "@odata.context": "https://text-skill-test.search.windows.net/$metadata#indexers/$entity",
  "name": "indexer1613347856608",
  "description": null,
  "dataSourceName": "englishfolder",
  "skillsetName": "azureblob-skillset",
  "targetIndexName": "translate-english1",
  "disabled": null,
  "schedule": null,
  "parameters": {
    "batchSize": null,
    "maxFailedItems": 0,
    "maxFailedItemsPerBatch": 0,
    "base64EncodeKeys": null,
    "configuration": {
      "dataToExtract": "contentAndMetadata",
      "parsingMode": "json"
    }
  },
  "fieldMappings": [
    {
      "sourceFieldName": "metadata_storage_path",
      "targetFieldName": "index_id",
      "mappingFunction": {
        "name": "base64Encode",
        "parameters": null
      }
    }
  ],
  "outputFieldMappings": [
    {
      "sourceFieldName": "/document/title",
      "targetFieldName": "title"
    },
    {
      "sourceFieldName": "/document/content",
      "targetFieldName": "content"
    }
  ]
}

My skillset definition is below. I have also tried changing the source fields below to leave out the /document prefix, but that didn't work either.

{
  "@odata.context": "https://text-skill-test.search.windows.net/$metadata#skillsets/$entity",
  "@odata.etag": "\"0x8D8D1466CF19C8E\"",
  "name": "azureblob-skillset",
  "description": "Skillset created from the portal. skillsetName: azureblob-skillset; contentField: metadata_storage_content_type; enrichmentGranularity: document; knowledgeStoreStorageAccount: ;",
  "skills": [
    {
      "@odata.type": "#Microsoft.Skills.Text.TranslationSkill",
      "name": "#1",
      "description": null,
      "context": "/document",
      "defaultFromLanguageCode": null,
      "defaultToLanguageCode": "fr",
      "suggestedFrom": "en",
      "inputs": [
        {
          "name": "text",
          "source": "/document/content"
        }
      ],
      "outputs": [
        {
          "name": "translatedText",
          "targetName": "translatedContentText_fr"
        },
        {
          "name": "translatedFromLanguageCode",
          "targetName": "translatedContentFromLanguageCode_fr"
        },
        {
          "name": "translatedToLanguageCode",
          "targetName": "translatedContentToLanguageCode_fr"
        }
      ]
    },
    {
      "@odata.type": "#Microsoft.Skills.Text.TranslationSkill",
      "name": "#2",
      "description": null,
      "context": "/document",
      "defaultFromLanguageCode": null,
      "defaultToLanguageCode": "fr",
      "suggestedFrom": "en",
      "inputs": [
        {
          "name": "text",
          "source": "/document/title"
        }
      ],
      "outputs": [
        {
          "name": "translatedText",
          "targetName": "translatedTitle_fr"
        },
        {
          "name": "translatedFromLanguageCode",
          "targetName": "translatedTitleFromLanguageCode_fr"
        },
        {
          "name": "translatedToLanguageCode",
          "targetName": "translatedTitleToLanguageCode_fr"
        }
      ]
    }
  ],
  "cognitiveServices": null,
  "knowledgeStore": null,
  "encryptionKey": null
}

The indexer run reports 20 succeeded documents and no errors or warnings (screenshot omitted).


Solution

  • I would need to see your index definition as well to pinpoint the fix, but there is at least one issue with your indexer and possibly a second one with your index:

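    1. Incorrect indexer output field mappings. Skill outputs are written to the enrichment tree, not to the index, so each output you want to see in search results must be mapped to an index field explicitly. Add an object like the following to your output field mappings, and a similar one for each of the other skill outputs you want indexed: { "sourceFieldName": "/document/translatedContentText_fr", "targetFieldName": "your index field name for French text", "mappingFunction": null }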

    2. You may also have a second issue: the target field may be missing from the index definition. To fix this, add a field of type Edm.String to the index and use its name as the targetFieldName in the output field mapping described above. Make sure the field is at least retrievable.