Search code examples
Tags: azure, azure-cognitive-search, azure-ai-search

Azure AI Search native soft delete problem?


I enabled the native blob soft delete feature in the Azure AI Search data source (Azure Blob Storage kind). I also enabled soft delete in the ADLS storage account. When I try to verify it, I see that the deleted file hasn't been removed from the index. After some additional research, I found in the documentation that "Document keys for the documents in your index must be mapped to either be a blob property or blob metadata, such as "metadata_storage_path"."

OK, but when I try to map metadata_storage_path to the document key, I get an error: "Keys can only contain letters, digits, underscore (_), dash (-), or equal sign (=)." How can I map metadata_storage_path to the key as written in the documentation? The path always contains special characters.

My indexer:

 {
  "@odata.context": "*******.search.windows.net/$metadata#indexers/$entity",
  "@odata.etag": "\"*****************\"",
  "name": "tracking-changes-and-deletions-indexer",
  "description": null,
  "dataSourceName": "some",
  "skillsetName": "experimental-skillset-test",
  "targetIndexName": "tracking-changes-and-deletions-index",
  "disabled": null,
  "schedule": null,
  "parameters": {
    "batchSize": null,
    "maxFailedItems": null,
    "maxFailedItemsPerBatch": null,
    "base64EncodeKeys": null,
    "configuration": {
      "dataToExtract": "contentAndMetadata",
      "parsingMode": "text",
      "imageAction": "none"
    }
  },
  "fieldMappings": [
    {
      "sourceFieldName": "metadata_storage_path",
      "targetFieldName": "title",
      "mappingFunction": null
    },
    {
      "sourceFieldName": "metadata_storage_last_modified",
      "targetFieldName": "storage_last_modified",
      "mappingFunction": null
    }
  ],
  "outputFieldMappings": [],
  "cache": null,
  "encryptionKey": null
}

And the index:

{
  "name": "tracking-changes-and-deletions-index",
  "defaultScoringProfile": null,
  "fields": [
    {
      "name": "id",
      "type": "Edm.String",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": true,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "keyword",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "content",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "title",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "vectorEncoding": null,
      "synonymMaps": []
    },
    {
      "name": "embedding",
      "type": "Collection(Edm.Single)",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "stored": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": 1536,
      "vectorSearchProfile": "vector-profile-1717156292529",
      "vectorEncoding": null,
      "synonymMaps": []
    }
  ],
  "scoringProfiles": [],
  "corsOptions": null,
  "suggesters": [],
  "analyzers": [],
  "normalizers": [],
  "tokenizers": [],
  "tokenFilters": [],
  "charFilters": [],
  "encryptionKey": null,
  "similarity": {
    "@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
    "k1": null,
    "b": null
  },
  "semantic": {
    "defaultConfiguration": null,
    "configurations": []
  },
  "vectorSearch": {
    "algorithms": [
      {
        "name": "vector-config-1717156300344",
        "kind": "hnsw",
        "hnswParameters": {
          "metric": "cosine",
          "m": 4,
          "efConstruction": 400,
          "efSearch": 500
        },
        "exhaustiveKnnParameters": null
      },
      {
        "name": "vector-config-1718463795492",
        "kind": "exhaustiveKnn",
        "hnswParameters": null,
        "exhaustiveKnnParameters": {
          "metric": "cosine"
        }
      }
    ],
    "profiles": [
      {
        "name": "vector-profile-1717156292529",
        "algorithm": "vector-config-1717156300344",
        "vectorizer": "vectorizer-1717156312140",
        "compression": null
      }
    ],
    "vectorizers": [
      {
        "name": "vectorizer-1717156312140",
        "kind": "customWebApi",
        "azureOpenAIParameters": null,
        "customWebApiParameters": {
          "httpMethod": "POST",
          "uri": "***********************************",
          "timeout": "PT3M50S",
          "authResourceId": null,
          "httpHeaders": {},
          "authIdentity": null
        },
        "aiServicesVisionParameters": null,
        "amlParameters": null
      }
    ],
    "compressions": []
  }
}

Solution

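  • I got the answer from the Azure support team: I need to explicitly map the ADLS metadata field (metadata_storage_path) to the index's key field in the indexer, using the base64Encode mapping function so that the path's special characters become valid key characters. For example, add this entry to the indexer's "fieldMappings" (here "id" is the key field of the index):

    {
      "sourceFieldName": "metadata_storage_path",
      "targetFieldName": "id",
      "mappingFunction": { "name": "base64Encode" }
    }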