I enabled the native blob soft delete feature in the Azure AI Search data source (Azure Blob Storage kind). I also enabled soft delete in ADLS. When I try to verify it, I see that the file hasn't been deleted. After some additional research, I found in the documentation that "Document keys for the documents in your index must be mapped to either be a blob property or blob metadata, such as "metadata_storage_path"."
Ok, but when I try to map the metadata_storage_path to the document key I get an error:
"Keys can only contain letters, digits, underscore (_), dash (-), or equal sign (=). "
How can I map metadata_storage_path to the key, as described in the documentation? The path always contains special characters.
My indexer:
{
"@odata.context": "*******.search.windows.net/$metadata#indexers/$entity",
"@odata.etag": ""*****************"",
"name": "tracking-changes-and-deletions-indexer",
"description": null,
"dataSourceName": "some",
"skillsetName": "experimental-skillset-test",
"targetIndexName": "tracking-changes-and-deletions-index",
"disabled": null,
"schedule": null,
"parameters": {
"batchSize": null,
"maxFailedItems": null,
"maxFailedItemsPerBatch": null,
"base64EncodeKeys": null,
"configuration": {
"dataToExtract": "contentAndMetadata",
"parsingMode": "text",
"imageAction": "none"
}
},
"fieldMappings": [
{
"sourceFieldName": "metadata_storage_path",
"targetFieldName": "title",
"mappingFunction": null
},
{
"sourceFieldName": "metadata_storage_last_modified",
"targetFieldName": "storage_last_modified",
"mappingFunction": null
}
],
"outputFieldMappings": [],
"cache": null,
"encryptionKey": null
}
And the index:
{
"name": "tracking-changes-and-deletions-index",
"defaultScoringProfile": null,
"fields": [
{
"name": "id",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": true,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "keyword",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "content",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "title",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "embedding",
"type": "Collection(Edm.Single)",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": 1536,
"vectorSearchProfile": "vector-profile-1717156292529",
"vectorEncoding": null,
"synonymMaps": []
}
],
"scoringProfiles": [],
"corsOptions": null,
"suggesters": [],
"analyzers": [],
"normalizers": [],
"tokenizers": [],
"tokenFilters": [],
"charFilters": [],
"encryptionKey": null,
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"k1": null,
"b": null
},
"semantic": {
"defaultConfiguration": null,
"configurations": []
},
"vectorSearch": {
"algorithms": [
{
"name": "vector-config-1717156300344",
"kind": "hnsw",
"hnswParameters": {
"metric": "cosine",
"m": 4,
"efConstruction": 400,
"efSearch": 500
},
"exhaustiveKnnParameters": null
},
{
"name": "vector-config-1718463795492",
"kind": "exhaustiveKnn",
"hnswParameters": null,
"exhaustiveKnnParameters": {
"metric": "cosine"
}
}
],
"profiles": [
{
"name": "vector-profile-1717156292529",
"algorithm": "vector-config-1717156300344",
"vectorizer": "vectorizer-1717156312140",
"compression": null
}
],
"vectorizers": [
{
"name": "vectorizer-1717156312140",
"kind": "customWebApi",
"azureOpenAIParameters": null,
"customWebApiParameters": {
"httpMethod": "POST",
"uri": "***********************************,
"timeout": "PT3M50S",
"authResourceId": null,
"httpHeaders": {},
"authIdentity": null
},
"aiServicesVisionParameters": null,
"amlParameters": null
}
],
"compressions": []
}
}
2
Answers
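I got the answer from the Azure support team: they say that I need to explicitly map the ADLS metadata field to the key field with the base64Encode mapping function in the indexer, like:
{
"sourceFieldName": "metadata_storage_path",
"targetFieldName": "id",
"mappingFunction": { "name": "base64Encode" }
}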
Since you have already created the index, its existing fields cannot be changed. Refer to this for the changes you can make to an index schema.
So, create a new index with `metadata_storage_path` as the key. Go to "Import data" in the AI Search service and select the data source you created with soft delete enabled. You will then get the option to select which field to use as the key.
Then you will be able to check the most recently modified files.
Refer to this for more information.