Found the issue: the metadata field mappings were missing from the skillset's index projections (indexProjections).
Azure AI Search - Loss of SharePoint Metadata in Index After Adding a Skillset for Chunking and Embedding to the Indexer
009fe3
6
Reputation points
We are using the Azure AI Search SharePoint indexer to chat with data from our SharePoint document libraries. After a successful deployment, I confirmed that all metadata in the index was correct. We then wanted to try vectorizing the content to improve results with vector search in our OpenAI chatbot. The problem is that we lost all metadata after adding a skillset to the indexer to chunk the content and vectorize it. I can see the vectors in the index, but the metadata fields are now null.
Does anyone have any ideas why this is happening?
INDEX
SKILLSET
{
"@odata.context": "https://MYSEARCHNAME.search.windows.net/$metadata#skillsets/$entity",
"@odata.etag": "\"0x8DC6AC98D85E1D5\"",
"name": "skillset-chunkdocs-genembeddings",
"description": "Skillset to chunk documents and generate embeddings",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill",
"name": "#1",
"description": null,
"context": "/document/pages/*",
"resourceUri": "https://MYRESOURCEURI.openai.azure.com",
"apiKey": "<redacted>",
"deploymentId": "MYDEPLOYMENTID-text-embedding-ada-002",
"inputs": [
{
"name": "text",
"source": "/document/pages/*"
}
],
"outputs": [
{
"name": "embedding",
"targetName": "vector"
}
],
"authIdentity": null
},
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"name": "#2",
"description": "Split skill to chunk documents",
"context": "/document",
"defaultLanguageCode": "en",
"textSplitMode": "pages",
"maximumPageLength": 2000,
"pageOverlapLength": 500,
"maximumPagesToTake": 0,
"inputs": [
{
"name": "text",
"source": "/document/content"
}
],
"outputs": [
{
"name": "textItems",
"targetName": "pages"
}
]
}
],
"cognitiveServices": null,
"knowledgeStore": null,
"indexProjections": {
"selectors": [
{
"targetIndexName": "index-spo2",
"parentKeyFieldName": "parent_id",
"sourceContext": "/document/pages/*",
"mappings": [
{
"name": "chunk",
"source": "/document/pages/*",
"sourceContext": null,
"inputs": []
},
{
"name": "vector",
"source": "/document/pages/*/vector",
"sourceContext": null,
"inputs": []
},
{
"name": "title",
"source": "/document/metadata_spo_item_name",
"sourceContext": null,
"inputs": []
}
]
}
],
"parameters": {
"projectionMode": "skipIndexingParentDocuments"
}
},
"encryptionKey": null
}