Azure AI Search
An Azure search service with built-in artificial intelligence capabilities that enrich information to help identify and explore relevant content at scale.
991 questions
This browser is no longer supported.
Upgrade to Microsoft Edge to take advantage of the latest features, security updates, and technical support.
Hi, I was creating an AI Search index to on my png files. I followed the instruction from the below sources, but still getting errors. Please help me out on where I missed in the code.
I got the following errors when running the indexer.
The data field 'myLayoutText' in the document with key 'xxxxx' has an invalid value of type 'Collection(Edm.ComplexType)' ('JSON arrays with element type 'Object' map to Collection(Edm.ComplexType)'). The expected type was 'Edm.ComplexType'.
Here is my code:
headers = {
'Content-Type':'application/json',
'api-key':search_key
}
data_source_name = 'document-bot-files-datasource'
index_name = 'ocr-index'
index_data = {
"name" : index_name,
"fields": [
{ "name": "ID", "type": "Edm.String", "key": True, "searchable": True,"filterable": True, "sortable": True,"facetable":True,"analyzer":"keyword" },
{ "name": "parent_id", "type": "Edm.String", "searchable": True, "filterable": True, "sortable": False,"facetable":False},
{ "name": "myText", "type": "Edm.String", "searchable": True, "filterable": False, "sortable": False,"facetable":False},
{ "name": "myLayoutText", "type": "Edm.ComplexType","fields":[
{ "name": "language", "type": "Edm.String", "searchable": False, "filterable": False, "sortable": False,"facetable":False},
{ "name": "text", "type": "Edm.String", "searchable": True, "filterable": False, "sortable": False,"facetable":False},
{ "name": "lines", "type": "Collection(Edm.ComplexType)","fields":[
{ "name": "boundingBox", "type": "Collection(Edm.ComplexType)", "fields":[
{ "name": "x", "type": "Edm.Int32", "searchable": False, "filterable": False, "sortable": False,"facetable":False},
{ "name": "y", "type": "Edm.Int32", "searchable": False, "filterable": False, "sortable": False,"facetable":False},
]},
{ "name": "text", "type": "Edm.String", "searchable": True, "filterable": False, "sortable": False,"facetable":False},
]},
{ "name": "words", "type": "Collection(Edm.ComplexType)","fields":[
{ "name": "boundingBox", "type": "Collection(Edm.ComplexType)", "fields":[
{ "name": "x", "type": "Edm.Int32", "searchable": False, "filterable": False, "sortable": False,"facetable":False},
{ "name": "y", "type": "Edm.Int32", "searchable": False, "filterable": False, "sortable": False,"facetable":False},
]},
{ "name": "text", "type": "Edm.String", "searchable": True, "filterable": False, "sortable": False,"facetable":False},
]}
]},
],
"vectorSearch": {
"profiles": [
{
"name": "myHnswProfile",
"algorithm": "myHnsw"
}
],
"algorithms": [
{
"name": "myHnsw",
"kind": "hnsw",
"hnswParameters": {
"m": 4,
"metric": "cosine"
}
}
]
}
}
index_delete_response = requests.delete(f"{search_endpoint}/indexes/{index_name}?api-version=2020-06-30",headers=headers)
index_response = requests.put(f"{search_endpoint}/indexes('{index_name}')?api-version=2023-11-01",json=index_data,headers=headers)
print("index creation:", index_response.text)
skill_name = 'ocr-skillset'
skill_data = {
"skills": [
{
"description": "Extracts text (plain and structured) from image.",
"@odata.type": "#Microsoft.Skills.Vision.OcrSkill",
"context": "/document/normalized_images/*",
"defaultLanguageCode": '',
"detectOrientation": True,
"inputs": [
{
"name": "image",
"source": "/document/normalized_images/*"
}
],
"outputs": [
{
"name": "text",
"targetName": "myText"
},
{
"name": "layoutText",
"targetName": "myLayoutText"
}
],
}
]
}
skill_update_response = requests.put(f"{search_endpoint}/skillsets('{skill_name}')?api-version=2023-10-01-Preview",json=skill_data,headers=headers)
print("skill creation update code:", skill_update_response.status_code)
# print("skill creation update code:", skill_update_response.text)
indexer_name = 'ocr-indexer'
indexer_data = {
"name" : indexer_name,
"dataSourceName" : data_source_name,
"targetIndexName" : index_name,
"skillsetName":skill_name,
"parameters": {
"configuration": {
"indexedFileNameExtensions" : ".pdf,.docx,.txt,.png,.jpeg",
"dataToExtract": "contentAndMetadata",
"parsingMode": "default",
"imageAction": "generateNormalizedImages"
}
},
"schedule" : { },
"fieldMappings" : [
],
"outputFieldMappings": [
{
"sourceFieldName": "/document/normalized_images/*/myText",
"targetFieldName": "myText"
},
{
"sourceFieldName": "/document/normalized_images/*/myLayoutText",
"targetFieldName": "myLayoutText"
},
]
}
indexer_update_response = requests.put(f"{search_endpoint}/indexers/{indexer_name}?api-version=2020-06-30",json=indexer_data,headers=headers)
print("indexer update code: ",indexer_update_response.status_code)
indexer_reset_response = requests.post(f"{search_endpoint}/indexers/{indexer_name}/reset?api-version=2020-06-30",headers=headers)
print("indexer reset code: ",indexer_reset_response.status_code)
# time.sleep(3)
indexer_run_response = requests.post(f"{search_endpoint}/indexers/{indexer_name}/run?api-version=2020-06-30",headers=headers)
print("indexer rerun code: ",indexer_run_response.status_code)