system error when vectorising data

Question

When running an indexer to add data in an index to a vector field, I'm getting the following error:

The data field 'contentVector/0' in the document with key 'b3NNVkFQcVdoSk1EQUFBQUFBQUFBQT090' has an invalid value of type 'Collection(Edm.Double)' ('JSON arrays with element type 'Float' map to Collection(Edm.Double)'). The expected type was 'Collection(Edm.Single)'.

I've run a debugging session and I can see everything works right up to the point where the indexer maps the vector data back to the index column: [User's image]

and the columns in the index are as follows

{
  "@odata.context": "https://jwlsearchservice.search.windows.net/$metadata#indexes/$entity",
  "@odata.etag": "\"0x8DC17F662985B24\"",
  "name": "cosmosdb-index-vector",
  "defaultScoringProfile": null,
  "fields": [
    {
      "name": "id",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": false,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "path",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "content",
      "type": "Edm.String",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "standard.lucene",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "contentVector",
      "type": "Collection(Edm.Single)",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": 1536,
      "vectorSearchProfile": "my-vector-profile",
      "synonymMaps": []
    },
    {
      "name": "remoteUrl",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "rid",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": true,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    }
  ],
  "scoringProfiles": [],
  "corsOptions": null,
  "suggesters": [],
  "analyzers": [],
  "normalizers": [],
  "tokenizers": [],
  "tokenFilters": [],
  "charFilters": [],
  "encryptionKey": null,
  "similarity": {
    "@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
    "k1": null,
    "b": null
  },
  "semantic": null,
  "vectorSearch": {
    "algorithms": [
      {
        "name": "myHnsw",
        "kind": "hnsw",
        "hnswParameters": {
          "metric": "cosine",
          "m": 4,
          "efConstruction": 400,
          "efSearch": 500
        },
        "exhaustiveKnnParameters": null
      },
      {
        "name": "myExhaustiveKnn",
        "kind": "exhaustiveKnn",
        "hnswParameters": null,
        "exhaustiveKnnParameters": {
          "metric": "cosine"
        }
      }
    ],
    "profiles": [
      {
        "name": "my-vector-profile",
        "algorithm": "myHnsw",
        "vectorizer": "myOpenAIVectorizer"
      }
    ],
    "vectorizers": [
      {
        "name": "myOpenAIVectorizer",
        "kind": "azureOpenAI",
        "azureOpenAIParameters": {
          "resourceUri": "https://jwlausteast.openai.azure.com",
          "deploymentId": "ada-002",
          "apiKey": "#####",
          "authIdentity": null
        },
        "customWebApiParameters": null
      }
    ]
  }
}

If I try to change the type to Edm.Double, it cannot be saved because it is required to be Edm.Single. My skillset is below:

{
  "@odata.context": "https://jwlsearchservice.search.windows.net/$metadata#skillsets/$entity",
  "@odata.etag": "\"0x8DC171EEE534A0F\"",
  "name": "skillsetvector",
  "description": "",
  "skills": [
    {
      "@odata.type": "#Microsoft.Skills.Text.SplitSkill",
      "name": "#1",
      "description": "",
      "context": "/document",
      "defaultLanguageCode": "en",
      "textSplitMode": "sentences",
      "maximumPageLength": 500,
      "inputs": [
        {
          "name": "text",
          "source": "/document/content"
        }
      ],
      "outputs": [
        {
          "name": "textItems",
          "targetName": "textItems"
        }
      ]
    },
    {
      "@odata.type": "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill",
      "name": "#2",
      "description": null,
      "context": "/document/textItems/*",
      "resourceUri": "https://jwlausteast.openai.azure.com",
      "apiKey": "####",
      "deploymentId": "ada-002",
      "inputs": [
        {
          "name": "text",
          "source": "/document/textItems/*"
        }
      ],
      "outputs": [
        {
          "name": "embedding",
          "targetName": "contentVector"
        }
      ],
      "authIdentity": null
    }
  ],
  "cognitiveServices": {
    "@odata.type": "#Microsoft.Azure.Search.DefaultCognitiveServices",
    "description": null
  },
  "knowledgeStore": null,
  "indexProjections": null,
  "encryptionKey": null
}

The indexer is as follows:

{
  "@odata.context": "https://jwlsearchservice.search.windows.net/$metadata#indexers/$entity",
  "@odata.etag": "\"0x8DC17F72CFE8907\"",
  "name": "cosmosdb-indexer-vector",
  "description": "",
  "dataSourceName": "jwlcosmos",
  "skillsetName": "skillsetvector",
  "targetIndexName": "cosmosdb-index-vector",
  "disabled": null,
  "schedule": null,
  "parameters": {
    "batchSize": null,
    "maxFailedItems": 0,
    "maxFailedItemsPerBatch": 0,
    "base64EncodeKeys": false,
    "configuration": {}
  },
  "fieldMappings": [],
  "outputFieldMappings": [
    {
      "sourceFieldName": "/document/textItems/*/contentVector",
      "targetFieldName": "contentVector"
    }
  ],
  "cache": null,
  "encryptionKey": null
}

What is it that I'm doing wrong?

Accepted Answer

Hi, I had the same issue! This is not working because you need to use index projections in this "one-to-many" scenario: https://learn.microsoft.com/en-us/azure/search/index-projections-concept-intro?tabs=kstore-rest. In essence, you need the following in your skillset (please customize it to your needs) and a parent ID field in your index:

...
"indexProjections": {

    "selectors": [
      {
        "targetIndexName": "",
        "parentKeyFieldName": "",
        "sourceContext": "/document/text/*",
        "mappings": [
          {
            "name": "chunk",
            "source": "/document/textItems/*",
            "sourceContext": null,
            "inputs": []
          },
          {
            "name": "vector",
            "source": "/document/textItems/*/vector"
          }
        ]
      }
    ],
    "parameters": {
      "projectionMode": "skipIndexingParentDocuments"
    }
},
...

Hope this helps!

Answer

@John Newsome It seems that the data type of the field 'contentVector' in your index is 'Collection(Edm.Single)', but the data type of the field 'contentVector/0' in the document you are trying to index is 'Collection(Edm.Double)'. This is causing the error. To resolve this issue, you need to make sure that the data type of the field 'contentVector' in your index and the data type of the field 'contentVector' in your document are the same. You can either change the data type of the field 'contentVector' in your index to 'Collection(Edm.Double)' or change the data type of the field 'contentVector/0' in your document to 'Collection(Edm.Single)'. If you change the data type of the field 'contentVector' in your index to 'Collection(Edm.Double)', you will need to update your skillset to output 'Collection(Edm.Double)' for the 'contentVector' field. You will also need to update your output field mapping in your indexer to map the 'contentVector' field to the 'contentVector' field in your index. Alternatively, if you change the data type of the field 'contentVector/0' in your document to 'Collection(Edm.Single)', you will not need to make any changes to your skillset or output field mapping in your indexer.

Share via

system error when vectorising data

1 additional answer