system error when vectorising data

John Newsome 51 Reputation points
2024-01-18T07:48:28.14+00:00

When running an indexer to add data to a vector field in an index, I'm getting the following error:

The data field 'contentVector/0' in the document with key 'b3NNVkFQcVdoSk1EQUFBQUFBQUFBQT090' has an invalid value of type 'Collection(Edm.Double)' ('JSON arrays with element type 'Float' map to Collection(Edm.Double)'). The expected type was 'Collection(Edm.Single)'.

I've run a debugging session and I can see everything works right up to the point where the indexer maps the vector data back to the index column. [User's image]

and the columns in the index are as follows

{
  "@odata.context": "https://jwlsearchservice.search.windows.net/$metadata#indexes/$entity",
  "@odata.etag": "\"0x8DC17F662985B24\"",
  "name": "cosmosdb-index-vector",
  "defaultScoringProfile": null,
  "fields": [
    {
      "name": "id",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": false,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "path",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "content",
      "type": "Edm.String",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "standard.lucene",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "contentVector",
      "type": "Collection(Edm.Single)",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": 1536,
      "vectorSearchProfile": "my-vector-profile",
      "synonymMaps": []
    },
    {
      "name": "remoteUrl",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "rid",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": true,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    }
  ],
  "scoringProfiles": [],
  "corsOptions": null,
  "suggesters": [],
  "analyzers": [],
  "normalizers": [],
  "tokenizers": [],
  "tokenFilters": [],
  "charFilters": [],
  "encryptionKey": null,
  "similarity": {
    "@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
    "k1": null,
    "b": null
  },
  "semantic": null,
  "vectorSearch": {
    "algorithms": [
      {
        "name": "myHnsw",
        "kind": "hnsw",
        "hnswParameters": {
          "metric": "cosine",
          "m": 4,
          "efConstruction": 400,
          "efSearch": 500
        },
        "exhaustiveKnnParameters": null
      },
      {
        "name": "myExhaustiveKnn",
        "kind": "exhaustiveKnn",
        "hnswParameters": null,
        "exhaustiveKnnParameters": {
          "metric": "cosine"
        }
      }
    ],
    "profiles": [
      {
        "name": "my-vector-profile",
        "algorithm": "myHnsw",
        "vectorizer": "myOpenAIVectorizer"
      }
    ],
    "vectorizers": [
      {
        "name": "myOpenAIVectorizer",
        "kind": "azureOpenAI",
        "azureOpenAIParameters": {
          "resourceUri": "https://jwlausteast.openai.azure.com",
          "deploymentId": "ada-002",
          "apiKey": "#####",
          "authIdentity": null
        },
        "customWebApiParameters": null
      }
    ]
  }
}

If I try to change the type to Edm.Double, it cannot be saved because it is required to be Edm.Single. My skillset is below:

{
  "@odata.context": "https://jwlsearchservice.search.windows.net/$metadata#skillsets/$entity",
  "@odata.etag": "\"0x8DC171EEE534A0F\"",
  "name": "skillsetvector",
  "description": "",
  "skills": [
    {
      "@odata.type": "#Microsoft.Skills.Text.SplitSkill",
      "name": "#1",
      "description": "",
      "context": "/document",
      "defaultLanguageCode": "en",
      "textSplitMode": "sentences",
      "maximumPageLength": 500,
      "inputs": [
        {
          "name": "text",
          "source": "/document/content"
        }
      ],
      "outputs": [
        {
          "name": "textItems",
          "targetName": "textItems"
        }
      ]
    },
    {
      "@odata.type": "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill",
      "name": "#2",
      "description": null,
      "context": "/document/textItems/*",
      "resourceUri": "https://jwlausteast.openai.azure.com",
      "apiKey": "####",
      "deploymentId": "ada-002",
      "inputs": [
        {
          "name": "text",
          "source": "/document/textItems/*"
        }
      ],
      "outputs": [
        {
          "name": "embedding",
          "targetName": "contentVector"
        }
      ],
      "authIdentity": null
    }
  ],
  "cognitiveServices": {
    "@odata.type": "#Microsoft.Azure.Search.DefaultCognitiveServices",
    "description": null
  },
  "knowledgeStore": null,
  "indexProjections": null,
  "encryptionKey": null
}

Indexer as follows

{
  "@odata.context": "https://jwlsearchservice.search.windows.net/$metadata#indexers/$entity",
  "@odata.etag": "\"0x8DC17F72CFE8907\"",
  "name": "cosmosdb-indexer-vector",
  "description": "",
  "dataSourceName": "jwlcosmos",
  "skillsetName": "skillsetvector",
  "targetIndexName": "cosmosdb-index-vector",
  "disabled": null,
  "schedule": null,
  "parameters": {
    "batchSize": null,
    "maxFailedItems": 0,
    "maxFailedItemsPerBatch": 0,
    "base64EncodeKeys": false,
    "configuration": {}
  },
  "fieldMappings": [],
  "outputFieldMappings": [
    {
      "sourceFieldName": "/document/textItems/*/contentVector",
      "targetFieldName": "contentVector"
    }
  ],
  "cache": null,
  "encryptionKey": null
}

What is it that I'm doing wrong?

Azure AI Search
Azure AI Search
An Azure search service with built-in artificial intelligence capabilities that enrich information to help identify and explore relevant content at scale.
767 questions
{count} votes

Accepted answer
  1. Stefan Zauner-Rakousky 75 Reputation points
    2024-01-22T14:45:21.5933333+00:00

    Hi, I had the same issue! The reason this is not working is because you need to use projections in this "one-to-many" scenario: https://learn.microsoft.com/en-us/azure/search/index-projections-concept-intro?tabs=kstore-rest. In essence you need the following in your skillset (please customize to your needs) and a parentId-field in your index:

    ...
    "indexProjections": {
    
        "selectors": [
          {
            "targetIndexName": "<name of index>",
            "parentKeyFieldName": "<name of parent-id-field>",
            "sourceContext": "/document/textItems/*",
            "mappings": [
              {
                "name": "chunk",
                "source": "/document/textItems/*",
                "sourceContext": null,
                "inputs": []
              },
              {
                "name": "vector",
                "source": "/document/textItems/*/vector"
              }
            ]
          }
        ],
        "parameters": {
          "projectionMode": "skipIndexingParentDocuments"
        }
    },
    ...
    

    Hope this helps!


1 additional answer

Sort by: Most helpful
  1. brtrach-MSFT 15,366 Reputation points Microsoft Employee
    2024-01-20T03:06:29.8666667+00:00

    @John Newsome It seems that the data type of the field 'contentVector' in your index is 'Collection(Edm.Single)', but the data type of the field 'contentVector/0' in the document you are trying to index is 'Collection(Edm.Double)'. This is causing the error. To resolve this issue, you need to make sure that the data type of the field 'contentVector' in your index and the data type of the field 'contentVector' in your document are the same. You can either change the data type of the field 'contentVector' in your index to 'Collection(Edm.Double)' or change the data type of the field 'contentVector/0' in your document to 'Collection(Edm.Single)'. If you change the data type of the field 'contentVector' in your index to 'Collection(Edm.Double)', you will need to update your skillset to output 'Collection(Edm.Double)' for the 'contentVector' field. You will also need to update your output field mapping in your indexer to map the 'contentVector' field to the 'contentVector' field in your index. Alternatively, if you change the data type of the field 'contentVector/0' in your document to 'Collection(Edm.Single)', you will not need to make any changes to your skillset or output field mapping in your indexer.