JSON Embeddings not fully uploaded to Index

AI Project Sweden 20 Reputation points
2023-08-15T23:20:00.6366667+00:00

Hello,

I am trying to upload a JSON embedding file that contains the content together with its embedding vectors. I can successfully upload it, but only the first content line is being added.

This is the part of the code that generates the embeddings and writes them to a JSON embedding file. Skärmbild 2023-08-16 015504.png Skärmbild 2023-08-16 015529.png

EDIT: I have since confirmed that only the last portion of my JSON file ends up in the index, regardless of the unique ID I set for the index's key field. What could be the reason for this?

def Create_Search_Index_Client():
    """Create or update the search index named by the module-level ``index_name``.

    Declares the index schema (string fields plus two 1536-dimension vector
    fields), an HNSW vector search configuration and a semantic
    configuration, submits the definition with ``create_or_update_index``
    and prints the name of the resulting index.

    Relies on the module-level ``service_endpoint``, ``credential`` and
    ``index_name``. Returns nothing.
    """
    client = SearchIndexClient(endpoint=service_endpoint, credential=credential)

    string_type = SearchFieldDataType.String
    vector_config_name = "my-vector-config"

    def vector_field(field_name):
        # Both vector fields share the same dimensions and algorithm config.
        return SearchField(
            name=field_name,
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=1536,
            vector_search_configuration=vector_config_name,
        )

    schema = [
        SimpleField(name="document_title", type=string_type,
                    sortable=True, filterable=True, facetable=True),
        SearchableField(name="decision_date", type=string_type),
        # "unique_id" is the document key of the index.
        SearchableField(name="unique_id", type=string_type, key=True),
        *[
            SearchableField(name=text_field, type=string_type, filterable=True)
            for text_field in ("chapter_title", "subchapter_title", "content_text")
        ],
        vector_field("content_vector"),
        vector_field("subchapter_vector"),
    ]

    hnsw = HnswVectorSearchAlgorithmConfiguration(
        name=vector_config_name,
        kind="hnsw",
        parameters={
            "m": 4,
            "efConstruction": 400,
            "efSearch": 500,
            "metric": "cosine",
        },
    )
    vector_settings = VectorSearch(algorithm_configurations=[hnsw])

    # Semantic ranking: subchapter title as title, chapter title as keywords,
    # content text as the content field.
    ranked_fields = PrioritizedFields(
        title_field=SemanticField(field_name="subchapter_title"),
        prioritized_keywords_fields=[SemanticField(field_name="chapter_title")],
        prioritized_content_fields=[SemanticField(field_name="content_text")],
    )
    semantic = SemanticSettings(
        configurations=[
            SemanticConfiguration(
                name="my-semantic-config",
                prioritized_fields=ranked_fields,
            )
        ]
    )

    definition = SearchIndex(
        name=index_name,
        fields=schema,
        vector_search=vector_settings,
        semantic_settings=semantic,
    )
    created = client.create_or_update_index(definition)
    print(f' {created.name} created')

And this is the upload to the index:

def Upload_all_json_embeddings_files_to_index(embeddings_directory, batch_size=1000):
    """Upload every ``*_embeddings.json`` file in a directory to the search index.

    Each matching file is parsed as JSON and its documents are sent to the
    index in batches. The per-document results returned by the service are
    inspected, so the final count reflects documents the service actually
    accepted rather than the number that were sent.

    Relies on the module-level ``service_endpoint``, ``index_name`` and
    ``credential``.

    Args:
        embeddings_directory: Directory scanned (non-recursively) for files
            whose name ends with ``_embeddings.json``.
        batch_size: Maximum number of documents sent per request. Defaults
            to 1000, which is the documented per-batch limit of the service
            (see the Azure AI Search indexing documentation).

    Raises:
        ValueError: If ``batch_size`` is not a positive integer.

    Note:
        The "upload" action replaces an existing document with the same key,
        so documents sharing a key value overwrite each other instead of
        being added side by side (per the Azure AI Search documentation).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    print(os.getcwd())
    # Create a search client
    search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)

    total_uploaded = 0
    total_failed = 0
    # Sorted so the processing order is reproducible between runs.
    for filename in sorted(os.listdir(embeddings_directory)):
        if not filename.endswith('_embeddings.json'):
            continue
        json_path = os.path.join(embeddings_directory, filename)

        # 'utf-8-sig' tolerates a leading byte-order mark in the file.
        with open(json_path, 'r', encoding='utf-8-sig') as file:
            documents = json.load(file)

        # A file holding a single JSON object is treated as one document;
        # passing the dict straight through would not be a list of documents.
        if isinstance(documents, dict):
            documents = [documents]
        if not documents:
            continue

        for start in range(0, len(documents), batch_size):
            batch = documents[start:start + batch_size]
            results = search_client.upload_documents(batch)

            # The service reports success per document; a request can
            # complete while individual documents are rejected.
            for outcome in results:
                if outcome.succeeded:
                    total_uploaded += 1
                else:
                    total_failed += 1
                    print(f"Failed to upload document {outcome.key!r} from {filename}: {outcome.error_message}")

    print(f"Uploaded {total_uploaded} documents from {embeddings_directory}")
    if total_failed:
        print(f"{total_failed} documents were rejected by the index")
Azure AI Search
Azure AI Search
An Azure search service with built-in artificial intelligence capabilities that enrich information to help identify and explore relevant content at scale.
1,281 questions
{count} votes

Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.