Hello @Nihit Mody ,
You need to have an OpenAI resource and a supported model.
Check the supported providers and models here.
Since you already have the models and embedding endpoints within AI services, you can use either the SDK or the REST API.
Below is a sample that creates an indexer on JSON array data via the Python SDK:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient
from azure.search.documents.indexes.models import (
SearchIndexer,
IndexingParameters,
IndexingParametersConfiguration,
SearchIndexerDataSourceConnection,
SearchIndexerDataContainer,
SearchIndexerSkillset,
FieldMapping,
InputFieldMappingEntry,
OutputFieldMappingEntry,
AzureOpenAIEmbeddingSkill,
)
# --- Azure AI Search: service, target index and admin key (placeholders) ---
service_name = "ai_search_name"
index_name = "index_name"
api_key = "your-azure-search-api-key"
# The service endpoint is derived from the service name.
endpoint = "https://{}.search.windows.net/".format(service_name)

# --- Azure Blob Storage: where the source documents live (placeholders) ---
blob_connection_string = "your-blob-connection-string"
container_name = "aicsvdata"

# --- Azure OpenAI: resource, key and embedding deployment (placeholders) ---
azure_openai_service = "your-openai-service-name"
azure_openai_api_key = "your-openai-api-key"
azure_openai_embedding_deployment = "text-embedding-ada-002"
def create_indexer_with_skillset(index_client, indexer_client):
    """Create an indexer with a skillset that vectorizes the 'plot' field.

    Builds a blob data source connection, a skillset holding one Azure OpenAI
    embedding skill, and an indexer that ties them to the target index; then
    registers all three through ``indexer_client`` and starts an indexer run.

    Args:
        index_client: Search index client. Not used by this function; it is
            kept in the signature so existing callers keep working.
        indexer_client: Client used for the ``create_or_update_*`` calls and
            for ``run_indexer``.
    """
    data_source_name = "blob-datasource"
    indexer_name = "blob-indexer-with-vector"
    skillset_name = "plot-vectorization-skillset"

    # 1. Data Source: the blob container that holds the JSON documents.
    container = SearchIndexerDataContainer(name=container_name)
    data_source = SearchIndexerDataSourceConnection(
        name=data_source_name,
        connection_string=blob_connection_string,
        container=container,
        type="azureblob",
    )

    # 2. Skillset - Embedding Generation: reads /document/plot and emits the
    #    embedding under the name "PlotVector" in the skill's context.
    embedding_skill = AzureOpenAIEmbeddingSkill(
        name="plot-embedding",
        description="Generates vector embeddings for plot field",
        context="/document",
        resource_url=f"https://{azure_openai_service}.openai.azure.com/",
        api_key=azure_openai_api_key,
        deployment_name=azure_openai_embedding_deployment,
        model_name="text-embedding-ada-002",
        inputs=[InputFieldMappingEntry(name="text", source="/document/plot")],
        outputs=[OutputFieldMappingEntry(name="embedding", target_name="PlotVector")],
    )
    skillset = SearchIndexerSkillset(
        name=skillset_name,
        description="Skillset to vectorize plot",
        skills=[embedding_skill],
    )

    # 3. Field Mappings: route the skill output into the index field.
    field_mappings = [
        FieldMapping(
            source_field_name="/document/PlotVector",
            target_field_name="PlotVector",
        ),
    ]

    # query_timeout is a setting of IndexingParametersConfiguration, not of
    # IndexingParameters, so it has to be cleared on the configuration object
    # to take effect.
    # NOTE(review): presumably the SDK's default query timeout is not accepted
    # for blob data sources -- confirm against the service documentation.
    indexing_parameters = IndexingParameters(
        configuration=IndexingParametersConfiguration(
            # Options: 'default', 'delimitedText', 'json', 'jsonArray',
            # 'jsonLines', 'text', 'markdown'
            parsing_mode="jsonArray",
            # Options: 'allMetadata', 'contentAndMetadata', 'storageMetadata'
            data_to_extract="contentAndMetadata",
            query_timeout=None,
        ),
    )

    # 4. Indexer
    indexer = SearchIndexer(
        name=indexer_name,
        data_source_name=data_source_name,
        target_index_name=index_name,
        skillset_name=skillset_name,
        parameters=indexing_parameters,
        output_field_mappings=field_mappings,
    )

    # Create skillset and data source first; the indexer refers to both by name.
    indexer_client.create_or_update_skillset(skillset)
    indexer_client.create_or_update_data_source_connection(data_source)

    # Create and run the indexer.
    result = indexer_client.create_or_update_indexer(indexer)
    print(f"Indexer '{result.name}' created.")
    # NOTE(review): a newly created indexer may already start running on
    # creation -- confirm whether this explicit run can conflict with it.
    indexer_client.run_indexer(indexer_name)
def main():
    """Build the search clients and create the indexer, reporting any error.

    Both clients share one key credential built from ``api_key`` and point at
    ``endpoint``. Any exception raised along the way is caught and printed
    rather than propagated.
    """
    try:
        key_credential = AzureKeyCredential(api_key)
        clients = (
            SearchIndexClient(endpoint, key_credential),
            SearchIndexerClient(endpoint, key_credential),
        )
        create_indexer_with_skillset(*clients)
    except Exception as err:
        # Top-level boundary of the sample: report the failure and exit normally.
        print(f"An error occurred: {err}")


if __name__ == "__main__":
    main()
Please accept the solution and give feedback by clicking Yes.
Thank you.