Note
Access to this page requires authorization. You can try signing in or changing directories.
The following example shows how to log a custom Python model for Databricks to use to generate embeddings.
The model signature should use a single string ColSpec as the input schema and a TensorSpec as the output schema. The embeddings should be returned as a NumPy array.
class CustomEmbeddingModel(PythonModel):
    """Custom MLflow Python model that fetches embeddings through the OpenAI client.

    The client is pointed at the endpoint given by the ``BASE_URL`` environment
    variable and authenticated with ``DATABRICKS_TOKEN``. Texts are embedded
    with the ``databricks-gte-large-en`` model.
    """

    def load_context(self, context):
        """Read the endpoint configuration from the environment.

        Args:
            context: Context object passed in by the caller; not used here.
        """
        import os

        # NOTE(review): os.environ.get returns None when a variable is unset,
        # so a missing DATABRICKS_TOKEN or BASE_URL is not reported here.
        # Presumably the OpenAI client then falls back to its own defaults;
        # confirm the serving environment always sets both.
        self.databricks_token = os.environ.get("DATABRICKS_TOKEN")
        self.base_url = os.environ.get("BASE_URL")
        self.model_name = "databricks-gte-large-en"

    def predict(self, context, model_input, params=None):
        """Embed the texts in the first column of ``model_input``.

        Args:
            context: Context object passed in by the caller; not used here.
            model_input: Tabular input indexed with ``.iloc``; only the first
                column is read and each value is sent as one text to embed.
            params: Optional extra parameters; not used here.

        Returns:
            A float32 NumPy array with one row per embedding returned by the
            endpoint.
        """
        from openai import OpenAI
        import numpy as np

        client = OpenAI(api_key=self.databricks_token, base_url=self.base_url)

        # Send a plain list of values rather than a pandas Series so the
        # request payload does not depend on how the client handles pandas
        # objects.
        texts = model_input.iloc[:, 0].tolist()
        response = client.embeddings.create(input=texts, model=self.model_name)

        # The embeddings arrive as lists of Python floats, which NumPy would
        # turn into float64. Cast explicitly so the result matches the float32
        # TensorSpec declared in the signature this model is logged with.
        return np.asarray(
            [item.embedding for item in response.data], dtype=np.float32
        )
# Log the custom embedding model to MLflow inside a fresh run, together with
# an explicit signature.
# NOTE(review): `input_example` is not defined in this snippet; it is assumed
# to be defined earlier (presumably a one-column table of sample texts).
with mlflow.start_run() as run:
    # Output: a float32 tensor with 1024 columns; -1 leaves the number of rows
    # unspecified.
    output_schema = Schema(
        [TensorSpec(type=np.dtype(np.float32), shape=[-1, 1024])]
    )
    # Input: a single unnamed string column.
    input_schema = Schema([ColSpec(type=DataType.string)])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=CustomEmbeddingModel(),
        pip_requirements=["mlflow==2.21.3", "openai==1.69.0"],
        signature=signature,
        input_example=input_example,
    )