Azure Machine Learning notebook using Synapse Spark Pool Failing - MLClient throwing "No value for given attribute" exceptions
In an Azure Machine Learning notebook, when using a Synapse Apache Spark compute pool as our selected compute resource, the following code throws a ValueError
(code example genericized; some values used below are not verbatim what we are using)
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes
import pandas as pd
SUBSCRIPTION_ID = '<<sub id here>>'
RESOURCE_GROUPS_NAME = '<<rg name here>>'
WORKSPACE_NAME = '<<wkspace name here>>'
DATASTORE_NAME = '<<datastore name here>>'
STRUCTURED_DATA_LAKE_URI = f"azureml://subscriptions/{SUBSCRIPTION_ID}/resourcegroups/{RESOURCE_GROUPS_NAME}/workspaces/{WORKSPACE_NAME}/datastores/{DATASTORE_NAME}/paths"
# authenticate
credential = DefaultAzureCredential()
# Get a handle to the workspace
ml_client = MLClient(
credential=credential,
subscription_id=SUBSCRIPTION_ID,
resource_groups_name=RESOURCE_GROUPS_NAME,
workspace_name=WORKSPACE_NAME,
)
ws = ml_client.workspaces.get(WORKSPACE_NAME)
Since this error emerges when simply trying to inspect our own AML workspace, it certainly shouldn't be expected to work when creating or getting data assets. We've confirmed that is in fact the case with the code below:
some_path = STRUCTURED_DATA_LAKE_URI + '/foo/bar/baz.parquet'
data = Data(
name="my-asset-name",
version='v1',
description="test desc",
path=some_path ,
type=AssetTypes.URI_FILE,
)
try:
ml_client.data.get(name='my-asset-name', version='v1')
except Exception as e:
print(e)
try:
ml_client.data.create_or_update(data)
print(f"Data asset created. Name: {data .name}, version: {data .version}")
except Exception as f:
print(f)
Result: two exceptions get printed, both being ValueError: No value for given attribute
. See stack trace below
Cell In [19], line 27
19 # Get a handle to the workspace
20 ml_client = MLClient(
21 credential=credential,
22 subscription_id=SUBSCRIPTION_ID,
23 resource_groups_name=RESOURCE_GROUPS_NAME,
24 workspace_name=WORKSPACE_NAME,
25 )
---> 27 ws = ml_client.workspaces.get(WORKSPACE_NAME)
File ~/cluster-env/env/lib/python3.10/site-packages/azure/ai/ml/_telemetry/activity.py:275, in monitor_with_activity.<locals>.monitor.<locals>.wrapper(*args, **kwargs)
272 @functools.wraps(f)
273 def wrapper(*args, **kwargs):
274 with log_activity(logger, activity_name or f.__name__, activity_type, custom_dimensions):
--> 275 return f(*args, **kwargs)
File ~/cluster-env/env/lib/python3.10/site-packages/azure/core/tracing/decorator.py:78, in distributed_trace.<locals>.decorator.<locals>.wrapper_use_tracer(*args, **kwargs)
76 span_impl_type = settings.tracing_implementation()
77 if span_impl_type is None:
---> 78 return func(*args, **kwargs)
80 # Merge span is parameter is set, but only if no explicit parent are passed
81 if merge_span and not passed_in_parent:
File ~/cluster-env/env/lib/python3.10/site-packages/azure/ai/ml/operations/_workspace_operations.py:121, in WorkspaceOperations.get(self, name, **kwargs)
100 @monitor_with_activity(logger, "Workspace.Get", ActivityType.PUBLICAPI)
101 @distributed_trace
102 # pylint: disable=arguments-renamed
103 def get(self, name: Optional[str] = None, **kwargs: Dict) -> Workspace:
104 """Get a Workspace by name.
105
106 :param name: Name of the workspace.
(...)
118 :caption: Get the workspace with the given name.
119 """
--> 121 return super().get(workspace_name=name, **kwargs)
File ~/cluster-env/env/lib/python3.10/site-packages/azure/ai/ml/operations/_workspace_operations_base.py:85, in WorkspaceOperationsBase.get(self, workspace_name, **kwargs)
83 workspace_name = self._check_workspace_name(workspace_name)
84 resource_group = kwargs.get("resource_group") or self._resource_group_name
---> 85 obj = self._operation.get(resource_group, workspace_name)
86 return Workspace._from_rest_object(obj)
File ~/cluster-env/env/lib/python3.10/site-packages/azure/core/tracing/decorator.py:78, in distributed_trace.<locals>.decorator.<locals>.wrapper_use_tracer(*args, **kwargs)
76 span_impl_type = settings.tracing_implementation()
77 if span_impl_type is None:
---> 78 return func(*args, **kwargs)
80 # Merge span is parameter is set, but only if no explicit parent are passed
81 if merge_span and not passed_in_parent:
File ~/cluster-env/env/lib/python3.10/site-packages/azure/ai/ml/_restclient/v2023_08_01_preview/operations/_workspaces_operations.py:912, in WorkspacesOperations.get(self, resource_group_name, workspace_name, **kwargs)
909 api_version: str = kwargs.pop("api_version", _params.pop("api-version", self._config.api_version))
910 cls: ClsType[_models.Workspace] = kwargs.pop("cls", None)
--> 912 request = build_get_request(
913 resource_group_name=resource_group_name,
914 workspace_name=workspace_name,
915 subscription_id=self._config.subscription_id,
916 api_version=api_version,
917 template_url=self.get.metadata["url"],
918 headers=_headers,
919 params=_params,
920 )
921 request = _convert_request(request)
922 request.url = self._client.format_url(request.url)
File ~/cluster-env/env/lib/python3.10/site-packages/azure/ai/ml/_restclient/v2023_08_01_preview/operations/_workspaces_operations.py:168, in build_get_request(resource_group_name, workspace_name, subscription_id, **kwargs)
161 # Construct URL
162 _url = kwargs.pop(
163 "template_url",
164 "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{workspaceName}",
165 ) # pylint: disable=line-too-long
166 path_format_arguments = {
167 "subscriptionId": _SERIALIZER.url("subscription_id", subscription_id, "str", min_length=1),
--> 168 "resourceGroupName": _SERIALIZER.url(
169 "resource_group_name", resource_group_name, "str", max_length=90, min_length=1
170 ),
171 "workspaceName": _SERIALIZER.url(
172 "workspace_name", workspace_name, "str", pattern=r"^[a-zA-Z0-9][a-zA-Z0-9_-]{2,32}$"
173 ),
174 }
176 _url: str = _url.format(**path_format_arguments) # type: ignore
178 # Construct parameters
File ~/cluster-env/env/lib/python3.10/site-packages/azure/ai/ml/_restclient/v2023_08_01_preview/_serialization.py:713, in Serializer.url(self, name, data, data_type, **kwargs)
704 """Serialize data intended for a URL path.
705
706 :param data: The data to be serialized.
(...)
710 :raises: ValueError if data is None
711 """
712 try:
--> 713 output = self.serialize_data(data, data_type, **kwargs)
714 if data_type == "bool":
715 output = json.dumps(output)
File ~/cluster-env/env/lib/python3.10/site-packages/azure/ai/ml/_restclient/v2023_08_01_preview/_serialization.py:790, in Serializer.serialize_data(self, data, data_type, **kwargs)
779 """Serialize generic data according to supplied data type.
780
781 :param data: The data to be serialized.
(...)
787 :raises: SerializationError if serialization fails.
788 """
789 if data is None:
--> 790 raise ValueError("No value for given attribute")
792 try:
793 if data is AzureCoreNull:
ValueError: No value for given attribute
We've also tried the same steps in the getting-started tutorials Please advise. Thank you!