I have an ADLS Gen2 account registered as a Datastore, and I am trying to read Excel data from one of its folders in a notebook. The following is the code:
from azureml.data.datapath import DataPath
from azureml.data.data_reference import DataReference
from azureml.core import Workspace, Datastore, Dataset
# Look up the workspace and the registered datastore by name.
# NOTE(review): get_ws() is not defined in this snippet; presumably a
# notebook helper that returns an azureml.core.Workspace -- confirm.
ws = get_ws()
dstore_name = 'ssss_aravind_store'
aravind_dstore = Datastore.get(ws, dstore_name)

# One DataReference per Excel workbook under the same datastore folder.
# Each reference gets its own distinct data_reference_name.
raw_input_path = DataReference(
    datastore=aravind_dstore,
    data_reference_name='ticket_raw_data_ref',
    path_on_datastore='semi-structured/ticket-incident-emails/raw_input_data_eng.xlsx')
print('Raw DataReference:', raw_input_path)

parent_keywords_path = DataReference(
    datastore=aravind_dstore,
    data_reference_name='parent_keywords_data_ref',
    path_on_datastore='semi-structured/ticket-incident-emails/parent_keywords.xlsx')
print('Parent Keywords DataReference:', parent_keywords_path)

# Previously this reused 'parent_keywords_data_ref' (copy-paste slip), so two
# different workbooks shared one reference name.
third_level_keywords_path = DataReference(
    datastore=aravind_dstore,
    data_reference_name='third_level_keywords_data_ref',
    path_on_datastore='semi-structured/ticket-incident-emails/third_level_keywords.xlsx')
print('3rd Level Keywords DataReference:', third_level_keywords_path)

cleaned_data_path = DataReference(
    datastore=aravind_dstore,
    data_reference_name='ticket_phase1_data_ref',
    path_on_datastore='semi-structured/ticket-incident-emails/phase1_op_dummy_data.xlsx')
print('Cleaned DataReference:', cleaned_data_path)

# Build one dataset per workbook, using the header row as column names and
# inferring column types. The third-level keywords workbook is read without an
# explicit sheet_name, so the SDK default applies.
# NOTE(review): the traceback shows Dataset.from_excel_files running through
# azureml/data/_dataset_deprecation.py, so this API appears to be deprecated
# in the installed SDK -- check the SDK docs for the suggested replacement.
raw_dset = Dataset.from_excel_files(
    raw_input_path,
    sheet_name='data',
    use_column_headers=True,
    infer_column_types=True)
parent_kwords_dset = Dataset.from_excel_files(
    parent_keywords_path,
    sheet_name='New_keywords',
    use_column_headers=True,
    infer_column_types=True)
level3_kwords_dset = Dataset.from_excel_files(
    third_level_keywords_path,
    use_column_headers=True,
    infer_column_types=True)
cleaned_dset = Dataset.from_excel_files(
    cleaned_data_path,
    sheet_name='phase1_op',
    use_column_headers=True,
    infer_column_types=True)
Error
---------------------------------------------------------------------------
ExecutionError Traceback (most recent call last)
<ipython-input-22-c4226f154137> in <module>
44 # from_excel_files(path, sheet_name=None, use_column_headers=False, skip_rows=0, include_path=False, infer_column_types=True,
45 # partition_format=None)
---> 46 raw_dset = Dataset.from_excel_files(raw_input_path, sheet_name= 'data', use_column_headers=True, infer_column_types=True)
47 parent_kwords_dset = Dataset.from_excel_files(parent_keywords_path, sheet_name= 'New_keywords', use_column_headers=True, infer_column_types=True)
48 level3_kwords_dset = Dataset.from_excel_files(third_level_keywords_path, use_column_headers=True, infer_column_types=True)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/data/_dataset_deprecation.py in wrapper(*args, **kwargs)
20 _warn_deprecation(target, replacement) # only raise warning for top-level invocation
21 _warning_silenced_for = target
---> 22 result = func(*args, **kwargs)
23 if _warning_silenced_for == target:
24 _warning_silenced_for = None
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/data/_loggerfactory.py in wrapper(*args, **kwargs)
124 with _LoggerFactory.track_activity(logger, func.__name__, activity_type, custom_dimensions) as al:
125 try:
--> 126 return func(*args, **kwargs)
127 except Exception as e:
128 if hasattr(al, 'activity_info') and hasattr(e, 'error_code'):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/dataset.py in from_excel_files(path, sheet_name, use_column_headers, skip_rows, include_path, infer_column_types, partition_format)
661 include_path,
662 infer_column_types,
--> 663 partition_format)
664
665 @staticmethod
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/data/_dataset_client.py in from_excel_files(path, sheet_name, use_column_headers, skip_rows, include_path, infer_column_types, partition_format)
810 inference_arguments = dprep.InferenceArguments(day_first=True)
811 dataflow = dprep.read_excel(
--> 812 path, sheet_name, use_column_headers, inference_arguments, skip_rows, include_path)
813 dataflow._name = sheet_name
814 return _DatasetClient._get_dataset_from_dataflow(
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/dataprep/api/readers.py in read_excel(path, sheet_name, use_column_headers, inference_arguments, skip_rows, include_path, infer_column_types, verify_exists)
186 df = df.read_excel(sheet_name, use_column_headers, skip_rows)
187
--> 188 df = _handle_type_inference_and_path(df, inference_arguments, infer_column_types, include_path)
189
190 if verify_exists:
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/dataprep/api/readers.py in _handle_type_inference_and_path(df, inference_arguments, infer_column_types, include_path)
32 column_types_builder = df.builders.set_column_types()
33 if use_inference_arguments:
---> 34 column_types_builder.learn(inference_arguments)
35 else:
36 column_types_builder.learn()
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/dataprep/api/builders.py in learn(self, inference_arguments)
193 if inference_arguments is not None and not isinstance(inference_arguments, InferenceArguments):
194 raise ValueError('Unexpected inference arguments. Expected instance of InferenceArguments class')
--> 195 self._conversion_candidates = self._run_type_inference(self._dataflow._get_steps())
196 if inference_arguments is not None:
197 self._resolve_date_ambiguity(inference_arguments.day_first)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/dataprep/api/builders.py in _run_type_inference(self, steps)
79 inferences = self._engine_api.infer_types_with_span_context(InferTypesWithSpanContextMessageArguments(
80 blocks=steps_to_block_datas(steps),
---> 81 span_context=to_dprep_span_context(span.get_context())
82 ))
83 return {col: _inference_info_from_result(inference) for col, inference in inferences.items()}
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/dataprep/api/_aml_helper.py in wrapper(op_code, message, cancellation_token)
36 if len(changed) > 0:
37 engine_api_func().update_environment_variable(changed)
---> 38 return send_message_func(op_code, message, cancellation_token)
39
40 return wrapper
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/dataprep/api/engineapi/api.py in infer_types_with_span_context(self, message_args, cancellation_token)
183 @update_aml_env_vars(get_engine_api)
184 def infer_types_with_span_context(self, message_args: typedefinitions.InferTypesWithSpanContextMessageArguments, cancellation_token: CancellationToken = None) -> Dict[str, typedefinitions.FieldInference]:
--> 185 response = self._message_channel.send_message('Engine.InferTypesWithSpanContextMessage', message_args, cancellation_token)
186 return {k: typedefinitions.FieldInference.from_pod(v) if v is not None else None for k, v in response.items()} if response is not None else None
187
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/dataprep/api/engineapi/engine.py in send_message(self, op_code, message, cancellation_token)
180 response = self._read_response()
181 if 'error' in response:
--> 182 raise_engine_error(response['error'])
183 elif response.get('id') == message_id:
184 return response['result']
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/dataprep/api/errorhandlers.py in raise_engine_error(error_response)
8 error_code = error_response['errorCode']
9 if 'ScriptExecution' in error_code:
---> 10 raise ExecutionError(error_response)
11 if 'Validation' in error_code:
12 raise ValidationError(error_response)
ExecutionError:
Error Code: ScriptExecution.StreamAccess.Unexpected
Failed Step: 0....
Error Message: ScriptExecutionException was caused by StreamAccessException.
StreamAccessException was caused by UnexpectedException.
Unexpected error when attempting 'GetHttpResourceStream' for 'https://stgaccount.dfs.core.windows.net/aravind/semi-structured/ticket-incident-emails/raw_input_data_eng.xlsx'.
Too many open files in system
| session_id=ff6......