I hit the same error, and I can confirm that passing `--data_path` with `dataset.as_download()` worked.
Debugging pipelines this way is too difficult — is there any troubleshooting guidance for pipeline runs?
Should I just post the error I got so it can be debugged? That seems inefficient.
Thank you anyway.
This browser is no longer supported.
Upgrade to Microsoft Edge to take advantage of the latest features, security updates, and technical support.
# Two-step Azure ML pipeline: the data-prep step writes into `output_data`,
# which is then fed as an input to the training step.
data_prep_step = PythonScriptStep(
    script_name='data_prep.py',
    source_directory='./src',
    # FIX: use as_download() to materialize the dataset on the compute's
    # local disk. as_mount() failed here with
    # ScriptExecution.StreamAccess.NotFound (the mount session tried to
    # resolve the dataset path as a single file) — as_download() is the
    # confirmed-working alternative for this dataset.
    arguments=["--data_path", dataset.as_download(), "--out_folder", output_data],
    compute_target='cpu-cluster',
    runconfig=aml_run_config,
    allow_reuse=True,
)

train_step = PythonScriptStep(
    script_name='train.py',
    source_directory='./src',
    # output_data.as_input() wires the prep step's output into this step and
    # makes the data dependency explicit, so the steps run in order.
    arguments=["--output_folder", output_data.as_input()],
    compute_target='cpu-cluster',
    runconfig=aml_run_config,
    allow_reuse=True,
)

train_pipeline = Pipeline(workspace=ws, steps=[data_prep_step, train_step])
experiment = Experiment(workspace=ws, name='training-pipeline')
pipeline_run = experiment.submit(train_pipeline)
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the non-empty files in SOURCE between TRAINING and TESTING.

    SPLIT_SIZE is the fraction (0.0-1.0) of usable files copied into the
    TRAINING directory; the remainder are copied into TESTING. Zero-length
    files are skipped with a warning. Both destination directories must
    already exist.
    """
    files = []
    for filename in os.listdir(SOURCE):
        path = os.path.join(SOURCE, filename)
        if os.path.getsize(path) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * SPLIT_SIZE)
    # sample(files, len(files)) shuffles without mutating `files`.
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    testing_set = shuffled_set[training_length:]

    # FIX: the original computed a per-file `destination` path but never used
    # it (it copied into the directory instead); it also computed an unused
    # `testing_length`. Copy explicitly to the destination file path.
    for filename in training_set:
        copy(os.path.join(SOURCE, filename), os.path.join(TRAINING, filename))
    for filename in testing_set:
        copy(os.path.join(SOURCE, filename), os.path.join(TESTING, filename))
# Attach to the current Azure ML run context (offline run when executed locally).
run = Run.get_context()

# FIX: was `if name == "main":` (underscores lost in the paste) — that guard
# raised NameError / never matched, so the prep logic below never executed.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        type=str,
                        help='Path to uploaded data')
    parser.add_argument('--out_folder',
                        type=str)
    args = parser.parse_args()

    output_folder = args.out_folder
    inputs = args.data_path

    # FIX: os.path.join(output_folder, '/train/defect') DISCARDS
    # output_folder because the second component is absolute — files were
    # written to /train/... instead of the pipeline's output folder, which is
    # why the downstream step failed with
    # ScriptExecution.StreamAccess.NotFound on an empty '.../prepped/' path.
    # Join with relative components instead; exist_ok=True makes the old
    # try/except OSError guard unnecessary.
    for subset in ('train', 'test'):
        for label in ('defect', 'no-defect'):
            os.makedirs(os.path.join(output_folder, subset, label), exist_ok=True)

    # Keras generators: only rescaling, no augmentation.
    train_datagen = ImageDataGenerator(rescale=1. / 255)
    val_datagen = ImageDataGenerator(rescale=1. / 255)
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    class_mode = 'binary'
    batch_size = 5

    # Source class folders are assumed to be named "Good" / "Defective"
    # inside the downloaded dataset — TODO confirm against the dataset layout.
    NO_DEFECT_SOURCE_DIR = os.path.join(inputs, "Good")
    TRAINING_NO_DEFECT_DIR = os.path.join(output_folder, 'train', 'no-defect')
    TESTING_NO_DEFECT_DIR = os.path.join(output_folder, 'test', 'no-defect')
    DEFECT_SOURCE_DIR = os.path.join(inputs, "Defective")
    TRAINING_DEFECT_DIR = os.path.join(output_folder, 'train', 'defect')
    TESTING_DEFECT_DIR = os.path.join(output_folder, 'test', 'defect')

    # 80/20 train/test split per class.
    split_size = .8
    split_data(NO_DEFECT_SOURCE_DIR, TRAINING_NO_DEFECT_DIR, TESTING_NO_DEFECT_DIR, split_size)
    split_data(DEFECT_SOURCE_DIR, TRAINING_DEFECT_DIR, TESTING_DEFECT_DIR, split_size)
{'code': data-capability.DatasetMountSession:input_915071c1.ExecutionError, 'message':
Error Code: ScriptExecution.StreamAccess.NotFound
, 'target': , 'category': UserError, 'error_details': [{'key': NonCompliantReason, 'value':
Error Code: ScriptExecution.StreamAccess.NotFound
Failed Step: 92a8bfed-63f0-497a-bbcf-b0bfa1be2d9a
Error Message: ScriptExecutionException was caused by StreamAccessException.
StreamAccessException was caused by NotFoundException.
Found no resources for the input provided: 'https://mich7068071609.blob.core.windows.net/azureml-blobstore-e23f8d3d-4bfa-4d73-8330-db867b66a523/dataset/3c085033-67b7-4c25-8e29-1e58a993e90a/prepped/'
| session_id=067cfe37-82a4-46e1-900c-8184e503ebfb}, {'key': StackTrace, 'value': File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/data_capability/capability_session.py", line 47, in start
(data_path, sub_data_path) = session.start()
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/data_capability/data_sessions.py", line 171, in start
if self._is_single_file:
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/data_capability/data_sessions.py", line 119, in _is_single_file
path = dataflow._to_pyrecords()[0][temp_column]
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 756, in _to_pyrecords
intermediate_files = _write_preppy_with_fallback('Dataflow.to_pyrecords', self, span_context=to_dprep_span_context(span.get_context()))
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/_dataframereader.py", line 190, in _write_preppy_with_fallback
_execute_with_fallback(activity, dataflow_to_execute, force_clex=force_clex, span_context=span_context)
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/_dataframereader.py", line 238, in _execute_with_fallback
clex_execute()
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/_dataframereader.py", line 219, in clex_execute
span_context=span_context
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/_aml_helper.py", line 38, in wrapper
return send_message_func(op_code, message, cancellation_token)
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/api.py", line 154, in execute_anonymous_activity
response = self._message_channel.send_message('Engine.ExecuteActivity', message_args, cancellation_token)
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/engine.py", line 291, in send_message
raise_engine_error(response['error'])
File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/errorhandlers.py", line 10, in raise_engine_error
raise ExecutionError(error_response)
}, ], 'inner_e
I hit the same error, and I can confirm that passing `--data_path` with `dataset.as_download()` worked.
Debugging pipelines this way is too difficult — is there any troubleshooting guidance for pipeline runs?
Should I just post the error I got so it can be debugged? That seems inefficient.
Thank you anyway.