Error in a pipeline between the first and second step — the first step runs, but an error occurs when the second step starts

TO 1 Reputation point
2022-03-16T20:25:22.017+00:00

first step of pipeline

# Step 1 (data prep): mounts the registered dataset, runs data_prep.py on the
# CPU cluster, and writes its result to `output_data` for the next step.
data_prep_step = PythonScriptStep(
    script_name='data_prep.py',
    source_directory='./src',
    compute_target='cpu-cluster',
    runconfig=aml_run_config,
    allow_reuse=True,
    arguments=[
        "--data_path", dataset.as_mount(),
        "--out_folder", output_data,
    ],
)

second step of pipeline

# Step 2 (training): consumes the previous step's output folder as its input.
# NOTE(review): `output_data.as_input()` links this step after data_prep_step
# in the pipeline graph — presumably an OutputFileDatasetConfig; verify.
train_step = PythonScriptStep(
    script_name='train.py',
    source_directory='./src',
    compute_target='cpu-cluster',
    runconfig=aml_run_config,
    allow_reuse=True,
    arguments=["--output_folder", output_data.as_input()],
)

run

# Assemble both steps into a pipeline and submit it as an experiment run.
train_pipeline = Pipeline(workspace=ws, steps=[data_prep_step, train_step])
experiment = Experiment(workspace=ws, name='training-pipeline')
pipeline_run = experiment.submit(train_pipeline)

The first step completes, but I get an error when the second step starts.

code for first step below

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the files in SOURCE between TRAINING and TESTING.

    Zero-length files are skipped (with a message). SPLIT_SIZE is the
    fraction (0..1) of non-empty files copied to TRAINING; the remainder
    is copied to TESTING. Uses random.sample, so seed `random` for a
    reproducible split. Destination directories must already exist.
    """
    files = []
    for filename in os.listdir(SOURCE):
        file = os.path.join(SOURCE, filename)
        if os.path.getsize(file) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * SPLIT_SIZE)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    testing_set = shuffled_set[training_length:]

    # Fix: the original computed a `destination` path but then copied into
    # the directory instead, leaving the variable unused. Copy to the
    # explicit destination path (same resulting filename either way).
    for filename in training_set:
        copy(os.path.join(SOURCE, filename), os.path.join(TRAINING, filename))

    for filename in testing_set:
        copy(os.path.join(SOURCE, filename), os.path.join(TESTING, filename))

# Entry script for the data-prep pipeline step: splits the mounted image
# dataset into train/test defect / no-defect folders under the step's
# output directory so the training step can consume them.
run = Run.get_context()

# Fix: the original had `if name == "main":`, which raises NameError
# (the forum paste likely stripped the dunder underscores).
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        type=str,
                        help='Path to uploaded data')
    parser.add_argument('--out_folder',
                        type=str,
                        help='Output folder for the prepped train/test split')
    args = parser.parse_args()
    output_folder = args.out_folder
    inputs = args.data_path

    # BUG FIX (root cause of the failing second step): os.path.join drops
    # every preceding component when a later component is absolute, so
    # os.path.join(output_folder, '/train/defect') evaluated to just
    # '/train/defect'. The prepped files were therefore written outside
    # the pipeline's output folder, and the train step found no data
    # ("Found no resources for the input provided"). Use relative
    # sub-paths so the directories live under output_folder.
    TRAINING_NO_DEFECT_DIR = os.path.join(output_folder, 'train', 'no-defect')
    TESTING_NO_DEFECT_DIR = os.path.join(output_folder, 'test', 'no-defect')
    TRAINING_DEFECT_DIR = os.path.join(output_folder, 'train', 'defect')
    TESTING_DEFECT_DIR = os.path.join(output_folder, 'test', 'defect')

    # exist_ok=True already tolerates pre-existing directories; the
    # original `except OSError: pass` would also have hidden genuine
    # failures (permissions, read-only mounts), so it is removed.
    for directory in (TRAINING_NO_DEFECT_DIR, TESTING_NO_DEFECT_DIR,
                      TRAINING_DEFECT_DIR, TESTING_DEFECT_DIR):
        os.makedirs(directory, exist_ok=True)

    # NOTE(review): these generators are created but not used in this
    # script — presumably leftover from the training script; confirm.
    train_datagen = ImageDataGenerator(rescale=1. / 255)
    val_datagen = ImageDataGenerator(rescale=1. / 255)
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    class_mode = 'binary'
    batch_size = 5

    # Source layout: the mounted dataset has "Good" and "Defective"
    # sub-folders of images.
    NO_DEFECT_SOURCE_DIR = os.path.join(inputs, "Good")
    DEFECT_SOURCE_DIR = os.path.join(inputs, "Defective")

    # 80/20 train/test split for each class.
    split_size = .8
    split_data(NO_DEFECT_SOURCE_DIR, TRAINING_NO_DEFECT_DIR,
               TESTING_NO_DEFECT_DIR, split_size)
    split_data(DEFECT_SOURCE_DIR, TRAINING_DEFECT_DIR,
               TESTING_DEFECT_DIR, split_size)

error received below

{'code': data-capability.DatasetMountSession:input_915071c1.ExecutionError, 'message':
Error Code: ScriptExecution.StreamAccess.NotFound
, 'target': , 'category': UserError, 'error_details': [{'key': NonCompliantReason, 'value':
Error Code: ScriptExecution.StreamAccess.NotFound
Failed Step: 92a8bfed-63f0-497a-bbcf-b0bfa1be2d9a
Error Message: ScriptExecutionException was caused by StreamAccessException.
StreamAccessException was caused by NotFoundException.
Found no resources for the input provided: 'https://mich7068071609.blob.core.windows.net/azureml-blobstore-e23f8d3d-4bfa-4d73-8330-db867b66a523/dataset/3c085033-67b7-4c25-8e29-1e58a993e90a/prepped/'
| session_id=067cfe37-82a4-46e1-900c-8184e503ebfb}, {'key': StackTrace, 'value': File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/data_capability/capability_session.py", line 47, in start
(data_path, sub_data_path) = session.start()

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/data_capability/data_sessions.py", line 171, in start
if self._is_single_file:

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/data_capability/data_sessions.py", line 119, in _is_single_file
path = dataflow._to_pyrecords()[0][temp_column]

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 756, in _to_pyrecords
intermediate_files = _write_preppy_with_fallback('Dataflow.to_pyrecords', self, span_context=to_dprep_span_context(span.get_context()))

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/_dataframereader.py", line 190, in _write_preppy_with_fallback
_execute_with_fallback(activity, dataflow_to_execute, force_clex=force_clex, span_context=span_context)

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/_dataframereader.py", line 238, in _execute_with_fallback
clex_execute()

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/_dataframereader.py", line 219, in clex_execute
span_context=span_context

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/_aml_helper.py", line 38, in wrapper
return send_message_func(op_code, message, cancellation_token)

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/api.py", line 154, in execute_anonymous_activity
response = self._message_channel.send_message('Engine.ExecuteActivity', message_args, cancellation_token)

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/engine.py", line 291, in send_message
raise_engine_error(response['error'])

File "/opt/miniconda/envs/data-capability/lib/python3.7/site-packages/azureml/dataprep/api/errorhandlers.py", line 10, in raise_engine_error
raise ExecutionError(error_response)
}, ], 'inner_e

Azure Machine Learning
Azure Machine Learning
An Azure machine learning service for building and deploying models.
3,334 questions
{count} votes

1 answer

Sort by: Most helpful
  1. YongliWei 6 Reputation points
    2022-04-04T09:52:01.617+00:00

    I hit the same error, and I can confirm that passing `dataset.as_download()` for `--data_path` worked.

    Pipeline debugging is far too difficult — is there any troubleshooting guidance available?

    Should I just post every error I get in order to debug? That is not an efficient way to work.

    Thank you anyway.

    0 comments No comments

Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.