def get_dataset(num_classes, rank=0, size=1):
    """Load MNIST and return this worker's shard as model-ready arrays.

    Args:
        num_classes: Number of classes handed to ``keras.utils.to_categorical``
            when encoding the labels.
        rank: Offset of the first sample to keep; also used in the name
            passed to ``load_data`` (``'MNIST-data-<rank>'``).
        size: Stride between kept samples. With the defaults
            (``rank=0``, ``size=1``) every sample is kept.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where the images are
        ``float32`` arrays reshaped to ``(n, 28, 28, 1)`` and divided by 255,
        and the labels are the output of ``to_categorical``.
    """
    from tensorflow import keras

    def _shard_and_scale(images, labels):
        # Keep every `size`-th sample starting at index `rank`.
        images = images[rank::size]
        labels = labels[rank::size]
        # Add a trailing channel axis, convert to float32, then scale in place.
        images = images.reshape(images.shape[0], 28, 28, 1).astype('float32')
        images /= 255
        return images, keras.utils.to_categorical(labels, num_classes)

    train_split, test_split = keras.datasets.mnist.load_data('MNIST-data-%d' % rank)
    return _shard_and_scale(*train_split), _shard_and_scale(*test_split)
def get_model(num_classes):
    """Build the (uncompiled) convolutional classifier used in this notebook.

    Args:
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``keras.models.Sequential`` made of two 3x3 ReLU convolutions
        (32 then 64 filters), 2x2 max pooling, dropout, a 128-unit ReLU dense
        layer, dropout, and a ``num_classes``-unit softmax output. The first
        layer declares an input shape of ``(28, 28, 1)``.
    """
    from tensorflow.keras import layers
    from tensorflow.keras import models

    # Layers are listed in the order they are applied.
    stack = [
        layers.Conv2D(32, kernel_size=(3, 3),
                      activation='relu',
                      input_shape=(28, 28, 1)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ]
    return models.Sequential(stack)
# Specify training parameters
# These module-level values are read by train() and train_hvd() below.
# Batch size passed to model.fit
batch_size = 128
# Number of epochs passed to model.fit
epochs = 1
# Number of label classes; passed to get_dataset() and get_model()
num_classes = 10
def train(learning_rate=1.0):
    """Train the MNIST model in a single process and return it.

    Uses the module-level ``num_classes``, ``batch_size`` and ``epochs``
    settings together with ``get_dataset`` / ``get_model``.

    Args:
        learning_rate: Learning rate given to the Adadelta optimizer.

    Returns:
        The fitted Keras model.
    """
    from tensorflow import keras

    # Default rank/size: the full dataset is used, no sharding.
    (x_train, y_train), (x_test, y_test) = get_dataset(num_classes)
    model = get_model(num_classes)

    # Adadelta optimizer driven by the caller-supplied learning rate.
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and emits a UserWarning.
    optimizer = keras.optimizers.Adadelta(learning_rate=learning_rate)

    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=2,
              validation_data=(x_test, y_test))
    return model
# Run single-node training with a learning rate of 0.1 and keep the fitted model.
model = train(learning_rate=0.1)
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
16384/11490434 [..............................] - ETA: 1s
540672/11490434 [>.............................] - ETA: 1s
8093696/11490434 [====================>.........] - ETA: 0s
11493376/11490434 [==============================] - 0s 0us/step
11501568/11490434 [==============================] - 0s 0us/step
/databricks/python/lib/python3.8/site-packages/keras/optimizer_v2/adadelta.py:74: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
super(Adadelta, self).__init__(name, **kwargs)
469/469 - 11s - loss: 0.6214 - accuracy: 0.8098 - val_loss: 0.2215 - val_accuracy: 0.9351 - 11s/epoch - 24ms/step
INFO:tensorflow:Assets written to: /tmp/tmp1cre2wdh/model/data/model/assets
# Reload the data with the default rank/size (no sharding) and keep only the test split.
_, (x_test, y_test) = get_dataset(num_classes)
# Evaluate the trained model; compile() registered 'accuracy' as the only extra metric,
# so evaluate() yields the loss followed by the accuracy.
loss, accuracy = model.evaluate(x_test, y_test, batch_size=128)
print("loss:", loss)
print("accuracy:", accuracy)
1/79 [..............................] - ETA: 1s - loss: 0.1615 - accuracy: 0.9688
16/79 [=====>........................] - ETA: 0s - loss: 0.2934 - accuracy: 0.9092
30/79 [==========>...................] - ETA: 0s - loss: 0.2865 - accuracy: 0.9130
45/79 [================>.............] - ETA: 0s - loss: 0.2670 - accuracy: 0.9187
61/79 [======================>.......] - ETA: 0s - loss: 0.2377 - accuracy: 0.9307
76/79 [===========================>..] - ETA: 0s - loss: 0.2151 - accuracy: 0.9369
79/79 [==============================] - 0s 3ms/step - loss: 0.2215 - accuracy: 0.9351
loss: 0.22154481709003448
accuracy: 0.9351000189781189
def train_hvd(checkpoint_path=None, learning_rate=1.0, timeline=None):
    """Train the MNIST model with Horovod; intended to run once per worker process.

    Uses the module-level ``num_classes``, ``batch_size`` and ``epochs``
    settings together with ``get_dataset`` / ``get_model``.

    Args:
        checkpoint_path: If set, rank 0 saves model weights to this path
            through a ``ModelCheckpoint`` callback.
        learning_rate: Base learning rate; it is multiplied by ``hvd.size()``
            before being given to the Adadelta optimizer.
        timeline: Optional path for a Horovod timeline started with
            ``hvd.start_timeline``. A path beginning with ``/dbfs`` is
            redirected to the matching path under ``/tmp`` while training
            runs and copied to the original location afterwards.

    Returns:
        None.
    """
    # Imports are local so that they are executed inside each worker process.
    import os
    import shutil

    import tensorflow as tf
    from tensorflow import keras
    import horovod.tensorflow.keras as hvd

    # --- WORKAROUND FOR DYNAMIC TIMELINE ISSUE --- #
    # A timeline path under /dbfs is swapped for a worker-local path under
    # /tmp (the "/dbfs" prefix is replaced); the file is copied back to the
    # /dbfs location in the `finally` block below.
    worker_timeline = None
    dbfs_timeline = None
    if timeline and timeline.startswith("/dbfs"):
        dbfs_timeline = timeline
        worker_timeline = "/tmp" + timeline[5:]
        timeline = worker_timeline
        os.makedirs(os.path.dirname(worker_timeline), exist_ok=True)
    # The same redirection is applied when the path comes from the
    # HOROVOD_TIMELINE environment variable; this takes precedence over the
    # `timeline` argument for the copy-back step.
    env_timeline = os.environ.get("HOROVOD_TIMELINE", "")
    if env_timeline.startswith("/dbfs"):
        dbfs_timeline = env_timeline
        worker_timeline = "/tmp" + env_timeline[5:]
        os.environ["HOROVOD_TIMELINE"] = worker_timeline
        os.makedirs(os.path.dirname(worker_timeline), exist_ok=True)
    print(f"dbfs_timeline:{dbfs_timeline} worker_timeline:{worker_timeline}")
    # --- WORKAROUND FOR DYNAMIC TIMELINE ISSUE --- #

    # Initialize Horovod
    hvd.init()
    if timeline:
        hvd.start_timeline(timeline)

    try:
        # Pin GPU to be used to process local rank (one GPU per process)
        # These steps are skipped on a CPU cluster
        gpus = tf.config.experimental.list_physical_devices('GPU')
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        if gpus:
            tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')

        # Load this worker's shard of the data, selected by Horovod rank and size
        (x_train, y_train), (x_test, y_test) = get_dataset(num_classes, hvd.rank(), hvd.size())
        model = get_model(num_classes)

        # Scale the learning rate by the number of Horovod processes.
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and emits a UserWarning.
        optimizer = keras.optimizers.Adadelta(learning_rate=learning_rate * hvd.size())

        # Use the Horovod Distributed Optimizer
        optimizer = hvd.DistributedOptimizer(optimizer)

        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # Create a callback to broadcast the initial variable states from rank 0 to all other processes.
        # This is required to ensure consistent initialization of all workers when training is started
        # with random weights or restored from a checkpoint.
        callbacks = [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ]

        # Save checkpoints only on worker 0 to prevent conflicts between workers
        if hvd.rank() == 0 and checkpoint_path:
            callbacks.append(keras.callbacks.ModelCheckpoint(checkpoint_path, save_weights_only=True))

        model.fit(x_train, y_train,
                  batch_size=batch_size,
                  callbacks=callbacks,
                  epochs=epochs,
                  verbose=2,
                  validation_data=(x_test, y_test))

        if timeline:
            hvd.stop_timeline()
    finally:
        # --- WORKAROUND FOR DYNAMIC TIMELINE ISSUE --- #
        # Copy the worker-local timeline back to its /dbfs destination, but only
        # in processes where the local file actually exists.
        if dbfs_timeline and worker_timeline and os.path.exists(worker_timeline):
            print(f"Copying: from: {worker_timeline} to {dbfs_timeline}")
            # Make sure the destination directory exists so the copy cannot fail
            # on a missing folder and mask an exception raised during training.
            os.makedirs(os.path.dirname(dbfs_timeline), exist_ok=True)
            shutil.copy(worker_timeline, dbfs_timeline)
        # --- WORKAROUND FOR DYNAMIC TIMELINE ISSUE --- #
import os
import time

from sparkdl import HorovodRunner

# Timeline destination under /dbfs; train_hvd() redirects it to a worker-local
# file during training and copies it back afterwards.
# NOTE(review): `timeline_file` was not defined anywhere in the visible notebook;
# this timestamped name follows the path pattern seen in the captured run output.
# Confirm against any earlier cell that may already set it.
timeline_file = f"/dbfs/ml/hvd_timeline_{time.time()}.json"
os.environ['HOROVOD_TIMELINE'] = timeline_file

# Launch train_hvd through HorovodRunner with np=2 and full driver-side logging.
hr = HorovodRunner(np=2, driver_log_verbosity='all')
hr.run(train_hvd, learning_rate=0.1)
The global names read or written to by the pickled function are {'num_classes', 'batch_size', 'epochs', 'get_model', 'print', 'get_dataset'}.
The pickled object size is 4469 bytes.
Start training.
Warning: Permanently added '10.68.153.173' (ECDSA) to the list of known hosts.
[1,1]<stdout>:dbfs_timeline:/dbfs/ml/hvd_timeline_1643218354.8848794.json worker_timeline:/tmp/ml/hvd_timeline_1643218354.8848794.json
[1,0]<stdout>:dbfs_timeline:/dbfs/ml/hvd_timeline_1643218354.8848794.json worker_timeline:/tmp/ml/hvd_timeline_1643218354.8848794.json
[1,1]<stderr>:2022-01-26 17:32:49.762267: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stderr>:2022-01-26 17:32:49.770803: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stderr>:2022-01-26 17:32:49.771975: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stderr>:2022-01-26 17:32:49.772912: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stdout>:Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1,0]<stderr>:2022-01-26 17:32:49.781864: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stderr>:2022-01-26 17:32:49.782834: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stdout>:Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz[1,0]<stdout>:
[1,0]<stdout>:
[1,0]<stdout>: 16384/11490434 [..............................] - ETA: 1s[1,1]<stdout>:
[1,1]<stdout>: 16384/11490434 [..............................] - ETA: 1s[1,0]<stdout>:
[1,0]<stdout>: 245760/11490434 [..............................] - ETA: 2s[1,1]<stdout>:
[1,1]<stdout>: 1146880/11490434 [=>............................] - ETA: 0s[1,0]<stdout>:
[1,0]<stdout>: 573440/11490434 [>.............................] - ETA: 1s[1,1]<stdout>:
[1,1]<stdout>:11493376/11490434 [==============================] - 0s 0us/step
[1,1]<stdout>:
[1,1]<stdout>:11501568/11490434 [==============================] - 0s 0us/step
[1,0]<stdout>:
[1,0]<stdout>: 983040/11490434 [=>............................] - ETA: 1s[1,0]<stdout>:
[1,0]<stdout>: 1466368/11490434 [==>...........................] - ETA: 1s[1,0]<stdout>:
[1,0]<stdout>: 2048000/11490434 [====>.........................] - ETA: 1s[1,0]<stdout>:
[1,0]<stdout>: 2752512/11490434 [======>.......................][1,0]<stdout>: - ETA: 0s[1,0]<stdout>:
[1,0]<stdout>: 3571712/11490434 [========>.....................] - ETA: 0s[1,0]<stdout>:
[1,0]<stdout>: 4538368/11490434 [==========>...................] - ETA: 0s[1,1]<stderr>:2022-01-26 17:32:50.353469: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA
[1,1]<stderr>:To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
[1,1]<stderr>:2022-01-26 17:32:50.354500: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stderr>:2022-01-26 17:32:50.355494: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stderr>:2022-01-26 17:32:50.356354: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stdout>:
[1,0]<stdout>: 5660672/11490434 [=============>................][1,0]<stdout>: - ETA: 0s[1,0]<stdout>:
[1,0]<stdout>: 6963200/11490434 [=================>............] - ETA: 0s[1,0]<stdout>:
8552448/11490434 [=====================>........] - ETA: 0s[1,0]<stdout>:
[1,0]<stdout>:10436608/11490434 [==========================>...] - ETA: 0s[1,0]<stdout>:
11493376/11490434 [==============================][1,0]<stdout>: - 1s 0us/step
[1,0]<stdout>:
[1,0]<stdout>:11501568/11490434 [==============================][1,0]<stdout>: - 1s 0us/step
[1,0]<stderr>:2022-01-26 17:32:50.881202: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA
[1,0]<stderr>:To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
[1,0]<stderr>:2022-01-26 17:32:50.882342: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stderr>:2022-01-26 17:32:50.883336: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stderr>:2022-01-26 17:32:50.884236: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stderr>:2022-01-26 17:32:51.927625: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stderr>:2022-01-26 17:32:51.928614: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stderr>:2022-01-26 17:32:51.929483: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,1]<stderr>:2022-01-26 17:32:51.930295: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13755 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:1e.0, compute capability: 7.5
[1,1]<stderr>:2022-01-26 17:32:52.091960: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 94080000 exceeds 10% of free system memory.
[1,1]<stderr>:2022-01-26 17:32:52.222094: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 94080000 exceeds 10% of free system memory.
[1,0]<stderr>:2022-01-26 17:32:52.514903: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stderr>:2022-01-26 17:32:52.515861: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stderr>:2022-01-26 17:32:52.516744: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[1,0]<stderr>:2022-01-26 17:32:52.517572: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13755 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:1e.0, compute capability: 7.5
[1,0]<stderr>:/databricks/python/lib/python3.8/site-packages/keras/optimizer_v2/adadelta.py:74: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
[1,0]<stderr>: super(Adadelta, self).__init__(name, **kwargs)
[1,0]<stderr>:2022-01-26 17:32:52.681832: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 94080000 exceeds 10% of free system memory.
[1,0]<stderr>:2022-01-26 17:32:52.814003: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 94080000 exceeds 10% of free system memory.
[1,1]<stderr>:2022-01-26 17:32:54.608782: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
[1,0]<stderr>:2022-01-26 17:32:55.296886: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
[1,1]<stderr>:2022-01-26 17:32:55.412544: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
[1,1]<stderr>:2022-01-26 17:32:55.413074: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
[1,1]<stderr>:2022-01-26 17:32:55.413113: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
[1,1]<stderr>:2022-01-26 17:32:55.413546: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
[1,1]<stderr>:2022-01-26 17:32:55.413647: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
[1,1]<stderr>:Relying on driver to perform ptx compilation.
[1,1]<stderr>:Modify $PATH to customize ptxas location.
[1,1]<stderr>:This message will be only logged once.
[1,0]<stderr>:2022-01-26 17:32:56.078023: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
[1,0]<stderr>:2022-01-26 17:32:56.078558: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
[1,0]<stderr>:2022-01-26 17:32:56.078593: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
[1,0]<stderr>:2022-01-26 17:32:56.079059: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
[1,0]<stderr>:2022-01-26 17:32:56.079191: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
[1,0]<stderr>:Relying on driver to perform ptx compilation.
[1,0]<stderr>:Modify $PATH to customize ptxas location.
[1,0]<stderr>:This message will be only logged once.
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Bootstrap : Using eth0:10.68.131.158<0>
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO NET/IB : No device found.
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO NET/Socket : Using [0]eth0:10.68.131.158<0>
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Using network Socket
[1,0]<stdout>:NCCL version 2.10.3+cuda11.0
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Bootstrap : Using eth0:10.68.153.173<0>
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO NET/IB : No device found.
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO NET/Socket : Using [0]eth0:10.68.153.173<0>
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Using network Socket
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Channel 00/02 : 0 1
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Channel 01/02 : 0 1
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] -1/-1/-1->0->1
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Trees [0] -1/-1/-1->1->0 [1] 0/-1/-1->1->-1
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Channel 00 : 0[1e0] -> 1[1e0] [receive] via NET/Socket/0
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Channel 01 : 0[1e0] -> 1[1e0] [receive] via NET/Socket/0
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Channel 00 : 1[1e0] -> 0[1e0] [receive] via NET/Socket/0
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Channel 00 : 1[1e0] -> 0[1e0] [send] via NET/Socket/0
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Channel 01 : 1[1e0] -> 0[1e0] [send] via NET/Socket/0
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Channel 01 : 1[1e0] -> 0[1e0] [receive] via NET/Socket/0
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Channel 00 : 0[1e0] -> 1[1e0] [send] via NET/Socket/0
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Channel 01 : 0[1e0] -> 1[1e0] [send] via NET/Socket/0
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Connected all rings
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO Connected all trees
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 8/8/512
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO 2 coll channels, 2 p2p channels, 1 p2p channels per peer
[1,1]<stdout>:0126-145552-kp1va3w1-10-68-153-173:785:795 [0] NCCL INFO comm 0x7f752c31ffc0 rank 1 nranks 2 cudaDev 0 busId 1e0 - Init COMPLETE
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Connected all rings
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Connected all trees
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 8/8/512
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO 2 coll channels, 2 p2p channels, 1 p2p channels per peer
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO comm 0x7f58a4364790 rank 0 nranks 2 cudaDev 0 busId 1e0 - Init COMPLETE
[1,0]<stdout>:0126-145552-kp1va3w1-10-68-131-158:760:763 [0] NCCL INFO Launch mode Parallel
[1,0]<stderr>:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0166s vs `on_train_batch_end` time: 0.0542s). Check your callbacks.
[1,1]<stderr>:/databricks/python/lib/python3.8/site-packages/keras/optimizer_v2/adadelta.py:74: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
[1,1]<stderr>: super(Adadelta, self).__init__(name, **kwargs)
[1,1]<stderr>:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0167s vs `on_train_batch_end` time: 0.0543s). Check your callbacks.
[1,1]<stdout>:235/235 - 9s - loss: 0.5515 - accuracy: 0.8317 - val_loss: 0.2306 - val_accuracy: 0.9330 - 9s/epoch - 36ms/step
[1,0]<stdout>:235/235 - 8s - loss: 0.7107 - accuracy: 0.7814 - val_loss: 0.2189 - val_accuracy: 0.9358 - 8s/epoch - 34ms/step
[1,0]<stdout>:Copying: from: /tmp/ml/hvd_timeline_1643218354.8848794.json to /dbfs/ml/hvd_timeline_1643218354.8848794.json
%sh cat /dbfs/ml/hvd_timeline_1643218354.8848794.json
[
{"name": "process_name", "ph": "M", "pid": 0, "args": {"start_time_since_epoch_in_micros":1643218369720178}},
{"name": "process_sort_index", "ph": "M", "pid": 0, "args": {"sort_index": 0}},
{"name": "process_name", "ph": "M", "pid": 1, "args": {"name": "PartitionedCall/DistributedAdadelta_Allreduce/cond_7/then/_81/DistributedAdadelta_Allreduce/cond_7/HorovodAllreduce_grads_7_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 1, "args": {"sort_index": 1}},
{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 6367380, "pid": 1},{"ph": "X", "name": "1", "ts": 6367382, "pid": 1, "dur": 0},{"name": "process_name", "ph": "M", "pid": 2, "args": {"name": "PartitionedCall/DistributedAdadelta_Allreduce/cond_6/then/_73/DistributedAdadelta_Allreduce/cond_6/HorovodAllreduce_grads_6_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 2, "args": {"sort_index": 2}},
{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 6367386, "pid": 2},{"ph": "X", "name": "1", "ts": 6367387, "pid": 2, "dur": 0},{"name": "process_name", "ph": "M", "pid": 3, "args": {"name": "PartitionedCall/DistributedAdadelta_Allreduce/cond_4/then/_57/DistributedAdadelta_Allreduce/cond_4/HorovodAllreduce_grads_4_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 3, "args": {"sort_index": 3}},
{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 6368373, "pid": 3},{"ph": "X", "name": "1", "ts": 6368374, "pid": 3, "dur": 0},{"name": "process_name", "ph": "M", "pid": 4, "args": {"name": "PartitionedCall/DistributedAdadelta_Allreduce/cond_5/then/_65/DistributedAdadelta_Allreduce/cond_5/HorovodAllreduce_grads_5_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 4, "args": {"sort_index": 4}},
{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 6368379, "pid": 4},{"ph": "X", "name": "1", "ts": 6368380, "pid": 4, "dur": 0},{"name": "process_name", "ph": "M", "pid": 5, "args": {"name": "PartitionedCall/DistributedAdadelta_Allreduce/cond_2/then/_41/DistributedAdadelta_Allreduce/cond_2/HorovodAllreduce_grads_2_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 5, "args": {"sort_index": 5}},
{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 6711148, "pid": 5},{"ph": "X", "name": "1", "ts": 6711150, "pid": 5, "dur": 0},{"name": "process_name", "ph": "M", "pid": 6, "args": {"name": "PartitionedCall/DistributedAdadelta_Allreduce/cond_3/then/_49/DistributedAdadelta_Allreduce/cond_3/HorovodAllreduce_grads_3_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 6, "args": {"sort_index": 6}},
{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 6711154, "pid": 6},{"ph": "X", "name": "1", "ts": 6711155, "pid": 6, "dur": 0},{"name": "process_name", "ph": "M", "pid": 7, "args": {"name": "PartitionedCall/DistributedAdadelta_Allreduce/cond/then/_25/DistributedAdadelta_Allreduce/cond/HorovodAllreduce_grads_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 7, "args": {"sort_index": 7}},
{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 6735467, "pid": 7},{"ph": "X", "name": "1", "ts": 6735468, "pid": 7, "dur": 0},{"name": "process_name", "ph": "M", "pid": 8, "args": {"name": "PartitionedCall/DistributedAdadelta_Allreduce/cond_1/then/_33/DistributedAdadelta_Allreduce/cond_1/HorovodAllreduce_grads_1_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 8, "args": {"sort_index": 8}},
{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 6735473, "pid": 8},{"ph": "X", "name": "1", "ts": 6735474, "pid": 8, "dur": 0},{"ph": "X", "name": "0", "ts": 7023228, "pid": 1, "dur": 0},{"ph": "E", "ts": 7023229, "pid": 1},{"ph": "X", "name": "0", "ts": 7023233, "pid": 2, "dur": 0},{"ph": "E", "ts": 7023234, "pid": 2},{"ph": "B", "name": "ALLREDUCE", "ts": 7023359, "pid": 1},{"ph": "B", "name": "ALLREDUCE", "ts": 7023361, "pid": 2},{"ph": "B", "name": "INIT_FUSION_BUFFER", "ts": 7023363, "pid": 1},{"ph": "B", "name": "INIT_FUSION_BUFFER", "ts": 7023364, "pid": 2},{"ph": "E", "ts": 7024365, "pid": 1},{"ph": "E", "ts": 7024368, "pid": 2},{"ph": "B", "name": "INIT_NCCL", "ts": 7024392, "pid": 1},{"ph": "B", "name": "INIT_NCCL", "ts": 7024394, "pid": 2},{"ph": "E", "ts": 7178768, "pid": 1},{"ph": "E", "ts": 7178772, "pid": 2},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7182220, "pid": 1},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7182224, "pid": 2},{"ph": "E", "ts": 7182234, "pid": 1},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7182235, "pid": 1},{"ph": "E", "ts": 7182238, "pid": 2},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7182240, "pid": 2},{"ph": "E", "ts": 7182298, "pid": 1},{"ph": "E", "ts": 7182299, "pid": 2},{"ph": "B", "name": "QUEUE", "ts": 7189155, "pid": 1},{"ph": "B", "name": "QUEUE", "ts": 7189157, "pid": 2},{"ph": "E", "ts": 7189164, "pid": 1},{"ph": "E", "ts": 7189165, "pid": 2},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7189166, "pid": 1},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7189168, "pid": 2},{"ph": "E", "ts": 7189170, "pid": 1},{"ph": "E", "ts": 7189171, "pid": 2},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7189173, "pid": 1},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7189174, "pid": 2},{"ph": "E", "ts": 7189177, "pid": 1},{"ph": "E", "ts": 7189178, "pid": 2},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7189180, "pid": 1},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7189181, 
"pid": 2},{"ph": "E", "ts": 7189183, "pid": 1},{"ph": "E", "ts": 7189185, "pid": 2},{"ph": "E", "ts": 7189195, "pid": 1, "args": {"dtype": "float32", "shape": "[10]"}},{"ph": "X", "name": "0", "ts": 7189226, "pid": 3, "dur": 0},{"ph": "E", "ts": 7189228, "pid": 3},{"ph": "E", "ts": 7189359, "pid": 2, "args": {"dtype": "float32", "shape": "[128, 10]"}},{"ph": "X", "name": "0", "ts": 7189368, "pid": 4, "dur": 0},{"ph": "E", "ts": 7189371, "pid": 4},{"ph": "B", "name": "ALLREDUCE", "ts": 7189468, "pid": 3},{"ph": "B", "name": "ALLREDUCE", "ts": 7189469, "pid": 4},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7197440, "pid": 3},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7197443, "pid": 4},{"ph": "E", "ts": 7197450, "pid": 3},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7197451, "pid": 3},{"ph": "E", "ts": 7197454, "pid": 4},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7197459, "pid": 4},{"ph": "E", "ts": 7197517, "pid": 3},{"ph": "E", "ts": 7197519, "pid": 4},{"ph": "B", "name": "QUEUE", "ts": 7205885, "pid": 3},{"ph": "B", "name": "QUEUE", "ts": 7205890, "pid": 4},{"ph": "E", "ts": 7205897, "pid": 3},{"ph": "E", "ts": 7205898, "pid": 4},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7205901, "pid": 3},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7205902, "pid": 4},{"ph": "E", "ts": 7205904, "pid": 3},{"ph": "E", "ts": 7205905, "pid": 4},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7205906, "pid": 3},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7205908, "pid": 4},{"ph": "E", "ts": 7205910, "pid": 3},{"ph": "E", "ts": 7205912, "pid": 4},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7205913, "pid": 3},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7205914, "pid": 4},{"ph": "E", "ts": 7205916, "pid": 3},{"ph": "E", "ts": 7205918, "pid": 4},{"ph": "E", "ts": 7205926, "pid": 3, "args": {"dtype": "float32", "shape": "[9216, 128]"}},{"ph": "E", "ts": 7205948, "pid": 4, "args": {"dtype": "float32", "shape": "[128]"}},{"ph": "X", 
"name": "0", "ts": 7386540, "pid": 5, "dur": 0},{"ph": "E", "ts": 7386541, "pid": 5},{"ph": "X", "name": "0", "ts": 7386545, "pid": 6, "dur": 0},{"ph": "E", "ts": 7386545, "pid": 6},{"ph": "B", "name": "ALLREDUCE", "ts": 7386668, "pid": 5},{"ph": "B", "name": "ALLREDUCE", "ts": 7386670, "pid": 6},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7386686, "pid": 5},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7386688, "pid": 6},{"ph": "E", "ts": 7386694, "pid": 5},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7386695, "pid": 5},{"ph": "E", "ts": 7386698, "pid": 6},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7386699, "pid": 6},{"ph": "E", "ts": 7386759, "pid": 5},{"ph": "E", "ts": 7386760, "pid": 6},{"ph": "B", "name": "QUEUE", "ts": 7386849, "pid": 5},{"ph": "B", "name": "QUEUE", "ts": 7386851, "pid": 6},{"ph": "E", "ts": 7386860, "pid": 5},{"ph": "E", "ts": 7386861, "pid": 6},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7386864, "pid": 5},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7386865, "pid": 6},{"ph": "E", "ts": 7386868, "pid": 5},{"ph": "E", "ts": 7386869, "pid": 6},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7386871, "pid": 5},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7386872, "pid": 6},{"ph": "E", "ts": 7387274, "pid": 5},{"ph": "E", "ts": 7387276, "pid": 6},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7387279, "pid": 5},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7387280, "pid": 6},{"ph": "E", "ts": 7387283, "pid": 5},{"ph": "E", "ts": 7387284, "pid": 6},{"ph": "E", "ts": 7387299, "pid": 5, "args": {"dtype": "float32", "shape": "[3, 3, 32, 64]"}},{"ph": "E", "ts": 7387329, "pid": 6, "args": {"dtype": "float32", "shape": "[64]"}},{"ph": "X", "name": "0", "ts": 7408842, "pid": 7, "dur": 0},{"ph": "E", "ts": 7408845, "pid": 7},{"ph": "X", "name": "0", "ts": 7408847, "pid": 8, "dur": 0},{"ph": "E", "ts": 7408848, "pid": 8},{"ph": "B", "name": "ALLREDUCE", "ts": 7408960, "pid": 7},{"ph": "B", "name": 
"ALLREDUCE", "ts": 7408961, "pid": 8},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7408975, "pid": 7},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7408977, "pid": 8},{"ph": "E", "ts": 7408982, "pid": 7},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7408983, "pid": 7},{"ph": "E", "ts": 7408986, "pid": 8},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7408987, "pid": 8},{"ph": "E", "ts": 7409046, "pid": 7},{"ph": "E", "ts": 7409047, "pid": 8},{"ph": "B", "name": "QUEUE", "ts": 7409144, "pid": 7},{"ph": "B", "name": "QUEUE", "ts": 7409146, "pid": 8},{"ph": "E", "ts": 7409154, "pid": 7},{"ph": "E", "ts": 7409155, "pid": 8},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7409158, "pid": 7},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7409159, "pid": 8},{"ph": "E", "ts": 7409171, "pid": 7},{"ph": "E", "ts": 7409172, "pid": 8},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7409173, "pid": 7},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7409174, "pid": 8},{"ph": "E", "ts": 7409258, "pid": 7},{"ph": "E", "ts": 7409259, "pid": 8},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7409261, "pid": 7},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7409262, "pid": 8},{"ph": "E", "ts": 7409265, "pid": 7},{"ph": "E", "ts": 7409266, "pid": 8},{"ph": "E", "ts": 7409280, "pid": 7, "args": {"dtype": "float32", "shape": "[3, 3, 1, 32]"}},{"ph": "E", "ts": 7409302, "pid": 8, "args": {"dtype": "float32", "shape": "[32]"}},{"name": "process_name", "ph": "M", "pid": 9, "args": {"name": "HorovodBroadcast_dense_1_bias_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 9, "args": {"sort_index": 9}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7592647, "pid": 9},{"ph": "X", "name": "1", "ts": 7592649, "pid": 9, "dur": 0},{"name": "process_name", "ph": "M", "pid": 10, "args": {"name": "HorovodBroadcast_dense_1_kernel_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 10, "args": {"sort_index": 10}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7592653, "pid": 10},{"ph": "X", "name": "1", "ts": 7592655, "pid": 10, "dur": 0},{"name": "process_name", "ph": "M", "pid": 11, "args": {"name": "HorovodBroadcast_conv2d_kernel_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 11, "args": {"sort_index": 11}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7592659, "pid": 11},{"ph": "X", "name": "1", "ts": 7592660, "pid": 11, "dur": 0},{"name": "process_name", "ph": "M", "pid": 12, "args": {"name": "HorovodBroadcast_dense_bias_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 12, "args": {"sort_index": 12}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7592663, "pid": 12},{"ph": "X", "name": "1", "ts": 7592664, "pid": 12, "dur": 0},{"name": "process_name", "ph": "M", "pid": 13, "args": {"name": "HorovodBroadcast_conv2d_bias_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 13, "args": {"sort_index": 13}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7592668, "pid": 13},{"ph": "X", "name": "1", "ts": 7592669, "pid": 13, "dur": 0},{"name": "process_name", "ph": "M", "pid": 14, "args": {"name": "HorovodBroadcast_conv2d_1_kernel_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 14, "args": {"sort_index": 14}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7595845, "pid": 14},{"ph": "X", "name": "1", "ts": 7595847, "pid": 14, "dur": 0},{"name": "process_name", "ph": "M", "pid": 15, "args": {"name": "HorovodBroadcast_dense_kernel_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 15, "args": {"sort_index": 15}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7600083, "pid": 15},{"ph": "X", "name": "1", "ts": 7600084, "pid": 15, "dur": 0},{"name": "process_name", "ph": "M", "pid": 16, "args": {"name": "HorovodBroadcast_conv2d_1_bias_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 16, "args": {"sort_index": 16}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7600089, "pid": 16},{"ph": "X", "name": "1", "ts": 7600090, "pid": 16, "dur": 0},{"ph": "X", "name": "0", "ts": 7605279, "pid": 10, "dur": 0},{"ph": "E", "ts": 7605280, "pid": 10},{"ph": "B", "name": "BROADCAST", "ts": 7605413, "pid": 10},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7605416, "pid": 10},{"ph": "E", "ts": 7605419, "pid": 10},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7605420, "pid": 10},{"ph": "E", "ts": 7605483, "pid": 10},{"ph": "B", "name": "MPI_BCAST", "ts": 7605486, "pid": 10},{"ph": "E", "ts": 7605509, "pid": 10},{"ph": "E", "ts": 7605513, "pid": 10},{"ph": "X", "name": "0", "ts": 7609582, "pid": 15, "dur": 0},{"ph": "E", "ts": 7609584, "pid": 15},{"ph": "X", "name": "0", "ts": 7609587, "pid": 12, "dur": 0},{"ph": "E", "ts": 7609588, "pid": 12},{"ph": "X", "name": "0", "ts": 7609590, "pid": 9, "dur": 0},{"ph": "E", "ts": 7609591, "pid": 9},{"ph": "X", "name": "0", "ts": 7609593, "pid": 11, "dur": 0},{"ph": "E", "ts": 7609594, "pid": 11},{"ph": "X", "name": "0", "ts": 7609596, "pid": 13, "dur": 0},{"ph": "E", "ts": 7609597, "pid": 13},{"ph": "X", "name": "0", "ts": 7609599, "pid": 16, "dur": 0},{"ph": "E", "ts": 7609600, "pid": 16},{"ph": "X", "name": "0", "ts": 7609602, "pid": 14, "dur": 0},{"ph": "E", "ts": 7609603, "pid": 14},{"ph": "B", "name": "BROADCAST", "ts": 7609744, "pid": 15},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7609747, "pid": 15},{"ph": "E", "ts": 7609749, "pid": 15},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7609750, "pid": 15},{"ph": "E", "ts": 7609831, "pid": 15},{"ph": "B", "name": "MPI_BCAST", "ts": 7609833, "pid": 15},{"ph": "E", "ts": 7612486, "pid": 15},{"ph": "E", "ts": 7612493, "pid": 15},{"ph": "B", "name": "BROADCAST", "ts": 7612526, "pid": 12},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7612529, "pid": 12},{"ph": "E", "ts": 7612531, "pid": 12},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7612532, "pid": 12},{"ph": "E", "ts": 
7612596, "pid": 12},{"ph": "B", "name": "MPI_BCAST", "ts": 7612599, "pid": 12},{"ph": "E", "ts": 7612608, "pid": 12},{"ph": "E", "ts": 7612611, "pid": 12},{"ph": "B", "name": "BROADCAST", "ts": 7612631, "pid": 9},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7612632, "pid": 9},{"ph": "E", "ts": 7612634, "pid": 9},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7612635, "pid": 9},{"ph": "E", "ts": 7612699, "pid": 9},{"ph": "B", "name": "MPI_BCAST", "ts": 7612711, "pid": 9},{"ph": "E", "ts": 7612716, "pid": 9},{"ph": "E", "ts": 7612719, "pid": 9},{"ph": "B", "name": "BROADCAST", "ts": 7612787, "pid": 11},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7612789, "pid": 11},{"ph": "E", "ts": 7612790, "pid": 11},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7612791, "pid": 11},{"ph": "E", "ts": 7612852, "pid": 11},{"ph": "B", "name": "MPI_BCAST", "ts": 7612855, "pid": 11},{"ph": "E", "ts": 7612861, "pid": 11},{"ph": "E", "ts": 7612864, "pid": 11},{"ph": "B", "name": "BROADCAST", "ts": 7612882, "pid": 13},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7612884, "pid": 13},{"ph": "E", "ts": 7612885, "pid": 13},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7612886, "pid": 13},{"ph": "E", "ts": 7612949, "pid": 13},{"ph": "B", "name": "MPI_BCAST", "ts": 7612952, "pid": 13},{"ph": "E", "ts": 7612957, "pid": 13},{"ph": "E", "ts": 7612959, "pid": 13},{"ph": "B", "name": "BROADCAST", "ts": 7612974, "pid": 16},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7612976, "pid": 16},{"ph": "E", "ts": 7612977, "pid": 16},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7612978, "pid": 16},{"ph": "E", "ts": 7613056, "pid": 16},{"ph": "B", "name": "MPI_BCAST", "ts": 7613058, "pid": 16},{"ph": "E", "ts": 7613066, "pid": 16},{"ph": "E", "ts": 7613069, "pid": 16},{"ph": "B", "name": "BROADCAST", "ts": 7613088, "pid": 14},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7613089, "pid": 14},{"ph": "E", "ts": 7613091, "pid": 14},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 
7613092, "pid": 14},{"ph": "E", "ts": 7613129, "pid": 14},{"ph": "B", "name": "MPI_BCAST", "ts": 7613131, "pid": 14},{"ph": "E", "ts": 7613920, "pid": 14},{"ph": "E", "ts": 7613924, "pid": 14},{"name": "process_name", "ph": "M", "pid": 17, "args": {"name": "HorovodBroadcast_DistributedAdadelta_conv2d_1_bias_accum_grad_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 17, "args": {"sort_index": 17}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7705580, "pid": 17},{"ph": "X", "name": "1", "ts": 7705582, "pid": 17, "dur": 0},{"name": "process_name", "ph": "M", "pid": 18, "args": {"name": "HorovodBroadcast_DistributedAdadelta_conv2d_1_kernel_accum_grad_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 18, "args": {"sort_index": 18}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7705590, "pid": 18},{"ph": "X", "name": "1", "ts": 7705591, "pid": 18, "dur": 0},{"name": "process_name", "ph": "M", "pid": 19, "args": {"name": "HorovodBroadcast_DistributedAdadelta_conv2d_1_bias_accum_var_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 19, "args": {"sort_index": 19}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7705594, "pid": 19},{"ph": "X", "name": "1", "ts": 7705595, "pid": 19, "dur": 0},{"name": "process_name", "ph": "M", "pid": 20, "args": {"name": "HorovodBroadcast_DistributedAdadelta_conv2d_bias_accum_var_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 20, "args": {"sort_index": 20}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7705598, "pid": 20},{"ph": "X", "name": "1", "ts": 7705599, "pid": 20, "dur": 0},{"name": "process_name", "ph": "M", "pid": 21, "args": {"name": "HorovodBroadcast_DistributedAdadelta_conv2d_1_kernel_accum_var_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 21, "args": {"sort_index": 21}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7705602, "pid": 21},{"ph": "X", "name": "1", "ts": 7705603, "pid": 21, "dur": 0},{"name": "process_name", "ph": "M", "pid": 22, "args": {"name": "HorovodBroadcast_DistributedAdadelta_dense_1_bias_accum_var_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 22, "args": {"sort_index": 22}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7705609, "pid": 22},{"ph": "X", "name": "1", "ts": 7705610, "pid": 22, "dur": 0},{"name": "process_name", "ph": "M", "pid": 23, "args": {"name": "HorovodBroadcast_DistributedAdadelta_dense_1_kernel_accum_grad_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 23, "args": {"sort_index": 23}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7705614, "pid": 23},{"ph": "X", "name": "1", "ts": 7705615, "pid": 23, "dur": 0},{"name": "process_name", "ph": "M", "pid": 24, "args": {"name": "HorovodBroadcast_DistributedAdadelta_dense_kernel_accum_grad_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 24, "args": {"sort_index": 24}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714058, "pid": 24},{"ph": "X", "name": "1", "ts": 7714059, "pid": 24, "dur": 0},{"name": "process_name", "ph": "M", "pid": 25, "args": {"name": "HorovodBroadcast_DistributedAdadelta_dense_bias_accum_grad_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 25, "args": {"sort_index": 25}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714063, "pid": 25},{"ph": "X", "name": "1", "ts": 7714064, "pid": 25, "dur": 0},{"name": "process_name", "ph": "M", "pid": 26, "args": {"name": "HorovodBroadcast_DistributedAdadelta_conv2d_bias_accum_grad_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 26, "args": {"sort_index": 26}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714067, "pid": 26},{"ph": "X", "name": "1", "ts": 7714069, "pid": 26, "dur": 0},{"name": "process_name", "ph": "M", "pid": 27, "args": {"name": "HorovodBroadcast_DistributedAdadelta_dense_1_bias_accum_grad_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 27, "args": {"sort_index": 27}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714072, "pid": 27},{"ph": "X", "name": "1", "ts": 7714073, "pid": 27, "dur": 0},{"name": "process_name", "ph": "M", "pid": 28, "args": {"name": "HorovodBroadcast_DistributedAdadelta_conv2d_kernel_accum_var_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 28, "args": {"sort_index": 28}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714077, "pid": 28},{"ph": "X", "name": "1", "ts": 7714078, "pid": 28, "dur": 0},{"name": "process_name", "ph": "M", "pid": 29, "args": {"name": "HorovodBroadcast_DistributedAdadelta_iter_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 29, "args": {"sort_index": 29}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714081, "pid": 29},{"ph": "X", "name": "1", "ts": 7714082, "pid": 29, "dur": 0},{"name": "process_name", "ph": "M", "pid": 30, "args": {"name": "HorovodBroadcast_DistributedAdadelta_conv2d_kernel_accum_grad_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 30, "args": {"sort_index": 30}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714086, "pid": 30},{"ph": "X", "name": "1", "ts": 7714087, "pid": 30, "dur": 0},{"name": "process_name", "ph": "M", "pid": 31, "args": {"name": "HorovodBroadcast_DistributedAdadelta_dense_kernel_accum_var_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 31, "args": {"sort_index": 31}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714091, "pid": 31},{"ph": "X", "name": "1", "ts": 7714092, "pid": 31, "dur": 0},{"name": "process_name", "ph": "M", "pid": 32, "args": {"name": "HorovodBroadcast_DistributedAdadelta_dense_1_kernel_accum_var_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 32, "args": {"sort_index": 32}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714096, "pid": 32},{"ph": "X", "name": "1", "ts": 7714097, "pid": 32, "dur": 0},{"name": "process_name", "ph": "M", "pid": 33, "args": {"name": "HorovodBroadcast_DistributedAdadelta_dense_bias_accum_var_0"}},
{"name": "process_sort_index", "ph": "M", "pid": 33, "args": {"sort_index": 33}},
{"ph": "B", "name": "NEGOTIATE_BROADCAST", "ts": 7714103, "pid": 33},{"ph": "X", "name": "1", "ts": 7714104, "pid": 33, "dur": 0},{"ph": "X", "name": "0", "ts": 7724234, "pid": 30, "dur": 0},{"ph": "E", "ts": 7724237, "pid": 30},{"ph": "X", "name": "0", "ts": 7724240, "pid": 26, "dur": 0},{"ph": "E", "ts": 7724241, "pid": 26},{"ph": "X", "name": "0", "ts": 7724244, "pid": 27, "dur": 0},{"ph": "E", "ts": 7724252, "pid": 27},{"ph": "X", "name": "0", "ts": 7724254, "pid": 28, "dur": 0},{"ph": "E", "ts": 7724255, "pid": 28},{"ph": "X", "name": "0", "ts": 7724256, "pid": 29, "dur": 0},{"ph": "E", "ts": 7724257, "pid": 29},{"ph": "X", "name": "0", "ts": 7724259, "pid": 20, "dur": 0},{"ph": "E", "ts": 7724260, "pid": 20},{"ph": "X", "name": "0", "ts": 7724263, "pid": 32, "dur": 0},{"ph": "E", "ts": 7724264, "pid": 32},{"ph": "X", "name": "0", "ts": 7724265, "pid": 23, "dur": 0},{"ph": "E", "ts": 7724266, "pid": 23},{"ph": "X", "name": "0", "ts": 7724268, "pid": 21, "dur": 0},{"ph": "E", "ts": 7724269, "pid": 21},{"ph": "X", "name": "0", "ts": 7724271, "pid": 22, "dur": 0},{"ph": "E", "ts": 7724272, "pid": 22},{"ph": "X", "name": "0", "ts": 7724274, "pid": 24, "dur": 0},{"ph": "E", "ts": 7724275, "pid": 24},{"ph": "X", "name": "0", "ts": 7724276, "pid": 19, "dur": 0},{"ph": "E", "ts": 7724277, "pid": 19},{"ph": "X", "name": "0", "ts": 7724279, "pid": 18, "dur": 0},{"ph": "E", "ts": 7724280, "pid": 18},{"ph": "X", "name": "0", "ts": 7724282, "pid": 25, "dur": 0},{"ph": "E", "ts": 7724282, "pid": 25},{"ph": "B", "name": "BROADCAST", "ts": 7724400, "pid": 30},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7724403, "pid": 30},{"ph": "E", "ts": 7724405, "pid": 30},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7724406, "pid": 30},{"ph": "E", "ts": 7724469, "pid": 30},{"ph": "B", "name": "MPI_BCAST", "ts": 7724471, "pid": 30},{"ph": "E", "ts": 7724484, "pid": 30},{"ph": "E", "ts": 7724487, "pid": 30},{"ph": "B", "name": "BROADCAST", "ts": 7724514, "pid": 26},{"ph": "B", 
"name": "WAIT_FOR_DATA", "ts": 7724516, "pid": 26},{"ph": "E", "ts": 7724517, "pid": 26},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7724518, "pid": 26},{"ph": "E", "ts": 7724581, "pid": 26},{"ph": "B", "name": "MPI_BCAST", "ts": 7724585, "pid": 26},{"ph": "E", "ts": 7724604, "pid": 26},{"ph": "E", "ts": 7724614, "pid": 26},{"ph": "B", "name": "BROADCAST", "ts": 7724670, "pid": 27},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7724674, "pid": 27},{"ph": "E", "ts": 7724675, "pid": 27},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7724676, "pid": 27},{"ph": "E", "ts": 7724738, "pid": 27},{"ph": "B", "name": "MPI_BCAST", "ts": 7724740, "pid": 27},{"ph": "E", "ts": 7724755, "pid": 27},{"ph": "E", "ts": 7724758, "pid": 27},{"ph": "B", "name": "BROADCAST", "ts": 7724855, "pid": 28},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7724857, "pid": 28},{"ph": "E", "ts": 7724858, "pid": 28},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7724859, "pid": 28},{"ph": "E", "ts": 7724918, "pid": 28},{"ph": "B", "name": "MPI_BCAST", "ts": 7724920, "pid": 28},{"ph": "E", "ts": 7724937, "pid": 28},{"ph": "E", "ts": 7724939, "pid": 28},{"ph": "B", "name": "BROADCAST", "ts": 7724960, "pid": 29},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7724962, "pid": 29},{"ph": "E", "ts": 7724963, "pid": 29},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7724964, "pid": 29},{"ph": "E", "ts": 7725027, "pid": 29},{"ph": "B", "name": "MPI_BCAST", "ts": 7725030, "pid": 29},{"ph": "E", "ts": 7725046, "pid": 29},{"ph": "E", "ts": 7725049, "pid": 29},{"ph": "B", "name": "BROADCAST", "ts": 7725068, "pid": 20},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7725070, "pid": 20},{"ph": "E", "ts": 7725071, "pid": 20},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7725072, "pid": 20},{"ph": "E", "ts": 7725132, "pid": 20},{"ph": "B", "name": "MPI_BCAST", "ts": 7725135, "pid": 20},{"ph": "E", "ts": 7725149, "pid": 20},{"ph": "E", "ts": 7725151, "pid": 20},{"ph": "B", "name": 
"BROADCAST", "ts": 7725169, "pid": 32},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7725171, "pid": 32},{"ph": "E", "ts": 7725177, "pid": 32},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7725179, "pid": 32},{"ph": "E", "ts": 7725241, "pid": 32},{"ph": "B", "name": "MPI_BCAST", "ts": 7725244, "pid": 32},{"ph": "E", "ts": 7725262, "pid": 32},{"ph": "E", "ts": 7725266, "pid": 32},{"ph": "B", "name": "BROADCAST", "ts": 7725287, "pid": 23},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7725290, "pid": 23},{"ph": "E", "ts": 7725291, "pid": 23},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7725292, "pid": 23},{"ph": "E", "ts": 7725356, "pid": 23},{"ph": "B", "name": "MPI_BCAST", "ts": 7725359, "pid": 23},{"ph": "E", "ts": 7725378, "pid": 23},{"ph": "E", "ts": 7725381, "pid": 23},{"ph": "B", "name": "BROADCAST", "ts": 7725403, "pid": 21},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7725405, "pid": 21},{"ph": "E", "ts": 7725406, "pid": 21},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7725407, "pid": 21},{"ph": "E", "ts": 7725469, "pid": 21},{"ph": "B", "name": "MPI_BCAST", "ts": 7725471, "pid": 21},{"ph": "E", "ts": 7725624, "pid": 21},{"ph": "E", "ts": 7725628, "pid": 21},{"ph": "B", "name": "BROADCAST", "ts": 7725645, "pid": 22},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7725647, "pid": 22},{"ph": "E", "ts": 7725649, "pid": 22},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7725650, "pid": 22},{"ph": "E", "ts": 7725712, "pid": 22},{"ph": "B", "name": "MPI_BCAST", "ts": 7725715, "pid": 22},{"ph": "E", "ts": 7725732, "pid": 22},{"ph": "E", "ts": 7725735, "pid": 22},{"ph": "B", "name": "BROADCAST", "ts": 7725756, "pid": 24},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7725758, "pid": 24},{"ph": "E", "ts": 7725759, "pid": 24},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7725760, "pid": 24},{"ph": "E", "ts": 7725926, "pid": 24},{"ph": "B", "name": "MPI_BCAST", "ts": 7725928, "pid": 24},{"ph": "E", "ts": 7727991, "pid": 24},{"ph": "E", 
"ts": 7727998, "pid": 24},{"ph": "B", "name": "BROADCAST", "ts": 7728025, "pid": 19},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7728028, "pid": 19},{"ph": "E", "ts": 7728030, "pid": 19},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7728031, "pid": 19},{"ph": "E", "ts": 7728099, "pid": 19},{"ph": "B", "name": "MPI_BCAST", "ts": 7728101, "pid": 19},{"ph": "E", "ts": 7728111, "pid": 19},{"ph": "E", "ts": 7728115, "pid": 19},{"ph": "B", "name": "BROADCAST", "ts": 7728202, "pid": 18},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7728204, "pid": 18},{"ph": "E", "ts": 7728206, "pid": 18},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7728207, "pid": 18},{"ph": "E", "ts": 7728269, "pid": 18},{"ph": "B", "name": "MPI_BCAST", "ts": 7728271, "pid": 18},{"ph": "E", "ts": 7729232, "pid": 18},{"ph": "E", "ts": 7729241, "pid": 18},{"ph": "B", "name": "BROADCAST", "ts": 7729260, "pid": 25},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7729262, "pid": 25},{"ph": "E", "ts": 7729264, "pid": 25},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7729265, "pid": 25},{"ph": "E", "ts": 7729427, "pid": 25},{"ph": "B", "name": "MPI_BCAST", "ts": 7729429, "pid": 25},{"ph": "E", "ts": 7729450, "pid": 25},{"ph": "E", "ts": 7729454, "pid": 25},{"ph": "X", "name": "0", "ts": 7729636, "pid": 33, "dur": 0},{"ph": "E", "ts": 7729637, "pid": 33},{"ph": "X", "name": "0", "ts": 7729639, "pid": 17, "dur": 0},{"ph": "E", "ts": 7729640, "pid": 17},{"ph": "X", "name": "0", "ts": 7729642, "pid": 31, "dur": 0},{"ph": "E", "ts": 7729643, "pid": 31},{"ph": "B", "name": "BROADCAST", "ts": 7729688, "pid": 33},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7729690, "pid": 33},{"ph": "E", "ts": 7729691, "pid": 33},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7729692, "pid": 33},{"ph": "E", "ts": 7729756, "pid": 33},{"ph": "B", "name": "MPI_BCAST", "ts": 7729761, "pid": 33},{"ph": "E", "ts": 7729793, "pid": 33},{"ph": "E", "ts": 7729796, "pid": 33},{"ph": "B", "name": "BROADCAST", "ts": 
7729944, "pid": 17},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7729946, "pid": 17},{"ph": "E", "ts": 7729948, "pid": 17},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7729949, "pid": 17},{"ph": "E", "ts": 7730007, "pid": 17},{"ph": "B", "name": "MPI_BCAST", "ts": 7730009, "pid": 17},{"ph": "E", "ts": 7730025, "pid": 17},{"ph": "E", "ts": 7730027, "pid": 17},{"ph": "B", "name": "BROADCAST", "ts": 7730046, "pid": 31},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7730048, "pid": 31},{"ph": "E", "ts": 7730050, "pid": 31},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7730051, "pid": 31},{"ph": "E", "ts": 7730187, "pid": 31},{"ph": "B", "name": "MPI_BCAST", "ts": 7730189, "pid": 31},{"ph": "E", "ts": 7731937, "pid": 31},{"ph": "E", "ts": 7731943, "pid": 31},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7741883, "pid": 1},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7741893, "pid": 2},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7741895, "pid": 3},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7741896, "pid": 4},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7741898, "pid": 5},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7741899, "pid": 6},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7741902, "pid": 7},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7741904, "pid": 8},{"ph": "E", "ts": 7747154, "pid": 1},{"ph": "E", "ts": 7747156, "pid": 2},{"ph": "E", "ts": 7747157, "pid": 3},{"ph": "E", "ts": 7747158, "pid": 4},{"ph": "B", "name": "ALLREDUCE", "ts": 7747182, "pid": 1},{"ph": "B", "name": "ALLREDUCE", "ts": 7747184, "pid": 2},{"ph": "B", "name": "ALLREDUCE", "ts": 7747185, "pid": 3},{"ph": "B", "name": "ALLREDUCE", "ts": 7747186, "pid": 4},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7747198, "pid": 1},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7747200, "pid": 2},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7747202, "pid": 3},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7747204, "pid": 4},{"ph": "E", "ts": 7747210, "pid": 1},{"ph": "B", 
"name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7747211, "pid": 1},{"ph": "E", "ts": 7747214, "pid": 2},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7747215, "pid": 2},{"ph": "E", "ts": 7747217, "pid": 3},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7747218, "pid": 3},{"ph": "E", "ts": 7747220, "pid": 4},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7747221, "pid": 4},{"ph": "E", "ts": 7747283, "pid": 1},{"ph": "E", "ts": 7747286, "pid": 2},{"ph": "E", "ts": 7747287, "pid": 3},{"ph": "E", "ts": 7747288, "pid": 4},{"ph": "B", "name": "QUEUE", "ts": 7747368, "pid": 1},{"ph": "B", "name": "QUEUE", "ts": 7747372, "pid": 2},{"ph": "B", "name": "QUEUE", "ts": 7747372, "pid": 3},{"ph": "B", "name": "QUEUE", "ts": 7747374, "pid": 4},{"ph": "E", "ts": 7747378, "pid": 1},{"ph": "E", "ts": 7747379, "pid": 2},{"ph": "E", "ts": 7747381, "pid": 3},{"ph": "E", "ts": 7747382, "pid": 4},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7747384, "pid": 1},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7747386, "pid": 2},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7747387, "pid": 3},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7747388, "pid": 4},{"ph": "E", "ts": 7747441, "pid": 1},{"ph": "E", "ts": 7747443, "pid": 2},{"ph": "E", "ts": 7747444, "pid": 3},{"ph": "E", "ts": 7747446, "pid": 4},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7747447, "pid": 1},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7747449, "pid": 2},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7747451, "pid": 3},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7747452, "pid": 4},{"ph": "E", "ts": 7748278, "pid": 5},{"ph": "E", "ts": 7748280, "pid": 6},{"ph": "E", "ts": 7748286, "pid": 7},{"ph": "E", "ts": 7748288, "pid": 8},{"ph": "B", "name": "ALLREDUCE", "ts": 7748337, "pid": 5},{"ph": "B", "name": "ALLREDUCE", "ts": 7748339, "pid": 6},{"ph": "B", "name": "ALLREDUCE", "ts": 7748341, "pid": 7},{"ph": "B", "name": "ALLREDUCE", "ts": 7748342, "pid": 8},{"ph": "B", "name": 
"WAIT_FOR_DATA", "ts": 7748354, "pid": 5},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7748356, "pid": 6},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7748359, "pid": 7},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7748361, "pid": 8},{"ph": "E", "ts": 7749838, "pid": 5},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7749840, "pid": 5},{"ph": "E", "ts": 7749844, "pid": 6},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7749862, "pid": 6},{"ph": "E", "ts": 7750390, "pid": 7},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7750392, "pid": 7},{"ph": "E", "ts": 7750395, "pid": 8},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7750396, "pid": 8},{"ph": "E", "ts": 7750460, "pid": 5},{"ph": "E", "ts": 7750461, "pid": 6},{"ph": "E", "ts": 7750462, "pid": 7},{"ph": "E", "ts": 7750464, "pid": 8},{"ph": "E", "ts": 7755047, "pid": 1},{"ph": "E", "ts": 7755050, "pid": 2},{"ph": "E", "ts": 7755051, "pid": 3},{"ph": "E", "ts": 7755052, "pid": 4},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7755056, "pid": 1},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7755057, "pid": 2},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7755057, "pid": 3},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7755058, "pid": 4},{"ph": "E", "ts": 7755093, "pid": 1},{"ph": "E", "ts": 7755095, "pid": 2},{"ph": "E", "ts": 7755096, "pid": 3},{"ph": "E", "ts": 7755097, "pid": 4},{"ph": "E", "ts": 7755109, "pid": 1, "args": {"dtype": "float32", "shape": "[10]"}},{"ph": "E", "ts": 7755219, "pid": 2, "args": {"dtype": "float32", "shape": "[128, 10]"}},{"ph": "E", "ts": 7755234, "pid": 3, "args": {"dtype": "float32", "shape": "[9216, 128]"}},{"ph": "E", "ts": 7755248, "pid": 4, "args": {"dtype": "float32", "shape": "[128]"}},{"ph": "B", "name": "QUEUE", "ts": 7755370, "pid": 5},{"ph": "B", "name": "QUEUE", "ts": 7755372, "pid": 6},{"ph": "B", "name": "QUEUE", "ts": 7755373, "pid": 7},{"ph": "B", "name": "QUEUE", "ts": 7755374, "pid": 8},{"ph": "E", "ts": 
7755379, "pid": 5},{"ph": "E", "ts": 7755381, "pid": 6},{"ph": "E", "ts": 7755382, "pid": 7},{"ph": "E", "ts": 7755383, "pid": 8},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7755385, "pid": 5},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7755386, "pid": 6},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7755388, "pid": 7},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7755389, "pid": 8},{"ph": "E", "ts": 7755391, "pid": 5},{"ph": "E", "ts": 7755393, "pid": 6},{"ph": "E", "ts": 7755394, "pid": 7},{"ph": "E", "ts": 7755395, "pid": 8},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7755397, "pid": 5},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7755398, "pid": 6},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7755399, "pid": 7},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7755400, "pid": 8},{"ph": "E", "ts": 7756683, "pid": 5},{"ph": "E", "ts": 7756685, "pid": 6},{"ph": "E", "ts": 7756686, "pid": 7},{"ph": "E", "ts": 7756687, "pid": 8},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7756689, "pid": 5},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7756689, "pid": 6},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7756690, "pid": 7},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7756691, "pid": 8},{"ph": "E", "ts": 7756694, "pid": 5},{"ph": "E", "ts": 7756695, "pid": 6},{"ph": "E", "ts": 7756696, "pid": 7},{"ph": "E", "ts": 7756697, "pid": 8},{"ph": "E", "ts": 7756705, "pid": 5, "args": {"dtype": "float32", "shape": "[3, 3, 32, 64]"}},{"ph": "E", "ts": 7756720, "pid": 6, "args": {"dtype": "float32", "shape": "[64]"}},{"ph": "E", "ts": 7756781, "pid": 7, "args": {"dtype": "float32", "shape": "[3, 3, 1, 32]"}},{"ph": "E", "ts": 7756843, "pid": 8, "args": {"dtype": "float32", "shape": "[32]"}},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7762259, "pid": 1},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7762261, "pid": 2},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7762262, "pid": 3},{"ph": "B", "name": 
"NEGOTIATE_ALLREDUCE", "ts": 7762263, "pid": 4},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7762265, "pid": 5},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7762278, "pid": 6},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7762279, "pid": 7},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7763288, "pid": 8},{"ph": "E", "ts": 7763290, "pid": 1},{"ph": "E", "ts": 7763291, "pid": 2},{"ph": "E", "ts": 7763292, "pid": 3},{"ph": "E", "ts": 7763293, "pid": 4},{"ph": "E", "ts": 7763294, "pid": 5},{"ph": "E", "ts": 7763295, "pid": 6},{"ph": "E", "ts": 7763296, "pid": 7},{"ph": "E", "ts": 7763298, "pid": 8},{"ph": "B", "name": "ALLREDUCE", "ts": 7763325, "pid": 1},{"ph": "B", "name": "ALLREDUCE", "ts": 7763327, "pid": 2},{"ph": "B", "name": "ALLREDUCE", "ts": 7763328, "pid": 3},{"ph": "B", "name": "ALLREDUCE", "ts": 7763329, "pid": 4},{"ph": "B", "name": "ALLREDUCE", "ts": 7763330, "pid": 5},{"ph": "B", "name": "ALLREDUCE", "ts": 7763331, "pid": 6},{"ph": "B", "name": "ALLREDUCE", "ts": 7763332, "pid": 7},{"ph": "B", "name": "ALLREDUCE", "ts": 7763333, "pid": 8},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7763345, "pid": 1},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7763346, "pid": 2},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7763348, "pid": 3},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7763350, "pid": 4},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7763351, "pid": 5},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7763354, "pid": 6},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7763355, "pid": 7},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7763356, "pid": 8},{"ph": "E", "ts": 7763361, "pid": 1},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7763363, "pid": 1},{"ph": "E", "ts": 7763365, "pid": 2},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7763367, "pid": 2},{"ph": "E", "ts": 7763369, "pid": 3},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7763370, "pid": 3},{"ph": "E", "ts": 7763372, "pid": 4},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", 
"ts": 7763373, "pid": 4},{"ph": "E", "ts": 7765387, "pid": 5},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7765389, "pid": 5},{"ph": "E", "ts": 7765392, "pid": 6},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7765393, "pid": 6},{"ph": "E", "ts": 7765925, "pid": 7},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7765926, "pid": 7},{"ph": "E", "ts": 7765929, "pid": 8},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7765930, "pid": 8},{"ph": "E", "ts": 7765987, "pid": 1},{"ph": "E", "ts": 7765989, "pid": 2},{"ph": "E", "ts": 7765990, "pid": 3},{"ph": "E", "ts": 7765991, "pid": 4},{"ph": "E", "ts": 7765992, "pid": 5},{"ph": "E", "ts": 7765993, "pid": 6},{"ph": "E", "ts": 7765994, "pid": 7},{"ph": "E", "ts": 7765995, "pid": 8},{"ph": "B", "name": "QUEUE", "ts": 7766078, "pid": 1},{"ph": "B", "name": "QUEUE", "ts": 7766082, "pid": 2},{"ph": "B", "name": "QUEUE", "ts": 7766083, "pid": 3},{"ph": "B", "name": "QUEUE", "ts": 7766084, "pid": 4},{"ph": "B", "name": "QUEUE", "ts": 7766085, "pid": 5},{"ph": "B", "name": "QUEUE", "ts": 7766086, "pid": 6},{"ph": "B", "name": "QUEUE", "ts": 7766088, "pid": 7},{"ph": "B", "name": "QUEUE", "ts": 7766089, "pid": 8},{"ph": "E", "ts": 7766094, "pid": 1},{"ph": "E", "ts": 7766095, "pid": 2},{"ph": "E", "ts": 7766096, "pid": 3},{"ph": "E", "ts": 7766097, "pid": 4},{"ph": "E", "ts": 7766099, "pid": 5},{"ph": "E", "ts": 7766100, "pid": 6},{"ph": "E", "ts": 7766101, "pid": 7},{"ph": "E", "ts": 7766102, "pid": 8},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7766110, "pid": 1},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7766112, "pid": 2},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7766113, "pid": 3},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7766114, "pid": 4},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7766115, "pid": 5},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7766117, "pid": 6},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7766118, "pid": 
7},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7766119, "pid": 8},{"ph": "E", "ts": 7766123, "pid": 1},{"ph": "E", "ts": 7766124, "pid": 2},{"ph": "E", "ts": 7766125, "pid": 3},{"ph": "E", "ts": 7766126, "pid": 4},{"ph": "E", "ts": 7766127, "pid": 5},{"ph": "E", "ts": 7766129, "pid": 6},{"ph": "E", "ts": 7766130, "pid": 7},{"ph": "E", "ts": 7766131, "pid": 8},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7766133, "pid": 1},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7766134, "pid": 2},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7766135, "pid": 3},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7766136, "pid": 4},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7766137, "pid": 5},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7766138, "pid": 6},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7766139, "pid": 7},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7766140, "pid": 8},{"ph": "E", "ts": 7773786, "pid": 1},{"ph": "E", "ts": 7773789, "pid": 2},{"ph": "E", "ts": 7773789, "pid": 3},{"ph": "E", "ts": 7773791, "pid": 4},{"ph": "E", "ts": 7773792, "pid": 5},{"ph": "E", "ts": 7773793, "pid": 6},{"ph": "E", "ts": 7773794, "pid": 7},{"ph": "E", "ts": 7773795, "pid": 8},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7773799, "pid": 1},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7773800, "pid": 2},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7773801, "pid": 3},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7773801, "pid": 4},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7773802, "pid": 5},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7773803, "pid": 6},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7773804, "pid": 7},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7773805, "pid": 8},{"ph": "E", "ts": 7773817, "pid": 1},{"ph": "E", "ts": 7773818, "pid": 2},{"ph": "E", "ts": 7773819, "pid": 3},{"ph": "E", "ts": 7773820, "pid": 4},{"ph": "E", "ts": 7773822, "pid": 5},{"ph": "E", "ts": 7773823, "pid": 6},{"ph": "E", "ts": 
7773824, "pid": 7},{"ph": "E", "ts": 7773824, "pid": 8},{"ph": "E", "ts": 7773834, "pid": 1, "args": {"dtype": "float32", "shape": "[10]"}},{"ph": "E", "ts": 7773851, "pid": 2, "args": {"dtype": "float32", "shape": "[128, 10]"}},{"ph": "E", "ts": 7773863, "pid": 3, "args": {"dtype": "float32", "shape": "[9216, 128]"}},{"ph": "E", "ts": 7773873, "pid": 4, "args": {"dtype": "float32", "shape": "[128]"}},{"ph": "E", "ts": 7773883, "pid": 5, "args": {"dtype": "float32", "shape": "[3, 3, 32, 64]"}},{"ph": "E", "ts": 7773888, "pid": 6, "args": {"dtype": "float32", "shape": "[64]"}},{"ph": "E", "ts": 7773894, "pid": 7, "args": {"dtype": "float32", "shape": "[3, 3, 1, 32]"}},{"ph": "E", "ts": 7773898, "pid": 8, "args": {"dtype": "float32", "shape": "[32]"}},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7780070, "pid": 1},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7780071, "pid": 2},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7780073, "pid": 3},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7780074, "pid": 4},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7780075, "pid": 5},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7780077, "pid": 6},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7780078, "pid": 7},{"ph": "B", "name": "NEGOTIATE_ALLREDUCE", "ts": 7780079, "pid": 8},{"ph": "E", "ts": 7780080, "pid": 1},{"ph": "E", "ts": 7780081, "pid": 2},{"ph": "E", "ts": 7780082, "pid": 3},{"ph": "E", "ts": 7780084, "pid": 4},{"ph": "B", "name": "ALLREDUCE", "ts": 7780110, "pid": 1},{"ph": "B", "name": "ALLREDUCE", "ts": 7780111, "pid": 2},{"ph": "B", "name": "ALLREDUCE", "ts": 7780112, "pid": 3},{"ph": "B", "name": "ALLREDUCE", "ts": 7780113, "pid": 4},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7780125, "pid": 1},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7780132, "pid": 2},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7780134, "pid": 3},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7780135, "pid": 4},{"ph": "E", "ts": 7780140, "pid": 1},{"ph": "B", "name": 
"WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7780142, "pid": 1},{"ph": "E", "ts": 7780145, "pid": 2},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7780146, "pid": 2},{"ph": "E", "ts": 7780148, "pid": 3},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7780149, "pid": 3},{"ph": "E", "ts": 7780151, "pid": 4},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7780152, "pid": 4},{"ph": "E", "ts": 7780210, "pid": 1},{"ph": "E", "ts": 7780212, "pid": 2},{"ph": "E", "ts": 7780213, "pid": 3},{"ph": "E", "ts": 7780214, "pid": 4},{"ph": "B", "name": "QUEUE", "ts": 7780280, "pid": 1},{"ph": "B", "name": "QUEUE", "ts": 7780282, "pid": 2},{"ph": "B", "name": "QUEUE", "ts": 7780283, "pid": 3},{"ph": "B", "name": "QUEUE", "ts": 7780284, "pid": 4},{"ph": "E", "ts": 7780287, "pid": 1},{"ph": "E", "ts": 7780288, "pid": 2},{"ph": "E", "ts": 7780289, "pid": 3},{"ph": "E", "ts": 7780291, "pid": 4},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7780292, "pid": 1},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7780293, "pid": 2},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7780294, "pid": 3},{"ph": "B", "name": "MEMCPY_IN_FUSION_BUFFER", "ts": 7780295, "pid": 4},{"ph": "E", "ts": 7780435, "pid": 1},{"ph": "E", "ts": 7780436, "pid": 2},{"ph": "E", "ts": 7780438, "pid": 3},{"ph": "E", "ts": 7780439, "pid": 4},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7780440, "pid": 1},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7780441, "pid": 2},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7780441, "pid": 3},{"ph": "B", "name": "NCCL_ALLREDUCE", "ts": 7780442, "pid": 4},{"ph": "E", "ts": 7781128, "pid": 5},{"ph": "E", "ts": 7781130, "pid": 6},{"ph": "E", "ts": 7781132, "pid": 7},{"ph": "E", "ts": 7781133, "pid": 8},{"ph": "B", "name": "ALLREDUCE", "ts": 7781161, "pid": 5},{"ph": "B", "name": "ALLREDUCE", "ts": 7781163, "pid": 6},{"ph": "B", "name": "ALLREDUCE", "ts": 7781165, "pid": 7},{"ph": "B", "name": "ALLREDUCE", "ts": 7781166, "pid": 8},{"ph": "B", "name": 
"WAIT_FOR_DATA", "ts": 7781179, "pid": 5},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7781181, "pid": 6},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7781183, "pid": 7},{"ph": "B", "name": "WAIT_FOR_DATA", "ts": 7781186, "pid": 8},{"ph": "E", "ts": 7783125, "pid": 5},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7783127, "pid": 5},{"ph": "E", "ts": 7783132, "pid": 6},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7783133, "pid": 6},{"ph": "E", "ts": 7783675, "pid": 7},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7783677, "pid": 7},{"ph": "E", "ts": 7783680, "pid": 8},{"ph": "B", "name": "WAIT_FOR_OTHER_TENSOR_DATA", "ts": 7783681, "pid": 8},{"ph": "E", "ts": 7783742, "pid": 5},{"ph": "E", "ts": 7783743, "pid": 6},{"ph": "E", "ts": 7783745, "pid": 7},{"ph": "E", "ts": 7783746, "pid": 8},{"ph": "E", "ts": 7786903, "pid": 1},{"ph": "E", "ts": 7786905, "pid": 2},{"ph": "E", "ts": 7786907, "pid": 3},{"ph": "E", "ts": 7786908, "pid": 4},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "ts": 7786912, "pid": 1},{"ph": "B", "name": "MEMCPY_OUT_FUSION_BUFFER", "
*** WARNING: skipped 1621820 bytes of output ***