Hi Pradeep, please find the stack trace below:
com.databricks.sql.io.FileReadException: Error while reading file wasbs:REDACTED_LOCAL_PART@*******.blob.core.windows.net/cook/processYear=2021/processMonth=01/processDay=09/processHour=00/part-00003-tid-4640843606947508963-a580-40bd-ad0d-e7c92f1e5b1f-29229-1.c000.avro.
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:286)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:264)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:205)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:354)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:205)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage58.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:640)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage65.agg_doAggregateWithKeys_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage65.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:640)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:139)
at org.apache.spark.scheduler.Task.run(Task.scala:112)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$13.apply(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1526)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:503)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException
at com.microsoft.azure.storage.core.Utility.initIOException(Utility.java:737)
at com.microsoft.azure.storage.blob.BlobInputStream.dispatchRead(BlobInputStream.java:264)
at com.microsoft.azure.storage.blob.BlobInputStream.readInternal(BlobInputStream.java:448)
at com.microsoft.azure.storage.blob.BlobInputStream.read(BlobInputStream.java:420)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:284)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at java.io.DataInputStream.read(DataInputStream.java:149)
at shaded.databricks.org.apache.hadoop.fs.azure.NativeAzureFileSystem$NativeAzureFsInputStream.read(NativeAzureFileSystem.java:876)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at java.io.DataInputStream.read(DataInputStream.java:149)
at com.databricks.spark.metrics.FSInputStreamWithMetrics$$anonfun$read$3.apply$mcI$sp(FileSystemWithMetrics.scala:206)
at com.databricks.spark.metrics.FSInputStreamWithMetrics$$anonfun$read$3.apply(FileSystemWithMetrics.scala:206)
at com.databricks.spark.metrics.FSInputStreamWithMetrics$$anonfun$read$3.apply(FileSystemWithMetrics.scala:206)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics$class.withTimeMetric(FileSystemWithMetrics.scala:151)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics$class.com$databricks$spark$metrics$ExtendedTaskIOMetrics$$withTimeAndBytesMetric(FileSystemWithMetrics.scala:171)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics$$anonfun$withTimeAndBytesReadMetric$1.apply$mcI$sp(FileSystemWithMetrics.scala:185)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics$$anonfun$withTimeAndBytesReadMetric$1.apply(FileSystemWithMetrics.scala:185)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics$$anonfun$withTimeAndBytesReadMetric$1.apply(FileSystemWithMetrics.scala:185)
at com.databricks.spark.metrics.SamplerWithPeriod.sample(FileSystemWithMetrics.scala:78)
at com.databricks.spark.metrics.ExtendedTaskIOMetrics$class.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:185)
at com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:192)
at com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:205)
at java.io.DataInputStream.read(DataInputStream.java:149)
at org.apache.avro.mapred.FsInput.read(FsInput.java:54)
at org.apache.spark.sql.avro.AvroFileFormat$.openAvroReader(AvroFileFormat.scala:275)
at org.apache.spark.sql.avro.AvroFileFormat$$anonfun$buildReader$1.apply(AvroFileFormat.scala:202)
at org.apache.spark.sql.avro.AvroFileFormat$$anonfun$buildReader$1.apply(AvroFileFormat.scala:183)
at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:147)
at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:134)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:235)
... 23 more
Caused by: com.microsoft.azure.storage.StorageException: Blob hash mismatch (integrity check failed), Expected value is xmypzfnpTdq8eFLxZ49DhQ==, retrieved CY7+V9/JEfVroD5omBB2Uw==.
at com.microsoft.azure.storage.blob.CloudBlob$9.postProcessResponse(CloudBlob.java:1409)
at com.microsoft.azure.storage.blob.CloudBlob$9.postProcessResponse(CloudBlob.java:1310)
at com.microsoft.azure.storage.core.ExecutionEngine.executeWithRetry(ExecutionEngine.java:149)
at com.microsoft.azure.storage.blob.CloudBlob.downloadRangeInternal(CloudBlob.java:1493)
at com.microsoft.azure.storage.blob.BlobInputStream.dispatchRead(BlobInputStream.java:255)
... 53 more