I am working in a Synapse notebook and getting the same error. I am trying to use the code below:
from pyspark.sql.functions import udf, col
from notebookutils import mssparkutils

def get_file_details(file_path):
    # Get file details using mssparkutils.fs.ls
    file_details = mssparkutils.fs.ls(file_path)
    print(file_details)
    return file_details

# Register the UDF
get_file_details_udf = udf(lambda path: get_file_details(path))

# Apply the UDF to the 'file_path' column to get file details
df = df.withColumn("file_details", get_file_details_udf(col("file_path")))
However, when I try to print the dataframe, I get the following error message:
PythonException:
An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
File "/home/trusted-service-user/cluster-env/env/lib/python3.10/site-packages/notebookutils/__init__.py", line 4, in <module>
from notebookutils.visualization import display, displayHTML, enableMatplotlib
File "/home/trusted-service-user/cluster-env/env/lib/python3.10/site-packages/notebookutils/visualization/__init__.py", line 1, in <module>
from .display import display, display_mount_points
File "/home/trusted-service-user/cluster-env/env/lib/python3.10/site-packages/notebookutils/visualization/display.py", line 11, in <module>
from notebookutils.common.logger import log4jLogger
File "/home/trusted-service-user/cluster-env/env/lib/python3.10/site-packages/notebookutils/common/logger.py", line 6, in <module>
sc = SparkSession.Builder().getOrCreate().sparkContext
File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/session.py", line 269, in getOrCreate
sc = SparkContext.getOrCreate(sparkConf)
File "/opt/spark/python/lib/pyspark.zip/pyspark/context.py", line 484, in getOrCreate
SparkContext(conf=conf or SparkConf())
File "/opt/spark/python/lib/pyspark.zip/pyspark/context.py", line 188, in __init__
SparkContext._assert_on_driver()
File "/opt/spark/python/lib/pyspark.zip/pyspark/context.py", line 1545, in _assert_on_driver
raise RuntimeError("SparkContext should only be created and accessed on the driver.")
RuntimeError: SparkContext should only be created and accessed on the driver.
I also tried creating the Spark config as below, but I am still getting the same error:
from pyspark import SparkConf
from pyspark.sql import SparkSession

# Create a SparkConf object and set the necessary configurations
conf = SparkConf().set("spark.executor.allowSparkContext", "true")

# Create a SparkSession with the configured SparkConf
spark = SparkSession.builder.config(conf=conf).getOrCreate()
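From the traceback it looks like mssparkutils tries to access the driver's SparkContext, which is not available inside a UDF running on the executors. Is the right approach instead to collect the paths and call mssparkutils.fs.ls on the driver, roughly like this (an untested sketch, assuming the number of distinct paths is small and that the returned FileInfo objects expose name, size, and isDir)?

from pyspark.sql.functions import col
from notebookutils import mssparkutils

# Collect the distinct paths to the driver (assumes the list is small enough to collect)
paths = [row["file_path"] for row in df.select("file_path").distinct().collect()]

# Call mssparkutils.fs.ls on the driver for each path
details = []
for p in paths:
    for f in mssparkutils.fs.ls(p):
        details.append((p, f.name, f.size, f.isDir))

# Build a lookup dataframe and join it back to the original dataframe
details_df = spark.createDataFrame(details, ["file_path", "name", "size", "is_dir"])
df = df.join(details_df, on="file_path", how="left")

Would this be the recommended way to get file details per row, or is there a way to make mssparkutils work inside a UDF?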