Executing the following statement on Apache Spark runtime 3.4 results in an error. I tried this on different vanilla Spark pool configurations on different tenants in West Europe.
database_name = 'test_sppool34'
target_location = "abfss://testsppool34@XXXXXXXX.dfs.core.windows.net/"
stmnt = f"CREATE DATABASE IF NOT EXISTS {database_name} LOCATION '{target_location}'"
spark.sql(stmnt)
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
Cell In[23], line 5
2 target_location = "abfss://testsppool34@XXXXXXXXX.dfs.core.windows.net/"
4 stmnt = f"CREATE DATABASE IF NOT EXISTS {database_name} LOCATION '{target_location}';"
----> 5 spark.sql(stmnt)
File /opt/spark/python/lib/pyspark.zip/pyspark/sql/session.py:1440, in SparkSession.sql(self, sqlQuery, args, **kwargs)
1438 try:
1439 litArgs = {k: _to_java_column(lit(v)) for k, v in (args or {}).items()}
-> 1440 return DataFrame(self._jsparkSession.sql(sqlQuery, litArgs), self)
1441 finally:
1442 if len(kwargs) > 0:
File ~/cluster-env/env/lib/python3.10/site-packages/py4j/java_gateway.py:1322, in JavaMember.__call__(self, *args)
1316 command = proto.CALL_COMMAND_NAME +\
1317 self.command_header +\
1318 args_command +\
1319 proto.END_COMMAND_PART
1321 answer = self.gateway_client.send_command(command)
-> 1322 return_value = get_return_value(
1323 answer, self.gateway_client, self.target_id, self.name)
1325 for temp_arg in temp_args:
1326 if hasattr(temp_arg, "_detach"):
File /opt/spark/python/lib/pyspark.zip/pyspark/errors/exceptions/captured.py:169, in capture_sql_exception.<locals>.deco(*a, **kw)
167 def deco(*a: Any, **kw: Any) -> Any:
168 try:
--> 169 return f(*a, **kw)
170 except Py4JJavaError as e:
171 converted = convert_exception(e.java_exception)
File ~/cluster-env/env/lib/python3.10/site-packages/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
Py4JJavaError: An error occurred while calling o277.sql.
: java.lang.NoClassDefFoundError: Could not initialize class org.json4s.jackson.Serialization$
at com.microsoft.azure.synapse.TokenServiceClient.invokePostApi(TokenServiceClient.scala:93)
at com.microsoft.azure.synapse.TokenServiceClient.callTokenApi(TokenServiceClient.scala:152)
at com.microsoft.azure.synapse.tokenlibrary.TokenLibraryInternal.tokenServiceCall$1(TokenLibrary.scala:115)
at com.microsoft.azure.synapse.tokenlibrary.TokenLibraryInternal.$anonfun$getAccessToken$4(TokenLibrary.scala:124)
at com.microsoft.azure.synapse.tokenlibrary.TokenLibraryInternal.getFromCacheOrCallTokenService(TokenLibrary.scala:73)
at com.microsoft.azure.synapse.tokenlibrary.TokenLibraryInternal.getAccessToken(TokenLibrary.scala:124)
at com.microsoft.azure.synapse.tokenlibrary.TokenLibrary$.getAccessToken(TokenLibrary.scala:468)
at com.microsoft.azure.synapse.tokenlibrary.SessionTokenBasedTokenProvider.$anonfun$getAccessToken$1(SessionTokenBasedTokenProvider.scala:128)
at scala.util.Try$.apply(Try.scala:213)
at com.microsoft.azure.synapse.tokenlibrary.SessionTokenBasedTokenProvider.getAccessToken(SessionTokenBasedTokenProvider.scala:126)
at org.apache.hadoop.fs.azurebfs.oauth2.CustomTokenProviderAdapter.refreshToken(CustomTokenProviderAdapter.java:74)
at org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider.getToken(AccessTokenProvider.java:50)
at org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAccessToken(AbfsClient.java:1055)
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.executeHttpOperation(AbfsRestOperation.java:256)
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.completeExecute(AbfsRestOperation.java:217)
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.lambda$execute$0(AbfsRestOperation.java:191)
at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation(IOStatisticsBinding.java:464)
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.execute(AbfsRestOperation.java:189)
at org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAclStatus(AbfsClient.java:911)
at org.apache.hadoop.fs.azurebfs.services.AbfsClient.getAclStatus(AbfsClient.java:892)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getIsNamespaceEnabled(AzureBlobFileSystemStore.java:421)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getFileStatus(AzureBlobFileSystemStore.java:1036)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.getFileStatus(AzureBlobFileSystem.java:650)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.getFileStatus(AzureBlobFileSystem.java:640)
at org.apache.hadoop.hive.metastore.Warehouse.isDir(Warehouse.java:520)
at com.microsoft.catalog.metastore.metastoreclient.HiveMetastoreClientImp.makeDirs(HiveMetastoreClientImp.java:260)
at com.microsoft.catalog.metastore.metastoreclient.HiveMetastoreClientImp.createDatabase(HiveMetastoreClientImp.java:158)
at com.microsoft.catalog.metastore.metastoreclient.HiveMetastoreClient.createDatabase(HiveMetastoreClient.java:762)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at com.microsoft.catalog.metastore.metastoreclient.PerformanceTelemetryHiveMetastoreClientInvoker.invoke(PerformanceTelemetryHiveMetastoreClientInvoker.java:26)
at com.sun.proxy.$Proxy122.createDatabase(Unknown Source)
at org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:430)
at org.apache.spark.sql.hive.client.Shim_v0_12.createDatabase(HiveShim.scala:574)
at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$createDatabase$1(HiveClientImpl.scala:349)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:304)
at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:234)
at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:233)
at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:283)
at org.apache.spark.sql.hive.client.HiveClientImpl.createDatabase(HiveClientImpl.scala:346)
at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$createDatabase$1(HiveExternalCatalog.scala:193)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:102)
at org.apache.spark.sql.hive.HiveExternalCatalog.createDatabase(HiveExternalCatalog.scala:193)
at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.createDatabase(ExternalCatalogWithListener.scala:47)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:317)
at org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog.createNamespace(V2SessionCatalog.scala:307)
at org.apache.spark.sql.connector.catalog.DelegatingCatalogExtension.createNamespace(DelegatingCatalogExtension.java:163)
at org.apache.spark.sql.execution.datasources.v2.CreateNamespaceExec.run(CreateNamespaceExec.scala:47)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:152)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:120)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:209)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:105)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:67)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:152)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:145)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:145)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:129)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:123)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:230)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:640)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:630)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:662)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:829)