Note
Access to this page requires authorization. You can try signing in or changing directories.
Access to this page requires authorization. You can try changing directories.
This is a special version of parse_url that performs the same operation, but returns a NULL value instead of raising an error if the parsing cannot be performed.
Syntax
from pyspark.sql import functions as sf
sf.try_parse_url(url, partToExtract, key=None)
Parameters
| Parameter | Type | Description |
|---|---|---|
url |
pyspark.sql.Column or str |
A column of strings, each representing a URL. |
partToExtract |
pyspark.sql.Column or str |
A column of strings, each representing the part to extract from the URL. |
key |
pyspark.sql.Column or str, optional |
A column of strings, each representing the key of a query parameter in the URL. |
Returns
pyspark.sql.Column: A new column of strings, each representing the value of the extracted part from the URL.
Examples
Example 1: Extracting the query part from a URL
from pyspark.sql import functions as sf
df = spark.createDataFrame(
[("https://spark.apache.org/path?query=1", "QUERY")],
["url", "part"]
)
df.select(sf.try_parse_url(df.url, df.part)).show()
+------------------------+
|try_parse_url(url, part)|
+------------------------+
| query=1|
+------------------------+
Example 2: Extracting the value of a specific query parameter from a URL
from pyspark.sql import functions as sf
df = spark.createDataFrame(
[("https://spark.apache.org/path?query=1", "QUERY", "query")],
["url", "part", "key"]
)
df.select(sf.try_parse_url(df.url, df.part, df.key)).show()
+-----------------------------+
|try_parse_url(url, part, key)|
+-----------------------------+
| 1|
+-----------------------------+
Example 3: Extracting the protocol part from a URL
from pyspark.sql import functions as sf
df = spark.createDataFrame(
[("https://spark.apache.org/path?query=1", "PROTOCOL")],
["url", "part"]
)
df.select(sf.try_parse_url(df.url, df.part)).show()
+------------------------+
|try_parse_url(url, part)|
+------------------------+
| https|
+------------------------+
Example 4: Extracting the host part from a URL
from pyspark.sql import functions as sf
df = spark.createDataFrame(
[("https://spark.apache.org/path?query=1", "HOST")],
["url", "part"]
)
df.select(sf.try_parse_url(df.url, df.part)).show()
+------------------------+
|try_parse_url(url, part)|
+------------------------+
| spark.apache.org|
+------------------------+
Example 5: Extracting the path part from a URL
from pyspark.sql import functions as sf
df = spark.createDataFrame(
[("https://spark.apache.org/path?query=1", "PATH")],
["url", "part"]
)
df.select(sf.try_parse_url(df.url, df.part)).show()
+------------------------+
|try_parse_url(url, part)|
+------------------------+
| /path|
+------------------------+
Example 6: Invalid URL
from pyspark.sql import functions as sf
df = spark.createDataFrame(
[("inva lid://spark.apache.org/path?query=1", "QUERY", "query")],
["url", "part", "key"]
)
df.select(sf.try_parse_url(df.url, df.part, df.key)).show()
+-----------------------------+
|try_parse_url(url, part, key)|
+-----------------------------+
| NULL|
+-----------------------------+