Share via


str_to_map

Converts a string into a map after splitting the text into key/value pairs using delimiters. Both pairDelim and keyValueDelim are treated as regular expressions.

Syntax

from pyspark.sql import functions as sf

sf.str_to_map(text, pairDelim=None, keyValueDelim=None)

Parameters

Parameter Type Description
text pyspark.sql.Column or str Input column, or the name of a column, containing the strings to convert.
pairDelim pyspark.sql.Column or str, optional Delimiter to use to split pairs. Default is comma (,).
keyValueDelim pyspark.sql.Column or str, optional Delimiter to use to split key/value. Default is colon (:).

Returns

pyspark.sql.Column: A new column of map type where each string in the original column is converted into a map.

Examples

Example 1: Using default delimiters

from pyspark.sql import functions as sf
df = spark.createDataFrame([("a:1,b:2,c:3",)], ["e"])
df.select(sf.str_to_map(df.e)).show(truncate=False)
+------------------------+
|str_to_map(e, ,, :)     |
+------------------------+
|{a -> 1, b -> 2, c -> 3}|
+------------------------+

Example 2: Using custom delimiters

from pyspark.sql import functions as sf
df = spark.createDataFrame([("a=1;b=2;c=3",)], ["e"])
df.select(sf.str_to_map(df.e, sf.lit(";"), sf.lit("="))).show(truncate=False)
+------------------------+
|str_to_map(e, ;, =)     |
+------------------------+
|{a -> 1, b -> 2, c -> 3}|
+------------------------+

Example 3: Using different delimiters for different rows

from pyspark.sql import functions as sf
df = spark.createDataFrame([("a:1,b:2,c:3",), ("d=4;e=5;f=6",)], ["e"])
df.select(sf.str_to_map(df.e,
  sf.when(df.e.contains(";"), sf.lit(";")).otherwise(sf.lit(",")),
  sf.when(df.e.contains("="), sf.lit("=")).otherwise(sf.lit(":"))).alias("str_to_map")
).show(truncate=False)
+------------------------+
|str_to_map              |
+------------------------+
|{a -> 1, b -> 2, c -> 3}|
|{d -> 4, e -> 5, f -> 6}|
+------------------------+

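Example 4: Using a column of pair delimiters

Only the pair delimiter varies per row here; the key/value delimiter is fixed to ":". The second row's pairs use "=" instead, so they are not split into key and value, and each whole pair becomes a key with a NULL value.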

from pyspark.sql import functions as sf
df = spark.createDataFrame([("a:1,b:2,c:3", ","), ("d=4;e=5;f=6", ";")], ["e", "delim"])
df.select(sf.str_to_map(df.e, df.delim, sf.lit(":"))).show(truncate=False)
+---------------------------------------+
|str_to_map(e, delim, :)                |
+---------------------------------------+
|{a -> 1, b -> 2, c -> 3}               |
|{d=4 -> NULL, e=5 -> NULL, f=6 -> NULL}|
+---------------------------------------+

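Example 5: Using a column of key/value delimiters

Only the key/value delimiter varies per row here; the pair delimiter is fixed to ",". The second row separates its pairs with ";" instead, so the whole string is treated as a single pair and is split into key and value at the first "=".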

from pyspark.sql import functions as sf
df = spark.createDataFrame([("a:1,b:2,c:3", ":"), ("d=4;e=5;f=6", "=")], ["e", "delim"])
df.select(sf.str_to_map(df.e, sf.lit(","), df.delim)).show(truncate=False)
+------------------------+
|str_to_map(e, ,, delim) |
+------------------------+
|{a -> 1, b -> 2, c -> 3}|
|{d -> 4;e=5;f=6}        |
+------------------------+