In [1]: from pyspark.sql.types import StructField, StructType, StringType
...: from pyspark.sql.functions import col, from_json
In [2]: schema = StructType([
...: StructField("a", StringType()),
...: StructField("b", StringType()),
...: StructField("c", StringType()),
...: StructField("d", StringType()),
...: ])
In [3]: df = spark.createDataFrame([("1", '{"a": 1, "b": 2}'),
...: ("2", '{"a": 3, "c": 4}')],
...: schema=["Some col", "body"])
In [4]: df.show()
+--------+----------------+
|Some col|            body|
+--------+----------------+
|       1|{"a": 1, "b": 2}|
|       2|{"a": 3, "c": 4}|
+--------+----------------+
In [5]: df.select(from_json(col("body"), schema).alias("data")).select("data.*").show()
+---+----+----+----+
|  a|   b|   c|   d|
+---+----+----+----+
|  1|   2|null|null|
|  3|null|   4|null|
+---+----+----+----+
Click here to find more solutions to related problems.