I have a json data stored as string column in spark dataframe and it has some bad data Ex:
{"name":"neo",
"age":"22",
"city""nowhere",
"country":""}
I need to transform the column value to add "city":"nowhere". I tried the following but it's not working as expected. What should I change to get this working
val regex = """(".*?")(".*?")"""
df.withColumn("updated_col", regexp_replace(col("value"), regex, "$1:$2"))
Check below solution
scala> df
.withColumn(
"updated",
regexp_replace(
$"data",
"""[^:]("")""",
"""\":\""""
)
)
.show(false)
+------------------------------------------------------+------------------------------------------------------+
|data |updated |
+------------------------------------------------------+------------------------------------------------------+
|{"name":"neo","age":"22","city""nowhere","country":""}|{"name":"neo","age":"22","cit":"nowhere","country":""}|
+------------------------------------------------------+------------------------------------------------------+
scala> df
.withColumn(
"updated",
regexp_replace(
$"data",
""""city""nowhere"""",
""""city":"nowhere""""
)
)
.show(false)
+------------------------------------------------------+-------------------------------------------------------+
|data |updated |
+------------------------------------------------------+-------------------------------------------------------+
|{"name":"neo","age":"22","city""nowhere","country":""}|{"name":"neo","age":"22","city":"nowhere","country":""}|
+------------------------------------------------------+-------------------------------------------------------+