I have this dataframe with a single row, with values that are string
, int
, bool
, array
:
col1 | col2 | col3 | col4 | col5 |
---|---|---|---|---|
val1 | val2 | val3 | val4 | val5 |
And I want to transpose it like this:
col1 | col2 | Columns | values |
---|---|---|---|
val1 | val2 | col3 | val3 |
val1 | val2 | col4 | val4 |
val1 | val2 | col5 | val5 |
The first 2 columns are to be AS IS, and the remaining columns need to be unpivoted.
You can use the melt function to unpivot the dataframe:
def melt(df: DataFrame,
idVars: Array[String],
valueVars: Array[String],
varName: String = "Columns",
valueName: String = "values"): DataFrame = {
val columns = valueVars.map(c => Array(lit(c), col(c))).flatten
val varsAndVals = map(columns: _*)
df.select(idVars.map(col(_)).:+(explode(varsAndVals)): _*)
.withColumnRenamed("key", varName)
.withColumnRenamed("value", valueName)
}
then call ii:
melt(df, Array("col1", "col2"), Array("col3", "col4", "col5")).show()
gives:
+----+----+-------+------+
|col1|col2|Columns|values|
+----+----+-------+------+
|val1|val2| col3| val3|
|val1|val2| col4| val4|
|val1|val2| col5| val5|
+----+----+-------+------+