{simpleDF.columns
#output :['color', 'lab', 'value1', 'value2']
indexer = simpleDF.select('lab')
from pyspark.ml.feature import StringIndexer
# Let us create an object of the class StringIndexer
lblindexer=StringIndexer().setInputCol(indexer).setOutputCol("LabelIndexed")
idxRes=lblindexer.fit(simpleDF).transform(simpleDF)
idxRes.show(5)}
It works fine with the following line of code, but I want it to be more general (taking the column from a variable instead of hard-coding the name):
#lblindexer=StringIndexer().setInputCol('lab').setOutputCol("LabelIndexed")
When I pass the `indexer` variable to `setInputCol` instead, I get the error: TypeError: Invalid param value given for param "inputCol". Could not convert <class 'pyspark.sql.dataframe.DataFrame'> to string type
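`setInputCol` expects the column name as a string, not a DataFrame. `simpleDF.select('lab')` returns a DataFrame, which is why the conversion to a string fails. Pass the column name itself: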
lblindexer=StringIndexer().setInputCol('lab').setOutputCol("LabelIndexed")
If you want to use a variable, assign the column name (a string) to it rather than the result of `select`:
indexer = 'lab'
lblindexer=StringIndexer().setInputCol(indexer).setOutputCol("LabelIndexed")