python function dataframe dummy-variable

python function on dataframe did not return expected result

I wrote the following function to convert a categorical variable to dummy columns:

def convert_to_dummies(df, column):
    """Return a copy of ``df`` in which ``column`` is replaced by dummy columns.

    One indicator column per distinct value of ``df[column]`` is appended to
    the right of the existing columns, and ``column`` itself is removed.
    The frame passed in is left unchanged; callers must use the return value.
    """
    indicator_cols = pd.get_dummies(df[column])
    # Append the indicators first, then remove the source column by label.
    widened = pd.concat([df, indicator_cols], axis=1)
    return widened.drop(columns=column)

But when I apply this to categorical variables in a df:

for col in ['col1', 'col2', ....]:
    convert_to_dummies(df, col)

* 'col1', 'col2', ... are categorical columns in df.

I got the original df, and none of the categorical variables are converted to dummies. What did I do wrong?

Solution

You need to assign the output back to df, because the function returns a new DataFrame instead of modifying df in place:

# convert_to_dummies returns a new DataFrame and leaves its argument untouched,
# so rebind df to the result on every iteration.
for col in ['col1', 'col2', ....]:
    df = convert_to_dummies(df, col)

Sample:

df = pd.DataFrame({'col1':list('abcdef'),
                   'col2':list('abadec'),
                   'col3':list('aaadee'),
                   'col4':list('aabbcc')})

print (df)
  col1 col2 col3 col4
0    a    a    a    a
1    b    b    a    a
2    c    a    a    b
3    d    d    d    b
4    e    e    e    c
5    f    c    e    c

for col in ['col1', 'col2']:
    df = convert_to_dummies(df, col)

print (df)
  col3 col4  a  b  c  d  e  f  a  b  c  d  e
0    a    a  1  0  0  0  0  0  1  0  0  0  0
1    a    a  0  1  0  0  0  0  0  1  0  0  0
2    a    b  0  0  1  0  0  0  1  0  0  0  0
3    d    b  0  0  0  1  0  0  0  0  0  1  0
4    e    c  0  0  0  0  1  0  0  0  0  0  1
5    e    c  0  0  0  0  0  1  0  0  1  0  0

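If you need unique dummy columns (one per category value across all the selected columns), it is better to remove the loop and process all the columns at once: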

def convert_to_dummies_cols(df, cols):
    """Replace several categorical columns with one shared set of dummy columns.

    All columns in ``cols`` are encoded together, and indicator columns that
    stand for the same category value are merged, so each distinct value
    yields exactly one output column. A merged indicator is set for a row
    when any of the columns in ``cols`` holds that value in that row.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    cols : list
        Labels of the columns to encode.

    Returns
    -------
    pandas.DataFrame
        New frame with ``cols`` dropped and the merged indicator columns
        appended on the right.
    """
    # Empty prefix/separator: name each dummy after the bare category value, so
    # the same value coming from different source columns gets the same label.
    dummies = pd.get_dummies(df[cols], prefix='', prefix_sep='')
    # Merge same-named columns with max (a logical OR for indicators). The
    # grouping runs on the transposed frame because groupby(..., axis=1) is
    # deprecated in recent pandas releases.
    dummies = dummies.T.groupby(level=0).max().T
    # Append the merged indicators, then remove the source columns by label.
    df = pd.concat([df, dummies], axis=1)
    return df.drop(columns=cols)


#parameter is list of columns for dummies
df = convert_to_dummies_cols(df, ['col1', 'col2'])
print (df)
  col3 col4  a  b  c  d  e  f
0    a    a  1  0  0  0  0  0
1    a    a  0  1  0  0  0  0
2    a    b  1  0  1  0  0  0
3    d    b  0  0  0  1  0  0
4    e    c  0  0  0  0  1  0
5    e    c  0  0  1  0  0  1