Probably is a easy question, I've searched but i can't find the solution.
My code is something like
data_df = pd.DataFrame([
['2012-02-22', '3', 'a', 6],
['2012-02-23', '3.2', 'g', 8],
['2012-02-24', '5.2', 'l', 2],
['2012-02-25', '1.4', 'i', 4]],
columns=['date', '1', '2', '3'])
dict_a = {
'a': np.array([False, True, False, False], dtype='bool'),
'b': np.array([True, True, False, False], dtype='bool'),
'c': np.array([False, True, True, False], dtype='bool'),
}
and I would like to have a df like this
1 2 3 a b c
date
2012-02-22 3 a 6 False True False
2012-02-23 3.2 g 8 True True True
2012-02-24 5.2 l 2 False False True
2012-02-25 1.4 i 4 False False False
until now the best way I've found is this but it seems hacky to me
data_df = data_df.set_index('date')
df_dict = pd.DataFrame.from_dict(dict_a)
df_dict['date'] = data_df.index
df_dict = df_dict.set_index('date')
df_new = pd.merge(data_df, df_dict, left_index=True, right_index=True)
there is a faster/better way to achieve it?
Thank you all for the very fast responses. I've made some timing and (so far) looks like that with the given data the fastest is the 1st.
def df_new1():
data_df = pd.DataFrame([
['2012-02-22', '3', 'a', 6],
['2012-02-23', '3.2', 'g', 8],
['2012-02-24', '5.2', 'l', 2],
['2012-02-25', '1.4', 'i', 4]],
columns=['date', '1', '2', '3'])
dict_a = {
'a1': np.array([False, True, False, False], dtype='bool'),
'b1': np.array([True, True, False, False], dtype='bool'),
'c1': np.array([False, True, True, False], dtype='bool'),
}
return pd.concat((data_df, pd.DataFrame(dict_a)), axis=1).set_index('date')
def df_new2():
data_df = pd.DataFrame([
['2012-02-22', '3', 'a', 6],
['2012-02-23', '3.2', 'g', 8],
['2012-02-24', '5.2', 'l', 2],
['2012-02-25', '1.4', 'i', 4]],
columns=['date', '1', '2', '3'])
dict_a = {
'a1': np.array([False, True, False, False], dtype='bool'),
'b1': np.array([True, True, False, False], dtype='bool'),
'c1': np.array([False, True, True, False], dtype='bool'),
}
return data_df.assign(**dict_a).set_index('date')
def df_new3():
data_df = pd.DataFrame([
['2012-02-22', '3', 'a', 6],
['2012-02-23', '3.2', 'g', 8],
['2012-02-24', '5.2', 'l', 2],
['2012-02-25', '1.4', 'i', 4]],
columns=['date', '1', '2', '3'])
dict_a = {
'a1': np.array([False, True, False, False], dtype='bool'),
'b1': np.array([True, True, False, False], dtype='bool'),
'c1': np.array([False, True, True, False], dtype='bool'),
}
return data_df.join(pd.DataFrame(dict_a)).set_index('date')
def df_new4():
data_df = pd.DataFrame([
['2012-02-22', '3', 'a', 6],
['2012-02-23', '3.2', 'g', 8],
['2012-02-24', '5.2', 'l', 2],
['2012-02-25', '1.4', 'i', 4]],
columns=['date', '1', '2', '3'])
dict_a = {
'a1': np.array([False, True, False, False], dtype='bool'),
'b1': np.array([True, True, False, False], dtype='bool'),
'c1': np.array([False, True, True, False], dtype='bool'),
}
for keys in dict_a:
data_df[keys] = dict_a[keys]
return data_df.set_index('date')
print('df_new1', timeit(df_new1, number=1000))
print('df_new2', timeit(df_new2, number=1000))
print('df_new3', timeit(df_new3, number=1000))
print('df_new4', timeit(df_new4, number=1000))
df_new1 2.0431520210004237
df_new2 2.6708478379987355
df_new3 2.4773063749998983
df_new4 2.910699995998584
pd.concat
on axis=1
, then set the index
pd.concat((data_df,pd.DataFrame(dict_a)),axis=1).set_index("date")
1 2 3 a b c
date
2012-02-22 3 a 6 False True False
2012-02-23 3.2 g 8 True True True
2012-02-24 5.2 l 2 False False True
2012-02-25 1.4 i 4 False False False