Search code examples
pythonpandasdataframeindexingvalueerror

python dataframe ValueError Both index level and column label


I am trying to join multiple high-low differences for 20 different stocks.
i used this doe, and give me error messages:

raise ValueError(msg) ValueError: 'date' is both an index level and a column label, which is ambiguous.
import pandas as pd

def test_demo():
    tickers = ['ADI', 'ACN', 'ABT']

    df2 = pd.DataFrame()
    main_df = pd.DataFrame()

    for count, ticker in enumerate(tickers):
        df = pd.read_csv('demo\{}.csv'.format(ticker))

        count += 1

        df2['date'] = df['date']
        df2['diff'] = (df['high'] - df['low'])
        df2 = df2.set_index('date')
        df2.rename(columns={'diff': ticker}, inplace=True)
        print(df2)  
        if main_df.empty:
            main_df = df2
            count = 1
        else:
            main_df = main_df.join(df2, on='date', how='outer')

        if count % 10 == 0:
            print(count)

    main_df.to_csv('testdemo.csv')

test_demo()

I need ['date'] to be index. because some stock data run on different periods. what option do I have?
the error message is mixed with the code.

                 ADI
date                
1996-01-02  0.375000
1996-01-03  0.437500
1996-01-04  0.421875
....
2020-12-30  2.110001
2020-12-31  2.130005

[6295 rows x 1 columns]
           ADI  ACN
Traceback (most recent call last):
date               
NaN   0.375000  NaN
NaN   0.437500  NaN
NaN   0.421875  NaN
  File "D:\PycharmProjects\backtraderP1\Main.py", line 81, in <module>
NaN   0.375000  NaN
    from zfunctions.WebDemo import test_demo
NaN   0.140625  NaN
  File "D:\PycharmProjects\backtraderP1\zfunctions\WebDemo.py", line 30, in <module>
...        ...  ...
    test_demo()
NaN   1.470001  NaN
NaN   3.039993  NaN
  File "D:\PycharmProjects\backtraderP1\zfunctions\WebDemo.py", line 23, in test_demo
NaN   2.500000  NaN
NaN   2.110001  NaN
    main_df = main_df.join(df2, on='date', how='outer')
NaN   2.130005  NaN

  File "C:\Users\Cornerstone\AppData\Roaming\Python\Python39\site-packages\pandas\core\frame.py", line 8110, in join
[6295 rows x 2 columns]
    return self._join_compat(
  File "C:\Users\Cornerstone\AppData\Roaming\Python\Python39\site-packages\pandas\core\frame.py", line 8135, in _join_compat
    return merge(
  File "C:\Users\Cornerstone\AppData\Roaming\Python\Python39\site-packages\pandas\core\reshape\merge.py", line 74, in merge
    op = _MergeOperation(
  File "C:\Users\Cornerstone\AppData\Roaming\Python\Python39\site-packages\pandas\core\reshape\merge.py", line 668, in __init__
    ) = self._get_merge_keys()
  File "C:\Users\Cornerstone\AppData\Roaming\Python\Python39\site-packages\pandas\core\reshape\merge.py", line 1058, in _get_merge_keys
    left_keys.append(left._get_label_or_level_values(k))
  File "C:\Users\Cornerstone\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py", line 1679, in _get_label_or_level_values
    self._check_label_or_level_ambiguity(key, axis=axis)
  File "C:\Users\Cornerstone\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py", line 1638, in _check_label_or_level_ambiguity
    raise ValueError(msg)
ValueError: 'date' is both an index level and a column label, which is ambiguous.

Process finished with exit code 1

above is the trace of the error messages

if I don't do any calculation, and the code would work. I do not follow where I had the date twice

the following code can run without error

import pandas as pd

def test_demo():
    tickers = ['ADI', 'ACN', 'ABT']
      
    main_df = pd.DataFrame()
    
    for count, ticker in enumerate(tickers):
        df = pd.read_csv('demo\{}.csv'.format(ticker))
        # df.set_index('date', inplace=True)
        count += 1

        df.rename(columns={'value': ticker}, inplace=True)

        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
        df.drop(['open', 'high', 'low', 'close', 'volume', 'position', 'cash'], 1, inplace=True)
        df = df.set_index('date')
        # print(df)
        # print(DataFrame.dtypes)

        if main_df.empty:
            main_df = df
            count = 1
        else:
            main_df = main_df.join(df, on='date', how='outer')
            # main_df = main_df.merge(df, on='date')
            # print(main_df)
        if count % 10 == 0:
            print(count)


    main_df.to_csv('testdemo.csv')


test_demo()

Solution

  • I think is possible simplify your code for create list of DataFrames and join with outer join in concat by dates:

    def test_demo():
        
        dfs = []
        tickers = ['ADI', 'ACN', 'ABT']
        for count, ticker in enumerate(tickers):
            df = pd.read_csv('demo\{}.csv'.format(ticker))
            count += 1
    
            df = df.set_index('date')
            df[ticker] = df['high'] - df['low']
    
            print(df)  
            dfs.append(df)
    
            if count % 10 == 0:
                print(count)
    
        main_df = pd.concat(dfs, axis=1)
        main_df.to_csv('testdemo.csv')
    
    test_demo()