Search code examples
pythonpandasdataframegenetic-algorithm

'ValueError: cannot set a row with mismatched columns' when adding a row to pandas DataFrame


While learning python, I decided to try create genetic algorithm and got stuck in the mutation step.

I will be glad for any advice both on solving this problem and in general on the architecture and style of the code.

one_generation = genlib.create_generation()
print(genlib.almost_generation(one_generation))

This code return error:

Traceback (most recent call last):
File "/home/rosrobot/PycharmProjects/gen2/main.py", line 23, in \<module\>
print(genlib.almost_generation(one_generation))

File "/home/rosrobot/PycharmProjects/gen2/genlib.py", line 83, in almost_generation
updated_generation.loc\[creature_index\] = sample\[updated_generation.columns\]

File "/home/rosrobot/PycharmProjects/gen2/venv/lib64/python3.10/site-packages/pandas/core/indexing.py", line 716, in __setitem__
iloc.\_setitem_with_indexer(indexer, value, self.name)

File "/home/rosrobot/PycharmProjects/gen2/venv/lib64/python3.10/site-packages/pandas/core/indexing.py", line 1682, in \_setitem_with_indexer
self.\_setitem_with_indexer_missing(indexer, value)

File "/home/rosrobot/PycharmProjects/gen2/venv/lib64/python3.10/site-packages/pandas/core/indexing.py", line 1998, in \_setitem_with_indexer_missing
raise ValueError("cannot set a row with mismatched columns")
ValueError: cannot set a row with mismatched columns

Process finished with exit code 1

Functions in 'genlib' file:

import random as rnd
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

pd.plotting.register_matplotlib_converters()

def create_creature(gen_length=10,
                    creature_name='one'):
    creature = pd.Series(data=[rnd.randint(0, 1) for i in range(gen_length)],
                         name=creature_name)
    return creature

def create_generation(generation_size=50,
                      gen_length=10):
    generation = pd.DataFrame(data=[create_creature(creature_name=(str(i + 1)) + 'th',
                                                    gen_length=gen_length) for i in range(generation_size)])
    generation['quality'] = generation.sum(axis=1)
    return generation

def __indexes_of_quality(generation):
    """
    :rtype: pd.Series
    """
    for i in generation.quality.unique():
        print('quality = ', i, ': ',
              generation.loc[generation.quality == i, 'quality'].index.values,
              '\n',
              '__')

def create_many_generations(number_of_generations=10,
                            generation_size=50,
                            gen_length=10):
    list_of_dataframes = pd.Series(data=[create_generation(generation_size=generation_size,
                                                           gen_length=gen_length
                                                           ) for i in range(number_of_generations)],
                                   name='creature_name')

    return list_of_dataframes

def one_generation_pyplot(generation):
    sns.barplot(x=generation.index,
                y=generation.sort_values('quality').quality)
    plt.show()

def many_generations_pyplot(list_of_generations):
    qualities = [sum(generation.quality) for generation in list_of_generations]
    sns.lineplot(data=qualities)
    plt.show()

def __mutation(creature: pd.Series) -> pd.Series:
    point = rnd.randint(0, len(creature))
    creature[point] = int(not creature[point].values)
    return creature

def almost_generation(generation):
    sample = generation.sample()
    sample = __mutation(sample)

    updated_generation = pd.DataFrame(columns=generation.columns)
    for creature_index in generation.index:
        if creature_index == sample.index:
            print(creature_index, ' == ', sample.index)
            updated_generation.loc[creature_index] = sample[updated_generation.columns]
        else:
            updated_generation.loc[creature_index] = generation.loc[creature_index]
    return updated_generation

I tried to convert "sample" to str, and also tried using loc, iloc and append


Solution

  • In your almost generation function, change the line within your if block to assign values:

    if creature_index == sample.index:
        print(creature_index, ' == ', sample.index)
        updated_generation.loc[creature_index] = sample[updated_generation.columns].values
    

    You can just simplify your entire function as follows:

    def almost_generation(generation):
        sample = generation.sample()
        sample = __mutation(sample)
        generation.loc[sample.index] = sample[generation.columns].values
        return generation