In a dataframe, columns with an NaN in the first row should be added to the last column with a number inside. If there are two columns after another with NaN those should both be added to the last column with a number in the first row.
Column A | Column B | Column C | Column D | Column E |
---|---|---|---|---|
100 | NaN | 300 | NaN | NaN |
R100 | R200 | R300 | R400 | R500 |
The result should look like this:
Column A | Column C |
---|---|
100 | 300 |
R100 | R300 |
NaN | NaN |
R200 | R400 |
NaN | |
R500 |
and finally:
Column A | Column C |
---|---|
100 | 300 |
R100 | R300 |
R200 | R400 |
R500 |
Update: So far I got this, but I'm struggeling with column E. It won't be added to the end of column C, where already the numbers of Column D are:
import pandas as pd
# Iterate through each column
current_column = 0
while current_column < num_columns:
if pd.isnull(df.iloc[0, current_column]):
#print('for loop for', current_column_name)
# get the name of the current column
current_column_name = df.columns[current_column]
# get the name of the previous column
previous_column = df.columns[current_column - 1]
# count the values/rows to shift
count = df[previous_column].count()
df[current_column_name] = df[current_column_name].shift(count-1)
# add the values of the current column to the previous column
#df[previous_column] = df[current_column_name].combine_first(df[previous_column].shift())
df[previous_column] = df[current_column_name].combine_first(df[previous_column])
# Delete the current column
df.drop(df.columns[current_column], axis=1, inplace=True)
# Update the number of columns after deletion
num_columns -= 1
else:
print('the current column ', current_column_name, 'is not NaN')
current_column += 1
Probably not the best code, but worked for my case with 500+ columns and 100 rows.
# Assign column names to transposed DataFrame
df.columns = column_names
# Get the total number of columns
num_columns = len(df.columns)
# Iterate through each column
current_column = 0
while current_column < num_columns:
# get the name of the current column
current_column_name = df.columns[current_column]
# get the name of the previous column
previous_column = df.columns[current_column - 1]
# Check if the first entry in the current column is NaN
if pd.isnull(df.iloc[0, current_column]):
# Get the value of the first cell in the actual column
first_cell = df[previous_column].iloc[0]
# Count the amount of vaules in the actual column
count = df[current_column_name].count()
# Shifts the previous column down 1 position
df[previous_column] = df[previous_column].shift(count-1)
# add the values of the current column to the previous column
df[previous_column] = df[current_column_name].combine_first(df[previous_column].shift())
# add the first value back to the previous column
df.at[0, previous_column] = first_cell
# Delete the current column
df.drop(df.columns[current_column], axis=1, inplace=True)
# Update the number of columns after deletion
num_columns -= 1
else:
current_column += 1
´´´