I have the following dataframes:
# Creating Data_One_df
data_one = {
"Date": ["12/1/2022"] * 7,
"Time": ["6:30", "6:35", "6:40", "6:45", "6:50", "6:55", "7:00"],
"Bars_left": [6, 5, 4, 3, 2, 1, 0],
"6:30": [""] * 7,
"6:35": [""] * 7,
"6:40": [""] * 7,
"6:45": [""] * 7,
"6:50": [""] * 7,
"6:55": [""] * 7,
"7:00": [""] * 7,
}
Data_One_df = pd.DataFrame(data_one)
# Creating Data_Two_df
data_two = {
"Time": ["6:30", "6:35", "6:40", "6:45", "6:50", "6:55", "7:00"],
"Fcst": [1, 2, 1, 1, 2, 1, 1],
}
Data_Two_df = pd.DataFrame(data_two)
I want to populate columns of Data_One_df
(input table) by matching the row value of the 'Time' column to the 'Time' column in Data_Two_df
(lookup table) and returning the offset (1 row down) of the cumsum of the 'Fcst' to the offset (1 column to the right) of the 'Input table'.
For example: for row 6:30 in Data_One_df
, I want to find the corresponding row in Data_Two_df
('lookup table') (i.e. 6:30) and then offset (i.e. 6:35). Next, calculate the cumsum starting at 6:35, which is 8. Place that result in the column (6:35) of 'input table'. I then want to repeat this value across all columns.
Here is what the expected result should look like:
data_three = {
"Date": ["12/1/2022"] * 7,
"Time": ["6:30", "6:35", "6:40", "6:45", "6:50", "6:55", "7:00"],
"Bars_left": [6, 5, 4, 3, 2, 1, 0],
"6:30": ["NaN", "NaN", "NaN", "NaN", "NaN", "NaN", "NaN"],
"6:35": [8, "NaN", "NaN", "NaN", "NaN", "NaN", "NaN"],
"6:40": [8, 6, "NaN", "NaN", "NaN", "NaN", "NaN"],
"6:45": [8, 6, 5, "NaN", "NaN", "NaN", "NaN"],
"6:50": [8, 6, 5, 4, "NaN", "NaN", "NaN"],
"6:55": [8, 6, 5, 4, 2, "NaN", "NaN"],
"7:00": [8, 6, 5, 4, 2, 1, "NaN"],
}
Desired_df = pd.DataFrame(data_three)
I tried numerous methods, to no avail. The latest code effort is the following:
# Get a list of time columns
time_cols = Data_One_df.columns[3:]
# Loop through the time columns and update the values
# in Data_One_df based on Data_Two_df
for i, col in enumerate(time_cols):
# Get the corresponding forecast value from Data_Two_df
fcst = Data_Two_df["Fcst"][i]
# Update the values in Data_One_df based on the forecast
Data_One_df[col] = [np.nan] * fcst + list(Data_One_df[col][fcst:])
Here is one way to do it with Pandas shift and cumsum:
# Add necessary infos to Data_Two_df
Data_Two_df["Left_Time"] = Data_Two_df["Time"].shift(-1)
Data_Two_df["Fcst_cumsum"] = Data_Two_df["Fcst"][::-1].cumsum()[::-1].shift(-1)
Data_Two_df = Data_Two_df.dropna()
# Add values to Data_One_df
Data_One_df = Data_One_df.replace("", pd.NA)
for t, left_t, val in zip(
Data_Two_df["Time"], Data_Two_df["Left_Time"], Data_Two_df["Fcst_cumsum"]
):
Data_One_df.loc[Data_One_df["Time"] == t, left_t] = val
Data_One_df.loc[:, "6:30":] = Data_One_df.loc[:, "6:30":].fillna(method="ffill", axis=1)
Then:
print(Data_One_df)
# Output
Date Time Bars_left 6:30 6:35 6:40 6:45 6:50 6:55 7:00
0 12/1/2022 6:30 6 <NA> 8.0 8.0 8.0 8.0 8.0 8.0
1 12/1/2022 6:35 5 <NA> <NA> 6.0 6.0 6.0 6.0 6.0
2 12/1/2022 6:40 4 <NA> <NA> <NA> 5.0 5.0 5.0 5.0
3 12/1/2022 6:45 3 <NA> <NA> <NA> <NA> 4.0 4.0 4.0
4 12/1/2022 6:50 2 <NA> <NA> <NA> <NA> <NA> 2.0 2.0
5 12/1/2022 6:55 1 <NA> <NA> <NA> <NA> <NA> <NA> 1.0
6 12/1/2022 7:00 0 <NA> <NA> <NA> <NA> <NA> <NA> <NA>