python pandas dataframe vectorization multi-index

How do I create a dataframe from another dataframe with only the last non negative values for each value column?

I have a multi-indexed dataframe like so:

           year  value   value2 value3   some_other_column_i_dont_care_about
  one two             
  a   t     2000     0     1     7        aaa
      w     2001     3    -1     4        bbb
      t     2002    -2     1     -3       ccc
  b   t     2000     4     3     6        ddd
      w     2001     7     5     -1       eee    
      t     2002    -8    -3     3        fff
  c   t     2000    11    10     3        ggg
      w     2001   -12    -9     -1       hhh
      t     2002   -15    -6     -5       iii

How do I create a new, single level df, that just has the latest (in terms of years) non-negative values, like so:

            value  value2 value3
 one    
 a            3      1     4
 b            7      5     3
 c           11     10     3

Solution

One option is to melt, use query to keep values >=0, then use pivot_table with aggfunc='last' to get to wide format again:

new_df = (
    df.reset_index('one')
        .melt(id_vars='one',
              value_vars=['value', 'value2', 'value3'],
              value_name='values')
        .query('values >= 0')
        .pivot_table(index='one', columns='variable', aggfunc='last')
        .droplevel(0, axis=1)
        .rename_axis(index=None, columns=None)
)

Alternatively use groupby last to keep the last value from each group, then unstack after melt:

new_df = (
    df.reset_index('one')
        .melt(id_vars='one',
              value_vars=['value', 'value2', 'value3'],
              value_name='values')
        .query('values >= 0')
        .groupby(['one', 'variable'])
        .last()
        .unstack()
        .droplevel(0, axis=1)
        .rename_axis(index=None, columns=None)
)

new_df:

   value  value2  value3
a      3       1       4
b      7       5       3
c     11      10       3

Assuming years are not guaranteed to be in ascending order chain sort_values before melt:

new_df = (
    df.reset_index('one')
        .sort_values('year')  # Sort By Year
        .melt(id_vars='one',
              value_vars=['value', 'value2', 'value3'],
              value_name='values')
        .query('values >= 0')
        .pivot_table(index='one', columns='variable', aggfunc='last')
        .droplevel(0, axis=1)
        .rename_axis(index=None, columns=None)
)

Complete Working Example:

import pandas as pd

df = pd.DataFrame({
    'one': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'],
    'two': ['t', 'w', 't', 't', 'w', 't', 't', 'w', 't'],
    'year': [2000, 2001, 2002, 2000, 2001, 2002, 2000, 2001, 2002],
    'value': [0, 3, -2, 4, 7, -8, 11, -12, -15],
    'value2': [1, -1, 1, 3, 5, -3, 10, -9, -6],
    'value3': [3, 4, -3, -1, 3, -2, 7, -9, 3],
    'some_other_column_i_dont_care_about': ['aaa', 'bbb', 'ccc', 'ddd', 'eee',
                                            'fff', 'ggg', 'hhh', 'iii']
}).set_index(['one', 'two'])

new_df = (
    df.reset_index('one')
        .melt(id_vars='one',
              value_vars=['value', 'value2', 'value3'],
              value_name='values')
        .query('values >= 0')
        .pivot_table(index='one', columns='variable', aggfunc='last')
        .droplevel(0, axis=1)
        .rename_axis(index=None, columns=None)
)

print(new_df)