Search code examples
max row rapidminer

RapidMiner Row Maximum


Sorry, I'm totally new to RapidMiner and have only done the basic tutorial.

I have a dataset like

MatchID   Value1   Value2   Value3
1            5        1        2
1           4.5      1.5       2
...

and would like to know whether it is possible to get the highest value per column (for example, Value1) and use it in further calculations (generate attributes).

Thank you.


Solution

  • There are lots of ways, as it happens. Here's one that uses the Aggregate operator to find the maxima, Join to attach them to the original data, and Generate Attributes to do some calculating.

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      RapidMiner process: take the maximum of each attribute of the Iris sample
      data, join those maxima back to the original examples, and derive one
      "delta" attribute per column (column maximum minus the example's own value).
    -->
    <process version="7.2.003">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true"
                class="process"
                compatibility="7.2.003"
                expanded="true"
                name="Process">
        <process expanded="true">

          <!-- Step 1: load the Iris sample data from the repository. -->
          <operator activated="true"
                    class="retrieve"
                    compatibility="7.2.003"
                    expanded="true"
                    height="68"
                    name="Retrieve Iris"
                    width="90"
                    x="45"
                    y="34">
            <parameter key="repository_entry" value="//Samples/data/Iris"/>
          </operator>

          <!--
            Step 2: aggregate using the default aggregation function "maximum".
            No per-attribute aggregations are listed. The later expressions refer
            to the results as [maximum(a1)] ... [maximum(a4)].
          -->
          <operator activated="true"
                    class="aggregate"
                    compatibility="7.2.003"
                    expanded="true"
                    height="82"
                    name="Aggregate"
                    width="90"
                    x="179"
                    y="34">
            <parameter key="use_default_aggregation" value="true"/>
            <parameter key="default_aggregation_function" value="maximum"/>
            <list key="aggregation_attributes"/>
          </operator>

          <!--
            Step 3: outer join of the aggregated maxima (left port) with the
            original example set (right port). No key attributes are configured
            and the id attribute is not used as key.
            NOTE(review): the wiring suggests every original example should end up
            carrying the maxima; confirm this in your RapidMiner version.
          -->
          <operator activated="true"
                    class="join"
                    compatibility="7.2.003"
                    expanded="true"
                    height="82"
                    name="Join"
                    width="90"
                    x="313"
                    y="34">
            <parameter key="join_type" value="outer"/>
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes"/>
          </operator>

          <!--
            Step 4: for each of a1..a4, generate deltaAn as the column maximum
            minus the example's own value.
          -->
          <operator activated="true"
                    class="generate_attributes"
                    compatibility="7.2.003"
                    expanded="true"
                    height="82"
                    name="Generate Attributes"
                    width="90"
                    x="447"
                    y="34">
            <list key="function_descriptions">
              <parameter key="deltaA1" value="[maximum(a1)]-a1"/>
              <parameter key="deltaA2" value="[maximum(a2)]-a2"/>
              <parameter key="deltaA3" value="[maximum(a3)]-a3"/>
              <parameter key="deltaA4" value="[maximum(a4)]-a4"/>
            </list>
          </operator>

          <!-- Wiring: Retrieve -> Aggregate -> Join (maxima + original) -> Generate Attributes -> result 1. -->
          <connect from_op="Retrieve Iris"
                   from_port="output"
                   to_op="Aggregate"
                   to_port="example set input"/>
          <connect from_op="Aggregate"
                   from_port="example set output"
                   to_op="Join"
                   to_port="left"/>
          <connect from_op="Aggregate"
                   from_port="original"
                   to_op="Join"
                   to_port="right"/>
          <connect from_op="Join"
                   from_port="join"
                   to_op="Generate Attributes"
                   to_port="example set input"/>
          <connect from_op="Generate Attributes"
                   from_port="example set output"
                   to_port="result 1"/>

          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>