Search code examples
rapidminer

Creating a pareto chart in RapidMiner


I am not able to plot a simple pareto chart.

My data looks like:

enter image description here

When I try to create a Pareto chart, I get a blank space, and I also cannot select a value for "Count Value":

enter image description here

What am I missing here? My sample data is generated by the following XML process:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.3.015) that builds the sample data set.
  Nine "Generate Data by User Specification" operators each define one
  "category" value and one "Incidents" value; an Append operator collects
  their outputs and its "merged set" output is delivered as result 1.
-->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <!--
        Data generators: one operator per category.
        NOTE(review): height/width/x/y are presumably canvas layout values
        for the process designer and not part of the data; confirm.
      -->
      <!-- category "black", Incidents 10 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="447" y="75">
        <list key="attribute_values">
          <parameter key="category" value="&quot;black&quot;"/>
          <parameter key="Incidents" value="10"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- category "blue", Incidents 2 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="447" y="390">
        <list key="attribute_values">
          <parameter key="category" value="&quot;blue&quot;"/>
          <parameter key="Incidents" value="2"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- category "green", Incidents 7 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="447" y="210">
        <list key="attribute_values">
          <parameter key="category" value="&quot;green&quot;"/>
          <parameter key="Incidents" value="7"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- category "white", Incidents 8 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="447" y="165">
        <list key="attribute_values">
          <parameter key="category" value="&quot;white&quot;"/>
          <parameter key="Incidents" value="8"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- category "red", Incidents 2 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification (5)" width="90" x="447" y="300">
        <list key="attribute_values">
          <parameter key="category" value="&quot;red&quot;"/>
          <parameter key="Incidents" value="2"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- category "Yellow", Incidents 1 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification (6)" width="90" x="447" y="480">
        <list key="attribute_values">
          <parameter key="category" value="&quot;Yellow&quot;"/>
          <parameter key="Incidents" value="1"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- category "Gray", Incidents 1 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification (7)" width="90" x="447" y="705">
        <list key="attribute_values">
          <parameter key="category" value="&quot;Gray&quot;"/>
          <parameter key="Incidents" value="1"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- category "Navy", Incidents 1 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification (8)" width="90" x="447" y="840">
        <list key="attribute_values">
          <parameter key="category" value="&quot;Navy&quot;"/>
          <parameter key="Incidents" value="1"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- category "Purple", Incidents 1 -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.3.015" expanded="true" height="60" name="Generate Data by User Specification (9)" width="90" x="447" y="570">
        <list key="attribute_values">
          <parameter key="category" value="&quot;Purple&quot;"/>
          <parameter key="Incidents" value="1"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Append: receives all nine generator outputs through the connections below. -->
      <operator activated="true" class="append" compatibility="5.3.015" expanded="true" height="220" name="Append" width="90" x="715" y="120"/>
      <!--
        Wiring of generators to Append input ports. The generators are not
        connected in numeric order: Append inputs "example set 1" through
        "example set 9" receive black, white, red, green, blue, Yellow, Navy,
        Purple, Gray respectively.
        NOTE(review): the port order presumably determines the row order of
        the merged set; confirm against the Append operator documentation.
      -->
      <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
      <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 5"/>
      <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 4"/>
      <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 2"/>
      <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 3"/>
      <connect from_op="Generate Data by User Specification (6)" from_port="output" to_op="Append" to_port="example set 6"/>
      <connect from_op="Generate Data by User Specification (7)" from_port="output" to_op="Append" to_port="example set 9"/>
      <connect from_op="Generate Data by User Specification (8)" from_port="output" to_op="Append" to_port="example set 7"/>
      <connect from_op="Generate Data by User Specification (9)" from_port="output" to_op="Append" to_port="example set 8"/>
      <!-- Deliver the merged example set to the first result port of the process. -->
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <!-- NOTE(review): portSpacing entries look like designer layout metadata only; confirm. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Solution

  • So I found a workaround (thanks to Andrew), which only works for this example set.

    I had to "de-aggregate" it and add a new polynominal attribute with the same value for every example.

    Then I could create a Pareto chart, group-by 'category' and set the count-column to the new attribute.

    enter image description here

    This led to the following chart:

    enter image description here

    When I do this with my dataset I get this chart:

    enter image description here

    I guess that, without being able to configure the Pareto chart, it is not very usable when the group-by attribute has many distinct values.