Search code examples
javascriptalgorithmmachine-learningartificial-intelligenceclassification

Implementing the OneRule (OneR) algorithm in JavaScript


OneR, short for "One Rule", is a simple yet accurate classification algorithm that generates one rule for each predictor in the data, then selects the rule with the smallest total error as its "one rule".

I tried to find code samples on GitHub, but found only one, written in R. How could I implement this algorithm in JavaScript?

What have I tried? I am trying to implement it by following this sample article: https://www.saedsayad.com/oner.htm

/**
 * OneR ("One Rule") classifier: builds one rule per predictor column and
 * keeps the rule with the smallest total error on the training data.
 */
class OneR {
  /**
   * Pass the dataset as an array of rows, each row being an array of values.
   * The last value of a row is the class (target) value.
   * All other values are predictors.
   *
   * Example
   *
   * The meaning of the values in a row:
   * |Outlook|Temp|Humidity|Windy|Play Golf|
   *
   * Representation of a row:
   * ['rainy', 'hot', 'high', 0, 0]
   *
   * True and False are represented as ones and zeros.
   *
   * @param {Array<Array<*>>} data - Training rows.
   */
  constructor(data = []) {
    this.data = data;
    // Filled by predict(): predictor index -> Map(predictor value -> Map(class -> count)).
    this.frequences = {};
  }

  /**
   * Build the one-rule model from the rows passed to the constructor.
   *
   * Ties are resolved in favour of the first candidate: the lowest predictor
   * index when total errors are equal, and the class that appears first in
   * the data when class counts are equal.
   *
   * @returns {{predictor: number, rule: Array<Array<*>>, totalError: number}|undefined}
   *   `predictor` is the zero-based column of the chosen predictor, `rule` is
   *   a list of `[predictorValue, predictedClass]` pairs and `totalError` is
   *   the number of training rows the rule misclassifies. Returns `undefined`
   *   (after logging "Cannot predict!") when there are no rows or no
   *   predictor columns.
   */
  predict() {
    if (!(this.data && this.data.length > 0)) {
      console.log("Cannot predict!");
      return undefined;
    }

    const rows = this.data;
    const targetIndex = rows[0].length - 1;
    if (targetIndex < 1) {
      // Rows hold at most the class value, so there is nothing to build a rule from.
      console.log("Cannot predict!");
      return undefined;
    }

    // Distinct class values in order of first appearance; this order is the
    // tie-breaker when two classes are equally frequent.
    const classes = [...new Set(rows.map((row) => row[targetIndex]))];

    this.frequences = {};
    let best;

    // For each predictor,
    for (let i = 0; i < targetIndex; i++) {
      // Count how often each class appears for each value of that predictor.
      const table = new Map();
      for (const row of rows) {
        const value = row[i];
        if (!table.has(value)) {
          table.set(value, new Map(classes.map((cls) => [cls, 0])));
        }
        const counts = table.get(value);
        const cls = row[targetIndex];
        counts.set(cls, counts.get(cls) + 1);
      }
      this.frequences[i] = table;

      // Find the most frequent class per predictor value and make the rule
      // assign that class to this value of the predictor.
      const rule = [];
      let hits = 0;
      for (const [value, counts] of table) {
        let bestClass;
        let bestCount = 0;
        for (const [cls, count] of counts) {
          if (count > bestCount) {
            bestCount = count;
            bestClass = cls;
          }
        }
        rule.push([value, bestClass]);
        hits += bestCount;
      }

      // Total error of this predictor's rules = rows not covered by the majority classes.
      const totalError = rows.length - hits;

      // Choose the predictor with the smallest total error (first one wins ties).
      if (best === undefined || totalError < best.totalError) {
        best = { predictor: i, rule, totalError };
      }
    }

    return best;
  }
}

// CommonJS export: expose the OneR class as a named property.
module.exports = { OneR: OneR };

I have loaded the data from a CSV file:

rainy,hot,high,0,0
rainy,hot,high,1,0
overcast,hot,high,0,1
sunny,mild,high,0,1
sunny,cool,normal,0,1
sunny,cool,normal,1,0
overcast,cool,normal,1,1
rainy,mild,high,0,0
rainy,cool,normal,0,1
sunny,mild,normal,0,1
rainy,mild,normal,1,1
overcast,mild,high,1,1
overcast,hot,normal,0,1
sunny,mild,high,1,0

Solution

  • If I understand correctly how the frequency tables must be compared (lowest error rate, i.e. highest accuracy), you could use Maps so as to cope with non-string values, should that ever be necessary.

    Although your example has target values that are booleans (0 or 1), in general they could be from a larger domain, like for example "call", "fold", "raise", "check".

    Your template code creates a class, but I honestly do not see the benefit of that, since you can practically only do one action on it. Of course, if you have other actions in mind, other than one-rule prediction, then a class could make sense. Here I will just provide a function that takes the data, and returns the number of the selected predictor and the rule table that goes with it:

    /**
     * One-rule (OneR) selection: for every predictor column build a rule that
     * maps each predictor value to its most frequent class, then keep the
     * predictor whose rule classifies the most training rows correctly.
     *
     * Ties go to the first candidate: the lowest column index between
     * predictors, and the class that appears first in `data` between classes.
     *
     * @param {Array<Array<*>>} data - Rows of values; the last value of each
     *   row is the class (target), all others are predictors.
     * @returns {{predictor: number, rule: Array<Array<*>>}|undefined}
     *   `predictor` is the zero-based column number and `rule` a list of
     *   `[predictorValue, predictedClass]` pairs. Logs "Cannot predict!" and
     *   returns `undefined` when `data` is missing, empty, or has no
     *   predictor columns.
     */
    function oneR(data) {
        // `||`, not `&&`: a missing value must short-circuit before `.length`
        // is read, and an empty array must be rejected as well.
        if (!data || !data.length) return console.log("Cannot predict!");

        const predictorCount = data[0].length - 1;
        // Without at least one predictor column no rule can be built.
        if (predictorCount < 1) return console.log("Cannot predict!");

        // get unique list of classes (target values), in order of first appearance:
        const classes = [...new Set(data.map(row => row[predictorCount]))];

        let bestAccuracy = -1;
        let bestRule, bestPredictor;

        // For each predictor,
        for (let i = 0; i < predictorCount; i++) {
            // create frequency table for this predictor: Map of Map of counts
            const freq = new Map();
            for (const row of data) {
                // Create the zero-initialised class counters once per predictor value
                if (!freq.has(row[i])) {
                    freq.set(row[i], new Map(classes.map(targetValue => [targetValue, 0])));
                }
                // Count how often each value of target (class) appears
                const targetValue = row[predictorCount];
                const predictorValueFreq = freq.get(row[i]);
                predictorValueFreq.set(targetValue, predictorValueFreq.get(targetValue) + 1);
            }
            // Find the most frequent class for each predictor value
            const rule = [];
            let accuracy = 0;
            for (const [predictorValue, predictorValueFreq] of freq) {
                let maxCount = 0;
                let chosenTargetValue;
                for (const [targetValue, count] of predictorValueFreq) {
                    if (count > maxCount) {
                        // Make the rule assign that class to this value of the predictor
                        maxCount = count;
                        chosenTargetValue = targetValue;
                    }
                }
                rule.push([predictorValue, chosenTargetValue]);
                accuracy += maxCount;
            }
            // If this accuracy is best, then retain this rule table
            if (accuracy > bestAccuracy) {
                bestAccuracy = accuracy;
                bestPredictor = i;
                bestRule = rule;
            }
        }
        // Return the best rule table and the predictor for which it applies
        return {
            predictor: bestPredictor, // zero-based column number
            rule: bestRule
        };
    }
    
    // Sample rows; columns are outlook, temperature, humidity, windy (0/1)
    // and, last, the class value "play golf" (0/1).
    let data = [
        ['rainy',    'hot',  'high',   0, 0],
        ['rainy',    'hot',  'high',   1, 0],
        ['overcast', 'hot',  'high',   0, 1],
        ['sunny',    'mild', 'high',   0, 1],
        ['sunny',    'cool', 'normal', 0, 1],
        ['sunny',    'cool', 'normal', 1, 0],
        ['overcast', 'cool', 'normal', 1, 1],
        ['rainy',    'mild', 'high',   0, 0],
        ['rainy',    'cool', 'normal', 0, 1],
        ['sunny',    'mild', 'normal', 0, 1],
        ['rainy',    'mild', 'normal', 1, 1],
        ['overcast', 'mild', 'high',   1, 1],
        ['overcast', 'hot',  'normal', 0, 1],
        ['sunny',    'mild', 'high',   1, 0],
    ];

    // Run the one-rule selection on the sample and print what it returns.
    let result = oneR(data);
    console.log(result);