Search code examples
javascript machine-learning linear-regression gradient-descent

Logistic regression not generalizing


According to Andrew Ng's lecture on logistic regression on Coursera, the following cost function can be minimized using the update expression below:

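$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[\,y^{(i)}\log h_\theta(x^{(i)}) + \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right], \qquad h_\theta(x) = \frac{1}{1+e^{-\theta^{T}x}}$$

$$\theta_j := \theta_j - \alpha\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$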

Running that update function several hundred times on ~150 samples, I get the following pattern, though the cost seems to be decreasing after each iteration as expected:

enter image description here

The circles are the samples I'm training on, where the input features are the (x, y) coordinate of each point, and the color is the target label. The red or yellow background is what the model predicts that (x, y) input classifies as (red = 0, yellow = 1).

Question

  • Is that update routine not the correct partial derivative of that corresponding cost function J?
  • What can this output pattern be an indication of?

Training method

// A single pass/epoch of batch gradient descent.
// Relies on `samples`, `labels`, `sig` and `sum` being defined elsewhere.

const lr = 0.003; // step size applied to every weight update
let params = [0.5, 0.5, 0.5]; // one weight per sample column

// Model output for every sample (presumably the sigmoid of the weighted feature sum).
const scores = samples.map(sample => sig(sum(sample, params)));
// Prediction minus target label, per sample.
const errors = scores.map((score, i) => score - labels[i][0]);

// Move each weight against the error-weighted sum of its feature column.
params = params.map((param, col) => {
  return param - lr * errors.reduce((acc, error, row) => {
    return acc + error * samples[row][col];
  }, 0);
});

Sample training data

// Training inputs. Every row is [1, x, y]: a constant 1 followed by the
// (x, y) coordinate of one point.
const samples = [
  [142, 78],
  [108, 182],
  [396, 47],
  [66, 102],
  [165, 116],
  [8, 106],
  [245, 119],
  [302, 17],
  [96, 38],
  [201, 132],
].map(([x, y]) => [1, x, y]);

// Target class for each sample, wrapped in a one-element array so it is read
// as labels[i][0].
// NOTE(review): only 9 labels are listed while the sample list shown with this
// snippet has 10 rows — confirm whether one label was dropped from the excerpt.
const labels = [0, 1, 0, 0, 1, 1, 1, 0, 1].map(label => [label]);

Edit

Here's a JSBin of this: https://jsbin.com/jinole/edit?html,js,output


Solution

  • Your problem is purely numerical. Because you implemented the logistic loss directly, your function J has to exponentiate the score of every point, and your data is huge: the x/y coordinates are in the hundreds. With scores that large the sigmoid rounds to exactly 0 or 1, the log terms turn into Infinity/NaN in JS, and your whole code fails to converge. All you need to do is to place your points in the [0, 4] x [0, 2] rectangle instead of [0, 400] x [0, 200], and it will work just fine.

    enter image description here

    For example:

    /**
     * Weighted sum (dot product) of a feature vector and a weight vector.
     *
     * @param {number[]} x - Feature values.
     * @param {number[]} w - Weights, indexed in step with `x`.
     * @returns {number} Sum of x[i] * w[i] over every element of `x`; 0 when `x` is empty.
     */
    function sum(x, w) {
      let total = 0;
      x.forEach((feature, index) => {
        total += feature * w[index];
      });
      return total;
    }
    
    /**
     * Logistic (sigmoid) function.
     *
     * @param {number} z - Raw score.
     * @returns {number} 1 / (1 + e^-z).
     */
    function sig(z) {
      const decay = Math.exp(-z);
      return 1 / (1 + decay);
    }
    
    /**
     * Mean logistic (cross-entropy) loss over all samples.
     *
     * @param {number[]} scores - Model outputs, one per sample.
     * @param {number[][]} labels - Targets; the label of sample i is labels[i][0].
     * @returns {number} -(1/m) * sum of y*log(score) + (1-y)*log(1-score).
     */
    function cost(scores, labels) {
      const total = scores.reduce((acc, score, i) => {
        const y = labels[i][0];
        // Carry the running total forward; without `acc` only the last
        // sample's term would survive the reduction.
        return acc + y * Math.log(score) + (1 - y) * Math.log(1 - score);
      }, 0);
      return -(1 / scores.length) * total;
    }
    
    /**
     * Wipes the fixed 400x200 drawing area.
     *
     * @param {CanvasRenderingContext2D} ctx - Context whose clearRect is called.
     */
    function clear(ctx) {
      const [left, top, width, height] = [0, 0, 400, 200];
      ctx.clearRect(left, top, width, height);
    }
    
    /**
     * Draws every point as a 4x4 square, coloured by its label.
     *
     * @param {CanvasRenderingContext2D} ctx - Context to draw on.
     * @param {number[][]} points - Rows of [x, y, label]; x and y are multiplied
     *   by 100 to get pixel positions.
     */
    function render(ctx, points) {
      points.forEach(sample => {
        const isPositive = sample[2] > 0;
        ctx.fillStyle = isPositive ? '#3c5cff' : '#f956ff';

        // Shift by 2px so the square is centred on the point, but never
        // start left of / above the canvas origin.
        const left = Math.max(0, sample[0] * 100 - 2);
        const top = Math.max(0, sample[1] * 100 - 2);
        ctx.fillRect(left, top, 4, 4);
      });
    }
    
    /**
     * Paints the model's decision for every pixel of the 400x200 area.
     *
     * Each pixel (x, y) is scored as sig(sum([1, x / 100, y / 100], params));
     * scores below 0.5 are painted '#b22438', everything else '#fff9b6'.
     *
     * @param {CanvasRenderingContext2D} ctx - Context to draw on.
     * @param {number[]} params - Weights for [bias, x, y].
     */
    function renderEach(ctx, params) {
      const width = 400;
      const height = 200;

      for (let row = 0; row < height; row += 1) {
        for (let col = 0; col < width; col += 1) {
          const probability = sig(sum([1, col / 100, row / 100], params));
          ctx.fillStyle = probability < 0.5 ? '#b22438' : '#fff9b6';
          ctx.fillRect(col, row, 1, 1);
        }
      }
    }
    
    /**
     * Runs one gradient-descent epoch and schedules the next one.
     *
     * Reads the file-level `labels`, `ctx` and `points`. Every 100th cycle it
     * writes a status line into the `#log` paragraph (created on demand) and
     * redraws the canvas.
     *
     * @param {number[][]} samples - Feature rows, one per sample.
     * @param {number[]} params - Current weights, one per feature column.
     * @param {number} learningRate - Step size for the weight update.
     * @param {?number} lastCost - Cost from the previous call, or null on the first call.
     * @param {number} cycle - Index of this epoch.
     * @param {number} maxCycles - No further epoch is scheduled once `cycle` reaches this.
     */
    function doEpoch(samples, params, learningRate, lastCost, cycle, maxCycles) {
      const scores = samples.map(sample => sig(sum(sample, params)));
      const errors = scores.map((score, i) => score - labels[i][0]);

      let logLine = document.getElementById('log');
      if (!logLine) {
        logLine = document.createElement('p');
        logLine.setAttribute('id', 'log');
        document.body.appendChild(logLine);
      }

      // Per-column gradient: error-weighted sum of that feature over all samples.
      const nextParams = params.map((param, col) => {
        let gradient = 0;
        errors.forEach((error, row) => {
          gradient += error * samples[row][col];
        });
        return param - learningRate * gradient;
      });

      // The cost is evaluated on the scores computed before the update.
      const J = cost(scores, labels);
      const previousCost = lastCost === null ? J : lastCost;

      if (cycle % 100 === 0) {
        logLine.textContent = `Epoch = ${cycle}, Cost = ${J} (${J - previousCost}), Params = ${JSON.stringify(nextParams, null, 2)}`;
        clear(ctx);
        renderEach(ctx, nextParams);
        render(ctx, points);
      }

      if (cycle < maxCycles) {
        // Yield to the browser between epochs so the page can repaint.
        setTimeout(() => {
          doEpoch(samples, nextParams, learningRate, J, cycle + 1, maxCycles);
        }, 10);
      }
    }
    
    // Demo setup: build the canvas, generate random labelled points, then
    // start training.
    const canvas = document.createElement('canvas');
    canvas.width = 400;
    canvas.height = 200;
    document.body.appendChild(canvas);
    const ctx = canvas.getContext('2d');

    // Points whose pixel row is at or above this line get label 1.
    const lineY = 150;
    const points = [];
    for (let i = 0; i < 500; i++) {
      // Pick a whole pixel, then divide by 100 to keep features small.
      // Math.floor truncates numerically; parseInt would stringify the number
      // first and mis-read tiny values printed in exponent notation.
      const point = [
        Math.floor(Math.random() * canvas.width) / 100,
        Math.floor(Math.random() * canvas.height) / 100,
      ];
      point.push(Number(point[1] <= lineY / 100));
      points.push(point);
    }

    render(ctx, points);

    // Split each [x, y, label] row into features and a one-element label row.
    const samples = points.map(point => [point[0], point[1]]);
    const labels = points.map(point => [point[2]]);

    console.log('Samples', JSON.stringify(samples.slice(0, 10)));
    console.log('Labels', JSON.stringify(labels.slice(0, 10)));

    // Bias weight starts at 1; the feature weights start at random values.
    const params = [1, ...samples[0].map(() => Math.random())];
    // Prepend the constant bias feature to every sample.
    const withBias = samples.map(sample => [1, ...sample]);

    const epochs = 100000;
    const learningRate = 0.01;
    const lastCost = null;

    doEpoch(withBias, params, learningRate, lastCost, 0, epochs);
    /* Page reset: grey backdrop, no spacing around the content, monospace text. */
    body {
      background: #eee;
      padding: 0;
      margin: 0;
      font-family: monospace;
    }
    
    /* Stretch the canvas to the full page width; `pixelated` keeps the
       scaled-up pixels sharp instead of smoothing them. */
    canvas {
      background: #fff;
      width: 100%;
      image-rendering: pixelated;
    }
    <div id="plot-app"></div>