Search code examples
Tags: pandas, image-processing, object-detection, detection, maxima

Finding maximal regions in data


Suppose I have the following data:

import pandas as pd

data = [44.5, 27.0, 22.0, 23.0, 23.0, 24.0, 23.0, 21.0, 22.0, 20.0, 22.0, 19.0, 20.0, 19.0, 20.0, 20.0, 18.0, 18.0, 18.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.5, 16.0, 16.0, 16.0, 16.0, 21.0, 18.0, 17.0, 18.0, 16.0, 17.0, 16.0, 16.0, 16.0, 17.0, 17.0, 16.0, 16.0, 16.0, 16.0, 17.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 19.0, 18.0, 18.0, 17.0, 18.0, 19.0, 54.0, 37.0, 31.0, 30.0, 30.0, 30.0, 28.0, 26.0, 27.5, 25.0, 26.0, 26.0, 26.0, 26.0, 25.0, 30.0, 29.0, 26.0, 28.0, 26.0, 27.0, 25.0, 26.0, 26.0, 25.0, 24.0, 23.0, 23.0, 24.0, 25.0, 36.0, 29.0, 27.0, 26.0, 26.0, 27.0, 25.0, 25.0, 25.0, 25.0, 26.0, 23.0, 25.0, 24.0, 25.0, 27.0, 25.0, 24.0, 24.0, 23.0, 24.0, 24.0, 23.5, 22.0, 22.0, 23.0, 23.0, 23.0, 22.0, 24.0, 41.0, 28.0, 26.0, 25.0, 24.0, 25.0, 24.0, 22.0, 22.0, 23.0, 23.0, 22.0, 21.0, 22.0, 23.0, 23.0, 21.0, 19.0, 20.0, 19.0, 20.0, 19.0, 20.0, 19.0, 19.0, 20.0, 19.0, 19.0, 20.0, 20.0, 25.0, 21.0, 21.0, 20.0, 20.0, 20.0, 21.0, 20.0, 21.0, 19.5, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 18.0, 18.0, 17.0, 17.0, 18.0, 19.0, 19.0, 17.0, 18.0, 19.0, 19.0, 18.0, 18.0, 18.0, 26.0, 20.0, 19.0, 18.0, 18.0, 20.0, 17.0, 19.0, 17.0, 18.0, 19.0, 17.0, 17.0, 18.0, 17.0, 20.0, 18.0, 18.0, 17.0, 17.0, 18.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 16.0, 18.0, 20.0, 23.0, 18.0, 17.0, 17.0, 16.0, 16.0, 15.0, 15.0, 15.0, 14.0, 14.0, 15.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 12.0, 14.0, 13.0, 14.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 12.0, 20.0, 15.0, 14.0, 14.0, 12.0, 13.0, 13.0, 13.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 12.0, 14.0, 12.0, 13.0, 12.0, 12.0, 13.0, 12.0, 12.0, 11.0, 12.0, 12.0, 11.0, 12.0, 12.0, 12.0, 33.0, 27.0, 24.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 15.0, 16.0, 14.0, 14.0, 13.0, 13.0, 14.0, 13.0, 14.0, 14.0, 13.0, 14.0, 13.0, 13.0, 13.0, 12.0, 13.0, 13.0, 12.0, 13.0, 13.0, 24.0, 15.0, 16.0, 14.0, 14.0, 16.0, 15.5, 15.0, 14.0, 15.0, 15.0, 15.0, 14.0, 14.0, 14.0, 16.0, 14.0, 14.0, 14.0, 15.0, 15.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 14.0, 17.0, 26.0, 21.0, 
19.0, 19.0, 19.0, 19.0, 20.0, 19.0, 19.0, 19.0, 20.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 19.0, 18.0, 19.0, 19.0, 20.0, 18.0, 19.0, 19.0, 20.0, 18.0, 19.0, 22.0, 47.0, 31.0, 30.0, 28.0, 28.0, 28.0, 27.0, 26.0, 28.0, 29.0, 31.0, 29.0, 30.0, 30.0, 32.0, 36.0, 27.0, 29.0, 29.0, 27.0, 30.0, 28.0, 27.0, 26.0, 28.0, 28.0, 28.0, 28.0, 28.0, 31.0, 38.0, 33.0, 31.0, 30.0, 29.0, 30.0, 29.0, 30.0, 29.0, 30.0, 33.0, 30.0, 30.0, 30.0, 31.0, 32.0, 32.0, 32.0, 31.0, 31.0, 33.0, 32.0, 31.0, 32.0, 32.0, 33.5, 32.0, 32.0, 34.0, 41.0, 99.0, 73.0, 76.0, 64.0, 64.0, 72.0, 72.0, 68.0, 63.0, 66.0, 70.0, 70.0, 62.0, 63.0, 63.0, 72.0, 66.0, 66.0, 65.0, 62.0, 65.0, 63.0, 59.0, 59.0, 62.0, 61.0, 62.0, 61.0, 60.0, 64.0, 82.0, 69.0, 67.0, 67.0, 62.0, 68.5, 65.0, 66.0, 64.0, 66.0, 66.0, 62.0, 65.0, 65.0, 61.0, 68.0, 64.0, 64.0, 65.0, 65.0, 69.0, 66.0, 62.0, 64.0, 64.0, 70.0, 65.0, 65.0, 67.0, 75.0, 167.0, 120.0, 111.0, 102.0, 100.0, 107.0, 105.0, 102.5, 96.0, 97.0, 101.5, 99.0, 95.0, 87.0, 89.0, 101.0, 94.0, 94.0, 93.0, 95.0, 100.0, 96.0, 96.0, 94.0, 101.0, 119.0, 134.0, 140.0, 149.0, 211.0, 219.0, 160.0, 127.0, 115.0, 112.0, 108.0, 101.0, 96.0, 90.0, 91.0, 94.0, 90.0, 86.0, 85.0, 89.0, 94.0, 91.0, 83.0, 82.0, 81.0, 86.0, 82.0, 82.0, 77.0, 79.0, 83.0, 84.0, 77.0, 76.0, 83.0, 132.5, 108.0, 104.0, 97.0, 99.0, 97.0, 94.0, 90.0, 85.0, 87.0, 92.0, 86.0, 86.0, 84.0, 83.0, 92.0, 86.0, 86.0, 86.0, 83.0, 86.0, 83.0, 78.0, 77.0, 81.0, 81.0, 76.0, 78.0, 77.0, 78.0, 98.0, 91.0, 84.0, 83.0, 81.0, 84.0, 81.0, 81.0, 78.0, 76.0, 82.0, 79.0, 78.0, 74.0, 77.0, 81.0, 78.0, 78.0, 80.0, 79.0, 85.0, 76.0, 76.5, 75.0, 76.0, 79.0, 75.0, 73.0, 70.0, 73.0, 109.0, 93.0, 91.0, 85.0, 85.0, 88.0, 82.0, 81.0, 79.0, 76.0, 82.0, 78.0, 80.0, 75.0, 77.0, 81.0, 79.0, 76.0, 78.0, 74.0, 81.0, 78.0, 74.0, 72.0, 73.0, 77.0, 74.0, 75.0, 71.0, 73.0, 95.0, 83.0, 83.0, 81.0, 79.0, 83.0, 79.0, 76.0, 75.0, 74.0, 84.0, 74.0, 75.5, 75.0, 76.0, 86.0, 79.0, 76.0, 73.0, 74.0, 80.0, 77.0, 75.5, 70.0, 73.0, 76.0, 73.0, 73.0, 70.0, 80.0, 
112.0, 88.0, 89.0, 86.0, 87.0, 89.0, 87.0, 85.0, 85.0, 82.0, 90.0, 88.0, 87.0, 87.0, 86.0, 92.0, 87.0, 88.0, 86.0, 88.0, 90.0, 93.0, 89.0, 85.0, 90.0, 94.0, 91.0, 84.5, 86.0, 86.0, 110.0, 104.0, 102.0, 94.5, 101.0, 106.0, 100.0, 96.0, 89.0, 92.0, 99.0, 95.0, 95.0, 94.0, 95.0, 108.0, 104.0, 98.0, 110.0, 106.0, 117.0, 112.0, 108.5, 107.0, 105.0, 109.0, 108.0, 110.0, 103.0, 112.0, 187.0, 152.0, 138.0, 137.0, 135.5, 146.0, 136.0, 129.0, 130.0, 134.0, 141.0, 133.0, 137.0, 131.0, 137.0, 153.0, 147.0, 140.0, 142.0, 150.0, 174.0, 157.0, 149.0, 145.0, 151.0, 160.0, 147.0, 136.0, 132.0, 138.0, 254.0, 229.0, 222.0, 212.0, 207.0, 230.0, 210.0, 206.0, 201.0, 194.0, 209.0, 199.0, 201.0, 202.0, 200.0, 225.0, 220.0, 201.0, 202.0, 212.0, 225.0, 210.0, 210.0, 200.0, 210.0, 231.0, 231.0, 218.0, 225.0, 321.0, 1018.0, 588.0, 491.0, 456.0, 441.0, 477.0, 427.0, 411.0, 375.0, 377.0, 422.0, 368.0, 359.0, 342.0, 342.0, 400.0, 373.0, 355.0, 358.0, 363.0, 387.0, 357.0, 350.0, 336.0, 328.0, 348.0, 316.0, 301.0, 305.0, 313.0, 599.0, 535.0, 504.0, 498.5, 485.0, 536.0, 505.0, 468.5, 455.0, 470.0, 516.0, 464.0, 452.5, 436.0, 430.0, 519.0, 473.0, 451.0, 433.0, 436.0, 495.0, 456.0, 456.0, 431.0, 437.0, 467.0, 424.0, 400.0, 372.0, 452.0, 1067.0, 804.0, 715.0, 667.5, 632.0, 689.0, 624.0, 575.5, 569.0, 555.0, 605.0, 546.5, 522.0, 495.0, 511.0, 603.5, 532.0, 512.5, 512.0, 491.0, 543.0, 499.0, 472.0, 451.0, 463.0, 500.0, 477.0, 457.0, 435.0, 461.0, 773.0, 705.5, 680.0, 644.0, 639.0, 689.0, 668.0, 620.5, 581.0, 584.0, 667.0, 597.5, 590.5, 568.0, 559.0, 668.0, 605.0, 577.0, 571.0, 566.0, 610.0, 585.5, 575.0, 537.0, 548.0, 586.0, 532.0, 520.0, 491.0, 519.0, 778.0, 703.0, 648.0, 607.0, 586.0, 633.0, 578.0, 552.0, 534.0, 523.0, 610.0, 566.5, 534.0, 525.0, 516.0, 595.0, 552.0, 522.0, 516.5, 518.0, 560.0, 531.0, 513.0, 498.0, 530.0, 578.0, 550.0, 545.0, 602.0, 799.0, 918.0, 644.5, 585.0, 548.0, 525.0, 569.0, 523.0, 496.0, 489.0, 470.0, 526.0, 480.0, 470.0, 455.0, 469.5, 545.0, 466.0, 440.0, 424.0, 440.0, 466.0, 
434.0, 415.0, 404.5, 412.0, 449.0, 430.0, 416.0, 408.0, 443.0, 586.0, 495.5, 445.0, 435.0, 434.0, 461.0, 437.0, 416.0, 395.0, 404.0, 437.0, 400.0, 387.0, 381.0, 373.5, 427.0, 404.0, 375.0, 394.0, 380.0, 418.0, 397.0, 386.0, 369.0, 384.5, 411.0, 383.0, 385.0, 420.0, 511.0, 555.0, 415.0, 373.0, 358.0, 359.5, 362.0, 348.5, 339.0, 321.0, 324.5, 348.0, 329.0, 316.0, 315.0, 312.0, 356.0, 321.0, 310.0, 301.0, 310.0, 329.0, 319.0, 305.0, 302.0, 296.0, 315.0, 293.0, 289.0, 289.0, 297.5, 407.0, 342.0, 321.0, 312.0, 304.0, 335.0, 315.0, 302.0, 297.0, 293.5, 308.0, 285.5, 290.0, 283.0, 289.0, 326.0, 300.5, 294.0, 285.0, 281.0, 310.0, 291.0, 289.0, 277.0, 281.0, 306.0, 292.0, 280.5, 279.0, 292.5, 365.0, 316.0, 303.0, 290.0, 287.0, 319.0, 298.0, 287.0, 274.0, 287.0, 303.5, 282.0, 275.0, 271.0, 273.0, 312.0, 289.0, 280.0, 276.0, 274.0, 299.0, 295.0, 281.0, 280.0, 285.0, 306.0, 299.0, 288.0, 295.0, 319.0, 439.0, 379.0, 378.0, 368.0, 353.0, 396.0, 384.0, 363.0, 356.0, 366.0, 406.0, 389.0, 388.5, 378.5, 396.0, 477.0, 460.5, 436.0, 424.0, 443.5, 524.5, 506.0, 503.0, 508.0, 571.5, 687.5, 739.5, 1058.0, 1998.0, 1973.0, 916.5, 459.5, 358.5, 310.0, 274.0, 262.0, 239.0, 225.0, 212.5, 203.0, 214.5, 191.0, 186.0, 176.0, 182.5, 185.0, 170.0, 163.0, 161.0, 162.5, 167.0, 156.0, 156.5, 151.0, 153.0, 157.0, 151.0, 150.0, 140.0, 164.0, 206.0, 164.0, 159.0, 157.5, 156.0, 156.0, 150.0, 148.0, 144.0, 150.5, 161.0, 154.0, 148.0, 152.0, 260.5, 267.0, 168.0, 161.0, 153.0, 150.0, 147.0, 147.0, 148.0, 144.0, 145.0, 148.0, 151.5, 147.0, 145.0, 150.0, 174.0, 142.0, 134.0, 120.0, 113.0, 110.0, 105.0, 101.0, 96.0, 94.0, 98.0, 93.0, 89.0, 86.0, 89.0, 100.0, 121.0, 82.0, 79.0, 78.0, 100.0, 80.0, 74.0, 72.5, 77.0, 91.0, 74.0, 80.0, 86.0, 126.0, 108.0, 63.0, 52.0, 48.0, 46.0, 48.0, 46.0, 44.0, 43.0, 43.0, 45.0, 42.0, 42.0, 42.0, 53.0, 52.0, 38.0, 37.0, 34.0, 33.0, 37.0, 35.0, 33.0, 33.0, 33.0, 42.0, 35.0, 34.0, 34.0, 39.0, 75.0, 46.0, 41.0, 39.0, 38.0, 40.0, 36.0, 36.0, 35.0, 38.0, 44.0, 35.0, 35.0, 34.0, 41.0, 
63.0, 44.0, 37.0, 33.0, 40.0, 70.0, 37.0, 34.0, 34.0, 41.0, 66.0, 43.0, 40.0, 53.0, 87.0, 43.0, 27.0, 21.0, 22.0, 20.0, 22.0, 19.0, 18.0, 18.0, 19.0, 23.0, 21.0, 20.0, 23.0, 35.0, 23.0, 23.5, 21.0, 23.0, 21.0, 22.0, 20.0, 21.0, 20.0, 20.0, 29.0, 21.0, 20.0, 21.0, 27.0, 69.5, 33.0, 31.5, 30.0, 28.0, 28.0, 25.0, 26.0, 26.0, 28.0, 29.0, 23.0, 22.0, 24.0, 28.0, 23.0, 25.0, 26.0, 25.0, 29.0, 55.0, 27.0, 24.0, 25.0, 29.0, 55.0, 32.0, 27.0, 38.0, 54.0, 77.0, 28.5, 23.0, 19.0, 18.0, 17.0, 15.0, 15.0, 13.0, 13.0, 14.0, 12.0, 14.0, 13.5, 17.0, 12.0, 11.0, 11.0, 10.0, 10.0, 11.0, 11.0, 10.0, 9.5, 10.0, 10.0, 9.0, 9.0, 9.0, 9.0, 12.0, 10.0, 10.0, 10.0, 9.0, 9.0, 10.0, 10.0, 9.0, 9.0, 10.0, 9.0, 9.0, 9.0, 8.0, 8.0, 9.0, 9.0, 9.0, 8.0, 9.0, 9.0, 9.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 30.0, 18.0, 15.0, 14.0, 14.0, 14.0, 13.0, 12.5, 12.0, 11.0, 11.0, 10.0, 11.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 8.0, 9.0, 9.0, 9.0, 9.0, 11.0, 10.0, 9.0, 9.0, 9.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 12.0, 11.0, 10.0, 10.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 9.0, 10.0, 10.0, 13.0]
​

If we plot this data, we can see (qualitatively) a maximal region between roughly indices 750 and 1100.

enter image description here

We can see this maximal region more clearly if we smooth the data:

pd.Series(data).ewm(span=100).mean().plot()

enter image description here

My question is: what techniques or algorithms can be used to identify such an interval, say (800, 1200)? I have many such datasets with different shapes, but each of them includes one or two maximal "regions".

Any ideas? Thanks


Solution

  • Here's the mountain climber solution to the problem I mentioned in my comment. I saved the data you posted to a numpy file: https://drive.google.com/file/d/192jp5LvEE0Dc8QVMVmzzuHSehl2_bBLF/view?usp=sharing

    Graph after mean filtering and mountain climbing

    enter image description here

    Bounds after thresholding based on the value at the start of the rise.

    enter image description here

    import numpy as np
    import matplotlib.pyplot as plt
    
    # sign of the step from `one` to `two`
    def getDirection(one, two):
        """Return the direction of the change from ``one`` to ``two``.

        Returns:
            1 when ``two`` is greater than ``one``, 0 when both are equal,
            and -1 in every other case.
        """
        delta = two - one
        if delta > 0:
            return 1
        # anything that is neither rising nor exactly flat counts as falling
        return 0 if delta == 0 else -1
    
    # detects and returns peaks and valleys
    def mountainClimber(vals, minClimb):
        """Find the turning points (peaks and valleys) of a sequence.

        The sequence is walked left to right. Every time the slope flips
        from rising to falling a peak is recorded, and every time it flips
        from falling to rising a valley is recorded. Flat steps are ignored,
        so the extremum of a plateau is reported at the plateau's last sample.

        Args:
            vals: indexable sequence of numbers that supports ``len()``.
            minClimb: features whose absolute climb is not strictly greater
                than this value are dropped from the result.

        Returns:
            A list of ``[index, height, climb]`` entries in order of
            appearance. ``climb`` is the height difference to the previous
            opposite extremum (or to ``vals[0]`` when there is none yet),
            rounded to two decimals: positive for peaks, negative for
            valleys. Sequences with fewer than two samples give ``[]``.
        """
        # fewer than two samples cannot contain a turning point
        if len(vals) < 2:
            return []

        # init trackers; the first sample is the baseline for the first climb
        last_valley = vals[0]
        last_peak = vals[0]
        last_dir = getDirection(vals[0], vals[1])

        # get climbing
        peak_valley = []  # index, height, climb (positive for peaks, negative for valleys)
        for a in range(1, len(vals)):
            sign = getDirection(vals[a - 1], vals[a])

            # flat steps and steps that continue the current trend are not turning points
            if sign == 0 or sign == last_dir:
                continue

            # The slope flipped on the step into `a`, so the extremum itself
            # is the previous sample, not the first sample after the turn.
            turn = a - 1
            if last_dir > 0:
                # peak
                last_peak = vals[turn]
                climb = round(last_peak - last_valley, 2)
                peak_valley.append([turn, vals[turn], climb])
            elif last_dir < 0:
                # valley
                last_valley = vals[turn]
                climb = round(last_valley - last_peak, 2)
                peak_valley.append([turn, vals[turn], climb])
            # last_dir == 0 means the sequence started flat: this is the first
            # real move, not a turning point, so nothing is recorded.

            # change direction
            last_dir = sign

        # filter out very small climbs
        return [dot for dot in peak_valley if abs(dot[2]) > minClimb]
    
    # run a mean filter over the graph values
    def meanFilter(vals, size):
        """Smooth ``vals`` with a trailing moving average over ``size`` samples.

        Output position ``i`` holds the mean of ``vals[i - size + 1]`` through
        ``vals[i]``. The first ``size - 1`` positions, where the window is not
        full yet, are filled with 0 so that output indices line up with input
        indices.

        Args:
            vals: iterable of numbers.
            size: window length; must be a positive integer.

        Returns:
            A list with one entry per input sample.

        Raises:
            ValueError: if ``size`` is smaller than 1.
        """
        from collections import deque

        if size < 1:
            raise ValueError("size must be a positive integer")

        # a bounded deque discards the oldest sample once the window is full
        window = deque(maxlen=size)
        filtered_vals = []
        for val in vals:
            window.append(val)
            if len(window) == size:
                # average the full window *before* anything is dropped, so all
                # `size` samples contribute to the mean
                filtered_vals.append(sum(window) / size)
            else:
                # pad to maintain index positions
                filtered_vals.append(0)
        return filtered_vals
    
    # load from file
    data = np.load("data.npy")

    # smooth the raw samples with a trailing mean filter
    mean_filter_size = 150
    filtered_vals = meanFilter(data, mean_filter_size)

    # get peaks and valleys of the smoothed curve
    pv = mountainClimber(filtered_vals, 0)

    # filter for the largest climb (the first one wins ties)
    biggest_climb = -1
    top_index = None
    for pv_index, (_, _, climb) in enumerate(pv):
        if climb > biggest_climb:
            biggest_climb = climb
            top_index = pv_index

    # without any qualifying feature there is nothing to bound
    if top_index is None:
        raise SystemExit("no climb found in the filtered data; cannot locate a maximal region")

    # pull out the threshold: the feature right before the biggest climb
    # marks where the rise starts
    if top_index > 0:
        start = pv[top_index - 1][0]
        threshold = pv[top_index - 1][1]
    else:
        # The biggest climb is the first feature, so there is no earlier one;
        # pv[-1] would silently wrap around to the last feature. Use the start
        # of the series, which is also the baseline mountainClimber measures
        # the first climb from.
        start = 0
        threshold = filtered_vals[0]

    # look through and find the first spot where the graph drops below threshold
    # (the threshold comes from the smoothed curve, the comparison uses raw data);
    # if it never drops, the region extends to the last sample
    end = next(
        (index for index in range(start + 1, len(data)) if data[index] < threshold),
        len(data) - 1,
    )

    # draw the bounding lines
    markers_x = [start, end]
    markers_y = [data[start], data[end]]

    # draw plot
    x = np.arange(len(data))
    fig = plt.figure()
    ax = plt.axes()
    ax.plot(x, data)
    ax.plot(markers_x, markers_y, 'or')
    plt.show()