Suppose I have the following data:
import pandas as pd
data = [44.5, 27.0, 22.0, 23.0, 23.0, 24.0, 23.0, 21.0, 22.0, 20.0, 22.0, 19.0, 20.0, 19.0, 20.0, 20.0, 18.0, 18.0, 18.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.5, 16.0, 16.0, 16.0, 16.0, 21.0, 18.0, 17.0, 18.0, 16.0, 17.0, 16.0, 16.0, 16.0, 17.0, 17.0, 16.0, 16.0, 16.0, 16.0, 17.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 19.0, 18.0, 18.0, 17.0, 18.0, 19.0, 54.0, 37.0, 31.0, 30.0, 30.0, 30.0, 28.0, 26.0, 27.5, 25.0, 26.0, 26.0, 26.0, 26.0, 25.0, 30.0, 29.0, 26.0, 28.0, 26.0, 27.0, 25.0, 26.0, 26.0, 25.0, 24.0, 23.0, 23.0, 24.0, 25.0, 36.0, 29.0, 27.0, 26.0, 26.0, 27.0, 25.0, 25.0, 25.0, 25.0, 26.0, 23.0, 25.0, 24.0, 25.0, 27.0, 25.0, 24.0, 24.0, 23.0, 24.0, 24.0, 23.5, 22.0, 22.0, 23.0, 23.0, 23.0, 22.0, 24.0, 41.0, 28.0, 26.0, 25.0, 24.0, 25.0, 24.0, 22.0, 22.0, 23.0, 23.0, 22.0, 21.0, 22.0, 23.0, 23.0, 21.0, 19.0, 20.0, 19.0, 20.0, 19.0, 20.0, 19.0, 19.0, 20.0, 19.0, 19.0, 20.0, 20.0, 25.0, 21.0, 21.0, 20.0, 20.0, 20.0, 21.0, 20.0, 21.0, 19.5, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 18.0, 18.0, 17.0, 17.0, 18.0, 19.0, 19.0, 17.0, 18.0, 19.0, 19.0, 18.0, 18.0, 18.0, 26.0, 20.0, 19.0, 18.0, 18.0, 20.0, 17.0, 19.0, 17.0, 18.0, 19.0, 17.0, 17.0, 18.0, 17.0, 20.0, 18.0, 18.0, 17.0, 17.0, 18.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 16.0, 18.0, 20.0, 23.0, 18.0, 17.0, 17.0, 16.0, 16.0, 15.0, 15.0, 15.0, 14.0, 14.0, 15.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 12.0, 14.0, 13.0, 14.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 12.0, 20.0, 15.0, 14.0, 14.0, 12.0, 13.0, 13.0, 13.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 12.0, 14.0, 12.0, 13.0, 12.0, 12.0, 13.0, 12.0, 12.0, 11.0, 12.0, 12.0, 11.0, 12.0, 12.0, 12.0, 33.0, 27.0, 24.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 15.0, 16.0, 14.0, 14.0, 13.0, 13.0, 14.0, 13.0, 14.0, 14.0, 13.0, 14.0, 13.0, 13.0, 13.0, 12.0, 13.0, 13.0, 12.0, 13.0, 13.0, 24.0, 15.0, 16.0, 14.0, 14.0, 16.0, 15.5, 15.0, 14.0, 15.0, 15.0, 15.0, 14.0, 14.0, 14.0, 16.0, 14.0, 14.0, 14.0, 15.0, 15.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 14.0, 17.0, 26.0, 21.0, 
19.0, 19.0, 19.0, 19.0, 20.0, 19.0, 19.0, 19.0, 20.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 19.0, 18.0, 19.0, 19.0, 20.0, 18.0, 19.0, 19.0, 20.0, 18.0, 19.0, 22.0, 47.0, 31.0, 30.0, 28.0, 28.0, 28.0, 27.0, 26.0, 28.0, 29.0, 31.0, 29.0, 30.0, 30.0, 32.0, 36.0, 27.0, 29.0, 29.0, 27.0, 30.0, 28.0, 27.0, 26.0, 28.0, 28.0, 28.0, 28.0, 28.0, 31.0, 38.0, 33.0, 31.0, 30.0, 29.0, 30.0, 29.0, 30.0, 29.0, 30.0, 33.0, 30.0, 30.0, 30.0, 31.0, 32.0, 32.0, 32.0, 31.0, 31.0, 33.0, 32.0, 31.0, 32.0, 32.0, 33.5, 32.0, 32.0, 34.0, 41.0, 99.0, 73.0, 76.0, 64.0, 64.0, 72.0, 72.0, 68.0, 63.0, 66.0, 70.0, 70.0, 62.0, 63.0, 63.0, 72.0, 66.0, 66.0, 65.0, 62.0, 65.0, 63.0, 59.0, 59.0, 62.0, 61.0, 62.0, 61.0, 60.0, 64.0, 82.0, 69.0, 67.0, 67.0, 62.0, 68.5, 65.0, 66.0, 64.0, 66.0, 66.0, 62.0, 65.0, 65.0, 61.0, 68.0, 64.0, 64.0, 65.0, 65.0, 69.0, 66.0, 62.0, 64.0, 64.0, 70.0, 65.0, 65.0, 67.0, 75.0, 167.0, 120.0, 111.0, 102.0, 100.0, 107.0, 105.0, 102.5, 96.0, 97.0, 101.5, 99.0, 95.0, 87.0, 89.0, 101.0, 94.0, 94.0, 93.0, 95.0, 100.0, 96.0, 96.0, 94.0, 101.0, 119.0, 134.0, 140.0, 149.0, 211.0, 219.0, 160.0, 127.0, 115.0, 112.0, 108.0, 101.0, 96.0, 90.0, 91.0, 94.0, 90.0, 86.0, 85.0, 89.0, 94.0, 91.0, 83.0, 82.0, 81.0, 86.0, 82.0, 82.0, 77.0, 79.0, 83.0, 84.0, 77.0, 76.0, 83.0, 132.5, 108.0, 104.0, 97.0, 99.0, 97.0, 94.0, 90.0, 85.0, 87.0, 92.0, 86.0, 86.0, 84.0, 83.0, 92.0, 86.0, 86.0, 86.0, 83.0, 86.0, 83.0, 78.0, 77.0, 81.0, 81.0, 76.0, 78.0, 77.0, 78.0, 98.0, 91.0, 84.0, 83.0, 81.0, 84.0, 81.0, 81.0, 78.0, 76.0, 82.0, 79.0, 78.0, 74.0, 77.0, 81.0, 78.0, 78.0, 80.0, 79.0, 85.0, 76.0, 76.5, 75.0, 76.0, 79.0, 75.0, 73.0, 70.0, 73.0, 109.0, 93.0, 91.0, 85.0, 85.0, 88.0, 82.0, 81.0, 79.0, 76.0, 82.0, 78.0, 80.0, 75.0, 77.0, 81.0, 79.0, 76.0, 78.0, 74.0, 81.0, 78.0, 74.0, 72.0, 73.0, 77.0, 74.0, 75.0, 71.0, 73.0, 95.0, 83.0, 83.0, 81.0, 79.0, 83.0, 79.0, 76.0, 75.0, 74.0, 84.0, 74.0, 75.5, 75.0, 76.0, 86.0, 79.0, 76.0, 73.0, 74.0, 80.0, 77.0, 75.5, 70.0, 73.0, 76.0, 73.0, 73.0, 70.0, 80.0, 
112.0, 88.0, 89.0, 86.0, 87.0, 89.0, 87.0, 85.0, 85.0, 82.0, 90.0, 88.0, 87.0, 87.0, 86.0, 92.0, 87.0, 88.0, 86.0, 88.0, 90.0, 93.0, 89.0, 85.0, 90.0, 94.0, 91.0, 84.5, 86.0, 86.0, 110.0, 104.0, 102.0, 94.5, 101.0, 106.0, 100.0, 96.0, 89.0, 92.0, 99.0, 95.0, 95.0, 94.0, 95.0, 108.0, 104.0, 98.0, 110.0, 106.0, 117.0, 112.0, 108.5, 107.0, 105.0, 109.0, 108.0, 110.0, 103.0, 112.0, 187.0, 152.0, 138.0, 137.0, 135.5, 146.0, 136.0, 129.0, 130.0, 134.0, 141.0, 133.0, 137.0, 131.0, 137.0, 153.0, 147.0, 140.0, 142.0, 150.0, 174.0, 157.0, 149.0, 145.0, 151.0, 160.0, 147.0, 136.0, 132.0, 138.0, 254.0, 229.0, 222.0, 212.0, 207.0, 230.0, 210.0, 206.0, 201.0, 194.0, 209.0, 199.0, 201.0, 202.0, 200.0, 225.0, 220.0, 201.0, 202.0, 212.0, 225.0, 210.0, 210.0, 200.0, 210.0, 231.0, 231.0, 218.0, 225.0, 321.0, 1018.0, 588.0, 491.0, 456.0, 441.0, 477.0, 427.0, 411.0, 375.0, 377.0, 422.0, 368.0, 359.0, 342.0, 342.0, 400.0, 373.0, 355.0, 358.0, 363.0, 387.0, 357.0, 350.0, 336.0, 328.0, 348.0, 316.0, 301.0, 305.0, 313.0, 599.0, 535.0, 504.0, 498.5, 485.0, 536.0, 505.0, 468.5, 455.0, 470.0, 516.0, 464.0, 452.5, 436.0, 430.0, 519.0, 473.0, 451.0, 433.0, 436.0, 495.0, 456.0, 456.0, 431.0, 437.0, 467.0, 424.0, 400.0, 372.0, 452.0, 1067.0, 804.0, 715.0, 667.5, 632.0, 689.0, 624.0, 575.5, 569.0, 555.0, 605.0, 546.5, 522.0, 495.0, 511.0, 603.5, 532.0, 512.5, 512.0, 491.0, 543.0, 499.0, 472.0, 451.0, 463.0, 500.0, 477.0, 457.0, 435.0, 461.0, 773.0, 705.5, 680.0, 644.0, 639.0, 689.0, 668.0, 620.5, 581.0, 584.0, 667.0, 597.5, 590.5, 568.0, 559.0, 668.0, 605.0, 577.0, 571.0, 566.0, 610.0, 585.5, 575.0, 537.0, 548.0, 586.0, 532.0, 520.0, 491.0, 519.0, 778.0, 703.0, 648.0, 607.0, 586.0, 633.0, 578.0, 552.0, 534.0, 523.0, 610.0, 566.5, 534.0, 525.0, 516.0, 595.0, 552.0, 522.0, 516.5, 518.0, 560.0, 531.0, 513.0, 498.0, 530.0, 578.0, 550.0, 545.0, 602.0, 799.0, 918.0, 644.5, 585.0, 548.0, 525.0, 569.0, 523.0, 496.0, 489.0, 470.0, 526.0, 480.0, 470.0, 455.0, 469.5, 545.0, 466.0, 440.0, 424.0, 440.0, 466.0, 
434.0, 415.0, 404.5, 412.0, 449.0, 430.0, 416.0, 408.0, 443.0, 586.0, 495.5, 445.0, 435.0, 434.0, 461.0, 437.0, 416.0, 395.0, 404.0, 437.0, 400.0, 387.0, 381.0, 373.5, 427.0, 404.0, 375.0, 394.0, 380.0, 418.0, 397.0, 386.0, 369.0, 384.5, 411.0, 383.0, 385.0, 420.0, 511.0, 555.0, 415.0, 373.0, 358.0, 359.5, 362.0, 348.5, 339.0, 321.0, 324.5, 348.0, 329.0, 316.0, 315.0, 312.0, 356.0, 321.0, 310.0, 301.0, 310.0, 329.0, 319.0, 305.0, 302.0, 296.0, 315.0, 293.0, 289.0, 289.0, 297.5, 407.0, 342.0, 321.0, 312.0, 304.0, 335.0, 315.0, 302.0, 297.0, 293.5, 308.0, 285.5, 290.0, 283.0, 289.0, 326.0, 300.5, 294.0, 285.0, 281.0, 310.0, 291.0, 289.0, 277.0, 281.0, 306.0, 292.0, 280.5, 279.0, 292.5, 365.0, 316.0, 303.0, 290.0, 287.0, 319.0, 298.0, 287.0, 274.0, 287.0, 303.5, 282.0, 275.0, 271.0, 273.0, 312.0, 289.0, 280.0, 276.0, 274.0, 299.0, 295.0, 281.0, 280.0, 285.0, 306.0, 299.0, 288.0, 295.0, 319.0, 439.0, 379.0, 378.0, 368.0, 353.0, 396.0, 384.0, 363.0, 356.0, 366.0, 406.0, 389.0, 388.5, 378.5, 396.0, 477.0, 460.5, 436.0, 424.0, 443.5, 524.5, 506.0, 503.0, 508.0, 571.5, 687.5, 739.5, 1058.0, 1998.0, 1973.0, 916.5, 459.5, 358.5, 310.0, 274.0, 262.0, 239.0, 225.0, 212.5, 203.0, 214.5, 191.0, 186.0, 176.0, 182.5, 185.0, 170.0, 163.0, 161.0, 162.5, 167.0, 156.0, 156.5, 151.0, 153.0, 157.0, 151.0, 150.0, 140.0, 164.0, 206.0, 164.0, 159.0, 157.5, 156.0, 156.0, 150.0, 148.0, 144.0, 150.5, 161.0, 154.0, 148.0, 152.0, 260.5, 267.0, 168.0, 161.0, 153.0, 150.0, 147.0, 147.0, 148.0, 144.0, 145.0, 148.0, 151.5, 147.0, 145.0, 150.0, 174.0, 142.0, 134.0, 120.0, 113.0, 110.0, 105.0, 101.0, 96.0, 94.0, 98.0, 93.0, 89.0, 86.0, 89.0, 100.0, 121.0, 82.0, 79.0, 78.0, 100.0, 80.0, 74.0, 72.5, 77.0, 91.0, 74.0, 80.0, 86.0, 126.0, 108.0, 63.0, 52.0, 48.0, 46.0, 48.0, 46.0, 44.0, 43.0, 43.0, 45.0, 42.0, 42.0, 42.0, 53.0, 52.0, 38.0, 37.0, 34.0, 33.0, 37.0, 35.0, 33.0, 33.0, 33.0, 42.0, 35.0, 34.0, 34.0, 39.0, 75.0, 46.0, 41.0, 39.0, 38.0, 40.0, 36.0, 36.0, 35.0, 38.0, 44.0, 35.0, 35.0, 34.0, 41.0, 
63.0, 44.0, 37.0, 33.0, 40.0, 70.0, 37.0, 34.0, 34.0, 41.0, 66.0, 43.0, 40.0, 53.0, 87.0, 43.0, 27.0, 21.0, 22.0, 20.0, 22.0, 19.0, 18.0, 18.0, 19.0, 23.0, 21.0, 20.0, 23.0, 35.0, 23.0, 23.5, 21.0, 23.0, 21.0, 22.0, 20.0, 21.0, 20.0, 20.0, 29.0, 21.0, 20.0, 21.0, 27.0, 69.5, 33.0, 31.5, 30.0, 28.0, 28.0, 25.0, 26.0, 26.0, 28.0, 29.0, 23.0, 22.0, 24.0, 28.0, 23.0, 25.0, 26.0, 25.0, 29.0, 55.0, 27.0, 24.0, 25.0, 29.0, 55.0, 32.0, 27.0, 38.0, 54.0, 77.0, 28.5, 23.0, 19.0, 18.0, 17.0, 15.0, 15.0, 13.0, 13.0, 14.0, 12.0, 14.0, 13.5, 17.0, 12.0, 11.0, 11.0, 10.0, 10.0, 11.0, 11.0, 10.0, 9.5, 10.0, 10.0, 9.0, 9.0, 9.0, 9.0, 12.0, 10.0, 10.0, 10.0, 9.0, 9.0, 10.0, 10.0, 9.0, 9.0, 10.0, 9.0, 9.0, 9.0, 8.0, 8.0, 9.0, 9.0, 9.0, 8.0, 9.0, 9.0, 9.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 30.0, 18.0, 15.0, 14.0, 14.0, 14.0, 13.0, 12.5, 12.0, 11.0, 11.0, 10.0, 11.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 8.0, 9.0, 9.0, 9.0, 9.0, 11.0, 10.0, 9.0, 9.0, 9.0, 10.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 12.0, 11.0, 10.0, 10.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 9.0, 10.0, 10.0, 13.0]
which, if we plot it, shows (qualitatively) a maximal region between roughly x = 750 and x = 1100.
We can see this maximal region more clearly if we smooth the data:
pd.Series(data).ewm(span=100).mean().plot()
My question is: what techniques/algorithms can be used to identify such an interval, say (800, 1200)? I have many such datasets which have different shapes, but all include 1 or 2 maximal "regions".
Any ideas? Thanks
Here's the mountain climber solution to the problem I mentioned in my comment. I saved the data you posted to a numpy file: https://drive.google.com/file/d/192jp5LvEE0Dc8QVMVmzzuHSehl2_bBLF/view?usp=sharing
Graph after mean filtering and mountain climbing
Bounds after thresholding based on the value at the start of the rise.
import numpy as np
import matplotlib.pyplot as plt
# sign of the step between two consecutive samples
def getDirection(one, two):
    """Classify the step from `one` to `two`.

    Returns 1 when `two` is greater than `one`, 0 when the two are equal,
    and -1 in every other case.
    """
    delta = two - one
    if delta > 0:
        return 1
    return 0 if delta == 0 else -1
# detects and returns peaks and valleys
def mountainClimber(vals, minClimb):
    """Find the turning points (peaks and valleys) of a sequence.

    Walks `vals` once and records an entry every time the direction of travel
    flips between rising and falling. Flat steps are ignored, so a plateau is
    reported at its last sample.

    Parameters:
        vals: indexable sequence of numbers.
        minClimb: entries whose absolute climb is not strictly greater than
            this value are dropped from the result.

    Returns:
        A list of `[index, height, climb]` lists in order of appearance.
        `climb` is the height difference to the previous opposite turning
        point (or to `vals[0]` when there is none yet), rounded to 2 decimals:
        positive for peaks, negative for valleys.
    """
    # fewer than two samples cannot contain a change of direction
    if len(vals) < 2:
        return []

    # init trackers
    last_valley = vals[0]
    last_peak = vals[0]
    last_dir = 0  # stays 0 until the first non-flat step has been seen

    peak_valley = []  # index, height, climb (positive for peaks, negative for valleys)
    for a in range(1, len(vals)):
        sign = getDirection(vals[a - 1], vals[a])

        # flat step, or still heading the same way: nothing to record
        if sign == 0 or sign == last_dir:
            continue

        # A leading flat run establishes no direction, so the first real move
        # is not a reversal and must not be recorded as a peak or valley.
        if last_dir != 0:
            # the extremum is the sample just before the reversal, not after it
            turn = a - 1
            if last_dir > 0:
                # was rising, now falling: peak
                last_peak = vals[turn]
                climb = last_peak - last_valley
            else:
                # was falling, now rising: valley
                last_valley = vals[turn]
                climb = last_valley - last_peak
            peak_valley.append([turn, vals[turn], round(climb, 2)])

        # change direction
        last_dir = sign

    # filter out very small climbs
    return [dot for dot in peak_valley if abs(dot[2]) > minClimb]
# run a mean filter over the graph values
def meanFilter(vals, size):
    """Smooth `vals` with a trailing moving average over `size` samples.

    Parameters:
        vals: iterable of numbers.
        size: window length; must be at least 1.

    Returns:
        A list as long as `vals`. Entry i is the mean of the `size` samples
        ending at position i. The first `size - 1` entries, for which the
        window is not yet full, are 0 so that indices line up with `vals`.

    Raises:
        ValueError: if `size` is smaller than 1.
    """
    from collections import deque

    if size < 1:
        raise ValueError("size must be at least 1")

    # a bounded deque discards the oldest sample by itself once it is full
    window = deque(maxlen=size)
    filtered_vals = []
    for val in vals:
        window.append(val)
        if len(window) == size:
            # average over the complete window, i.e. all `size` samples
            filtered_vals.append(sum(window) / size)
        else:
            # pad to maintain index positions
            filtered_vals.append(0)
    return filtered_vals
# load from file
data = np.load("data.npy")

# smooth the values so the climber sees the broad shape rather than the noise
mean_filter_size = 150
filtered_vals = meanFilter(data, mean_filter_size)

# get peaks and valleys
pv = mountainClimber(filtered_vals, 0)

# filter for the largest climb; only positive climbs are peaks, so valleys
# (negative climbs) are never candidates
peak_indices = [pv_index for pv_index, feature in enumerate(pv) if feature[2] > 0]
if not peak_indices:
    raise ValueError("no peak found in the filtered data")
# max() keeps the first of several equally large climbs
top_index = max(peak_indices, key=lambda pv_index: pv[pv_index][2])
biggest_climb = pv[top_index][2]

# pull out the threshold: the feature just before the biggest peak marks
# where the rise began
if top_index > 0:
    start, threshold = pv[top_index - 1][:2]
else:
    # No earlier feature exists (index -1 would wrap around to the LAST
    # feature). Fall back to the first sample where the filter window is
    # full; everything before it is zero padding.
    start = min(mean_filter_size - 1, len(data) - 1)
    threshold = filtered_vals[start]

# look through and find the first spot where the graph drops below threshold;
# if it never does, the region runs to the end of the data
end = next(
    (index for index in range(start + 1, len(data)) if data[index] < threshold),
    len(data) - 1,
)

# draw the bounding lines
markers_x = [start, end]
markers_y = [data[start], data[end]]

# draw plot
x = range(len(data))
fig = plt.figure()
ax = plt.axes()
ax.plot(x, data)
ax.plot(markers_x, markers_y, 'or')
plt.show()