Search code examples
pythonnumpyscipytime-seriestrend

How to De-Trend a waveform which is piece-wise linear or non-linear?


I'm trying to remove the trend present in the waveform which looks like the following:

enter image description here

For doing so, I use scipy.signal.detrend() as follows:

autocorr = scipy.signal.detrend(autocorr)

But I don't see any significant flattening in trend. I get the following:

enter image description here

My objective is to have the trend completely eliminated from the waveform. And I need to also generalize it so that it can detrend any kind of waveform - be it linear, piece-wise linear, polynomial, etc.

Can you please suggest a way to do the same?

Note: In order to replicate the above waveform, you can simply run the following code that I used to generate it:

#Loading Libraries
import warnings
warnings.filterwarnings("ignore")

import json
import sys, os
import numpy as np
import pandas as pd
import glob
import pickle

from statsmodels.tsa.stattools import adfuller, acf, pacf
from scipy.signal import find_peaks, square
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

#Generating a function with Dual Seasonality:
def white_noise(mu, sigma, num_pts):
    """ Function to generate Gaussian Normal Noise
    Args:
        sigma: std value
        num_pts: no of points
        mu: mean value

    Returns:
        generated Gaussian Normal Noise
    """
    
    noise = np.random.normal(mu, sigma, num_pts)
    return noise

def signal_line_plot(input_signal: pd.Series, title: str = "", y_label: str = "Signal"):
    """ Function to plot a time series signal
    Args:
        input_signal: time series signal that you want to plot
        title: title on plot
        y_label: label of the signal being plotted
        
    Returns:
        signal plot
    """

    plt.plot(input_signal)
    plt.title(title)
    plt.ylabel(y_label)
    plt.show()

# Square with two periodicities of daily and weekly. With @15min sampling frequency it means 4*24=96 samples and 4*24*7=672 

t_week = np.linspace(1,480, 480)
t_weekend=np.linspace(1,192,192)
T=96 #Time Period
x_weekday = 10*square(2*np.pi*t_week/T, duty=0.7)+10 + white_noise(0, 1,480)
x_weekend = 2*square(2*np.pi*t_weekend/T, duty=0.7)+2 + white_noise(0,1,192)
x_daily_weekly = np.concatenate((x_weekday, x_weekend)) 
x_daily_weekly_long = np.concatenate((x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly))
signal_line_plot(x_daily_weekly_long)
signal_line_plot(x_daily_weekly_long[0:1000])

#Finding Autocorrelation & Lags for the signal [WHICH  THE FINAL PARAMETERS WHICH ARE TO BE PLOTTED]:

#Determining Autocorrelation & Lag values
import scipy.signal as signal
autocorr = signal.correlate(x_daily_weekly_long, x_daily_weekly_long, mode="same")

#Normalize the autocorr values (such that the hightest peak value is at 1)
autocorr = (autocorr-min(autocorr))/(max(autocorr)-min(autocorr))

lags = signal.correlation_lags(len(x_daily_weekly_long), len(x_daily_weekly_long), mode = "same")

#Visualization
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)

#DETRENDING:
autocorr = scipy.signal.detrend(autocorr)

#Visualization
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)


Solution

  • Since it's an auto-correlation, it will always be even; so detrending with a breakpoint at lag=0 should get you part of the way there.

    An alternative way to detrend is to use a high-pass filter; you could do this in two ways. What will be tricky is deciding what the cut-off frequency should be.

    Here's a possible way to do this:

    #Loading Libraries
    import numpy as np
    from scipy import signal
    import matplotlib.pyplot as plt
    
    #Generating a function with Dual Seasonality:
    def white_noise(mu, sigma, num_pts):
        """ Function to generate Gaussian Normal Noise
        Args:
            sigma: std value
            num_pts: no of points
            mu: mean value
    
        Returns:
            generated Gaussian Normal Noise
        """
        
        noise = np.random.normal(mu, sigma, num_pts)
        return noise
    
    # High-pass filter via discrete Fourier transform
    # Drop all components from 0th to dropcomponent-th
    def dft_highpass(x, dropcomponent):
        fx = np.fft.rfft(x)
        fx[:dropcomponent] = 0
        return np.fft.irfft(fx)
    
    
    # Square with two periodicities of daily and weekly. With @15min sampling frequency it means 4*24=96 samples and 4*24*7=672 
    
    t_week = np.linspace(1,480, 480)
    t_weekend=np.linspace(1,192,192)
    T=96 #Time Period
    x_weekday = 10*signal.square(2*np.pi*t_week/T, duty=0.7)+10 + white_noise(0, 1,480)
    x_weekend = 2*signal.square(2*np.pi*t_weekend/T, duty=0.7)+2 + white_noise(0,1,192)
    x_daily_weekly = np.concatenate((x_weekday, x_weekend)) 
    x_daily_weekly_long = np.concatenate((x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly))
    
    #Finding Autocorrelation & Lags for the signal [WHICH  THE FINAL PARAMETERS WHICH ARE TO BE PLOTTED]:
    
    #Determining Autocorrelation & Lag values
    autocorr = signal.correlate(x_daily_weekly_long, x_daily_weekly_long, mode="same")
    
    #Normalize the autocorr values (such that the hightest peak value is at 1)
    autocorr = (autocorr-min(autocorr))/(max(autocorr)-min(autocorr))
    
    lags = signal.correlation_lags(len(x_daily_weekly_long), len(x_daily_weekly_long), mode = "same")
    
    
    # detrend w/ breakpoints
    dautocorr = signal.detrend(autocorr, bp=len(lags)//2)
    
    # detrend w/ high-pass filter
    # use `filtfilt` to get zero-phase
    b, a = signal.butter(1, 1e-3, 'high')
    fautocorr = signal.filtfilt(b, a, autocorr)
    
    # detrend with DFT HPF
    rautocorr = dft_highpass(autocorr, len(autocorr) // 1000)
    
    #Visualization
    fig, ax = plt.subplots(3)
    
    for i in range(3):
        ax[i].plot(lags, autocorr, label='orig')
    
    ax[0].plot(lags, dautocorr, label='detrend w/ bp')
    ax[1].plot(lags, fautocorr, label='HPF')
    ax[2].plot(lags, rautocorr, label='DFT')
    
    for i in range(3):
        ax[i].legend()
        ax[i].set_ylabel('autocorr')
    
    ax[-1].set_xlabel('lags')
    

    giving

    enter image description here