Search code examples
pythonfilterfft

Fourier transform and filter given data set


Overall, I want to calculate a fourier transform of a given data set and filter out some of the frequencies with the biggest absolute values. So:

1) Given a data array D with accompanying times t, 2) find the k biggest fourier coefficients and 3) remove those coefficients from the data, in order to filter out certain signals from the original data.

Something goes wrong in the end when plotting the filtered data set over the given times. I'm not exactly sure, where the error is. The final 'filtered data' plot doesn't look even slightly smoothened and somehow changes its position compared with the original data. Is my code completely bad?

Part 1):

n = 1000
limit_low = 0
limit_high = 0.48
D = np.random.normal(0, 0.5, n) + np.abs(np.random.normal(0, 2, n) * np.sin(np.linspace(0, 3*np.pi, n))) + np.sin(np.linspace(0, 5*np.pi, n))**2 + np.sin(np.linspace(1, 6*np.pi, n))**2

scaling = (limit_high - limit_low) / (max(D) - min(D))
D = D * scaling
D = D + (limit_low - min(D))                       # given data

t = linspace(0,D.size-1,D.size)                    # times


Part 2):

from numpy import linspace                                    
import numpy as np
from scipy import fft, ifft

D_f = fft.fft(D)         # fft of D-dataset

#---extract the k biggest coefficients out of D_f ---

k = 15
I, bigvals = [], []                                        
for i in np.argsort(-D_f):                
    if D_f[i] not in bigvals:            
        bigvals.append(D_f[i])          
        I.append(i)
        if len(I) == k:
            break

bigcofs = np.zeros(len(D_f))             
bigcofs[I] = D_f[I]                      # array with only zeros in in except for the k maximal coefficients

Part 3):

D_filter = fft.ifft(bigcofs)
D_new = D - D_filter

p1=plt.plot(t,D,'r')
p2=plt.plot(t,D_new,'b');
plt.legend((p1[0], p2[0]), ('original data', 'filtered data'))

I appreciate your help, thanks in advance.


Solution

  • There were two issues I noticed:

    1. you likely want the components with largest absolute value, so np.argsort(-np.abs(D_f)) instead of np.argsort(-D_f).

    2. More subtly: bigcofs = np.zeros(len(D_f)) is of type float64 and was discarding the imaginary part at the line bigcofs[I] = D_f[I]. You can fix this with bigcofs = np.zeros(len(D_f), dtype=complex)

    I've improved your code slightly below to get desired results:

    import numpy as np
    from scipy import fft, ifft
    import matplotlib.pyplot as plt
    
    n = 1000
    limit_low = 0
    limit_high = 0.48
    N_THRESH = 10
    
    D = 0.5*np.random.normal(0, 0.5, n) + 0.5*np.abs(np.random.normal(0, 2, n) * np.sin(np.linspace(0, 3*np.pi, n))) + np.sin(np.linspace(0, 5*np.pi, n))**2 + np.sin(np.linspace(1, 6*np.pi, n))**2
    
    scaling = (limit_high - limit_low) / (max(D) - min(D))
    D = D * scaling
    D = D + (limit_low - min(D))                       # given data
    t = np.linspace(0,D.size-1,D.size)                    # times
    
    # transformed data
    D_fft = fft.fft(D)
    
    # Create boolean mask for N largest indices
    idx_sorted = np.argsort(-np.abs(D_fft))
    idx = idx_sorted[0:N_THRESH]
    mask = np.zeros(D_fft.shape).astype(bool)
    mask[idx] = True
    
    # Split fft above, below N_THRESH points:
    D_below = D_fft.copy()
    D_below[mask] = 0
    D_above = D_fft.copy() 
    D_above[~mask] = 0
    
    #inverse separated functions
    D_above = fft.ifft(D_above)
    D_below = fft.ifft(D_below)
    
    # plot
    plt.ion()
    f, (ax1, ax2, ax3) = plt.subplots(3,1)
    l1, = ax1.plot(t, D, c="r", label="original")
    l2, = ax2.plot(t, D_above, c="g", label="top {} coeff. signal".format(N_THRESH))
    l3, = ax3.plot(t, D_below, c="b", label="remaining signal")
    f.legend(handles=[l1,l2,l3])
    plt.show()
    

    enter image description here