Fourier transform and filter given data set

Overall, I want to calculate a fourier transform of a given data set and filter out some of the frequencies with the biggest absolute values. So:

1) Given a data array D with accompanying times t, 2) find the k biggest fourier coefficients and 3) remove those coefficients from the data, in order to filter out certain signals from the original data.

Something goes wrong in the end when plotting the filtered data set over the given times. I'm not exactly sure, where the error is. The final 'filtered data' plot doesn't look even slightly smoothened and somehow changes its position compared with the original data. Is my code completely bad?

Part 1):

n = 1000
limit_low = 0
limit_high = 0.48
D = np.random.normal(0, 0.5, n) + np.abs(np.random.normal(0, 2, n) * np.sin(np.linspace(0, 3*np.pi, n))) + np.sin(np.linspace(0, 5*np.pi, n))**2 + np.sin(np.linspace(1, 6*np.pi, n))**2

scaling = (limit_high - limit_low) / (max(D) - min(D))
D = D * scaling
D = D + (limit_low - min(D))                       # given data

t = linspace(0,D.size-1,D.size)                    # times

Part 2):

from numpy import linspace                                    
import numpy as np
from scipy import fft, ifft

D_f = fft.fft(D)         # fft of D-dataset

#---extract the k biggest coefficients out of D_f ---

k = 15
I, bigvals = [], []                                        
for i in np.argsort(-D_f):                
    if D_f[i] not in bigvals:            
        bigvals.append(D_f[i])          
        I.append(i)
        if len(I) == k:
            break

bigcofs = np.zeros(len(D_f))             
bigcofs[I] = D_f[I]                      # array with only zeros in in except for the k maximal coefficients

Part 3):

D_filter = fft.ifft(bigcofs)
D_new = D - D_filter

p1=plt.plot(t,D,'r')
p2=plt.plot(t,D_new,'b');
plt.legend((p1[0], p2[0]), ('original data', 'filtered data'))

I appreciate your help, thanks in advance.

Solution

There were two issues I noticed:

you likely want the components with largest absolute value, so np.argsort(-np.abs(D_f)) instead of np.argsort(-D_f).
More subtly: bigcofs = np.zeros(len(D_f)) is of type float64 and was discarding the imaginary part at the line bigcofs[I] = D_f[I]. You can fix this with bigcofs = np.zeros(len(D_f), dtype=complex)

I've improved your code slightly below to get desired results:

import numpy as np
from scipy import fft, ifft
import matplotlib.pyplot as plt

n = 1000
limit_low = 0
limit_high = 0.48
N_THRESH = 10

D = 0.5*np.random.normal(0, 0.5, n) + 0.5*np.abs(np.random.normal(0, 2, n) * np.sin(np.linspace(0, 3*np.pi, n))) + np.sin(np.linspace(0, 5*np.pi, n))**2 + np.sin(np.linspace(1, 6*np.pi, n))**2

scaling = (limit_high - limit_low) / (max(D) - min(D))
D = D * scaling
D = D + (limit_low - min(D))                       # given data
t = np.linspace(0,D.size-1,D.size)                    # times

# transformed data
D_fft = fft.fft(D)

# Create boolean mask for N largest indices
idx_sorted = np.argsort(-np.abs(D_fft))
idx = idx_sorted[0:N_THRESH]
mask = np.zeros(D_fft.shape).astype(bool)
mask[idx] = True

# Split fft above, below N_THRESH points:
D_below = D_fft.copy()
D_below[mask] = 0
D_above = D_fft.copy() 
D_above[~mask] = 0

#inverse separated functions
D_above = fft.ifft(D_above)
D_below = fft.ifft(D_below)

# plot
plt.ion()
f, (ax1, ax2, ax3) = plt.subplots(3,1)
l1, = ax1.plot(t, D, c="r", label="original")
l2, = ax2.plot(t, D_above, c="g", label="top {} coeff. signal".format(N_THRESH))
l3, = ax3.plot(t, D_below, c="b", label="remaining signal")
f.legend(handles=[l1,l2,l3])
plt.show()