I notice unexpected results when applying index filtering to a NumPy array (`b[b < 3] = 0`). Any variable that has been assigned from or to the variable being filtered ends up with the same filter applied, i.e. if `b = a`, then `a` is filtered in the same way as `b`.
I have created a test file to see which variables are affected when index filtering is applied to a variable. I ran the code below; the results I got versus the results I expected are given after it.
import numpy as np
class tester1(object):
    """Demonstrate that plain assignment aliases a NumPy array.

    ``a`` holds the only array this class creates.  ``test1`` binds several
    further names to it and then modifies it in place through one of those
    names, showing that the change is visible through every name.
    """

    def __init__(self):
        # The only real array; b, c and d start out as empty-list placeholders
        # and are rebound in test1().
        self.a = np.array([[1, 2, 3], [4, 5, 6]])
        self.b = []
        self.c = []
        self.d = []

    def test1(self):
        """Alias ``self.a`` under several names, mask-assign, and print them all.

        Every ``x = y`` below binds another name to the same ndarray object;
        no data is copied.  An independent array would require an explicit
        copy, e.g. ``self.a.copy()``.
        """
        self.b = self.a
        self.c = self.b
        self.d = self.c
        d = self.d
        e = d
        # Boolean-mask assignment writes into the existing array in place, so
        # self.a, self.b, self.c, self.d, d and e all show the zeros.
        d[d < 3] = 0
        print('self.a')
        print(self.a)
        print('self.b')
        print(self.b)
        print('self.c')
        print(self.c)
        print('d')
        print(d)
        print('e')
        print(e)
class tester2(object):
    """Demonstrate NumPy array aliasing across two objects.

    Holds its own array ``d`` plus a ``tester1`` instance ``t``.  Each test
    method binds extra names to an existing array, modifies the array in
    place through one name, and prints every name to show they all changed.
    """

    def __init__(self):
        self.d = np.array([[1, 2, 3], [4, 5, 6]])
        self.e = []
        self.t = tester1()
        # Runs the first demonstration immediately: this prints its results
        # and modifies self.t.a in place.
        self.t.test1()

    def test2(self):
        """Alias ``self.d`` through attributes of ``self.t`` and mask-assign."""
        # self.t.b, self.t.c and self.e all become names for the array that
        # self.d refers to; nothing is copied.
        self.t.b = self.d
        self.t.c = self.t.b
        self.e = self.t.b
        # In-place write: visible through self.d and every alias above.
        self.t.b[self.t.b < 3] = 0
        print('self.t.b')
        print(self.t.b)
        print('self.t.c')
        print(self.t.c)
        print('self.d')
        print(self.d)
        print('self.e')
        print(self.e)

    def test3(self):
        """Mask-assign through a local alias of ``self.d`` using an explicit mask."""
        # Printed first to show the state left behind by any earlier in-place
        # modification of self.d (e.g. by test2).
        print('self.d')
        print(self.d)
        self.e = self.d
        a = np.array([[False, False, False], [False, True, True]])
        f = self.d
        # f is the same object as self.d, so the last two entries of the
        # second row are zeroed in self.d and self.e as well.
        f[a] = 0
        print('self.d')
        print(self.d)
        print('self.e')
        print(self.e)
        print('f')
        print(f)

    def test4(self):
        """Mask-assign through a chain of local aliases of ``self.t.a``."""
        # a, b and c are three local names for the array stored in self.t.a.
        a = self.t.a
        b = a
        c = b
        # In-place write through c; self.t.a and b show the same change.
        c[c > 4] = 2
        print('self.t.a')
        print(self.t.a)
        print('b')
        print(b)
        print('c')
        print(c)
For each variable below, the first line is the actual output the class produced and the second line is the output I expected.
When I run t = tester2()
self.a [[0 0 3] [4 5 6]] # Actual
self.a [[1 2 3] [4 5 6]] # Expected
self.b [[0 0 3] [4 5 6]]
self.b [[1 2 3] [4 5 6]]
self.c [[0 0 3] [4 5 6]]
self.c [[1 2 3] [4 5 6]]
d [[0 0 3] [4 5 6]]
d [[0 0 3] [4 5 6]]
e [[0 0 3] [4 5 6]]
e [[1 2 3] [4 5 6]]
When I run t.test2()
self.t.b [[0 0 3] [4 5 6]] # Actual
self.t.b [[0 0 3] [4 5 6]] # Expected
self.t.c [[0 0 3] [4 5 6]]
self.t.c [[1 2 3] [4 5 6]]
self.d [[0 0 3] [4 5 6]]
self.d [[1 2 3] [4 5 6]]
self.e [[0 0 3] [4 5 6]]
self.e [[1 2 3] [4 5 6]]
When I run t.test3()
self.d [[0 0 3] [4 5 6]] # Actual
self.d [[1 2 3] [4 5 6]] # Expected
self.d [[0 0 3] [4 0 0]]
self.d [[1 2 3] [4 5 6]]
self.e [[0 0 3] [4 0 0]]
self.e [[1 2 3] [4 5 6]]
f [[0 0 3] [4 0 0]]
f [[1 2 3] [4 0 0]]
When I run t.test4()
self.t.a [[0 0 3] [4 2 2]] # Actual
self.t.a [[1 2 3] [4 5 6]] # Expected
b [[0 0 3] [4 2 2]]
b [[1 2 3] [4 5 6]]
c [[0 0 3] [4 2 2]]
c [[1 2 3] [4 2 2]]
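This happens because the variables a, b, c and d are all assigned to the same array. Think of each variable as a reference to that array: if you apply filtering to the array in place, every variable is affected, because they all point to the same array. If you want a separate array based on this one, use the copy method, e.g. arr_b = arr_a.copy(). Note that, unlike with Python lists, arr_b = arr_a[:] does not make an independent copy of a NumPy array: basic slicing returns a view that shares the same data, so in-place changes made through arr_b would still show up in arr_a.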