Search code examples
pythonarraysdictionarynesteddefaultdict

Nested defaultdict - Error when updating value


I have a dictionary named dQalpha and another one named dQbeta which calculate the experience of a worker dQalpha[worker] and the difficulty of an item dQbeta[example] respectively.

I now want to add a new metric named dQgamma that calculates the correlation of worker and item, by using dQgamma[worker][example] which is a nested defaultdict.

However, if I say self.dQgamma=defaultdict(lambda: defaultdict(dict)), I get the error message

TypeError: float() argument must be a string or a number

If I say self.dQgamma=defaultdict(lambda: defaultdict(list)), I get this error message

ValueError: setting an array element with a sequence.

Can someone help? here's the code:

# Gradient containers: one entry per worker, one per example, and a nested
# worker -> example mapping.
self.dQalpha={}
self.dQbeta={}
# A missing [worker][example] entry is created as an empty dict (the inner
# default factory is `dict`), not as a number.
self.dQgamma=defaultdict(lambda: defaultdict(dict))


# Flat output array with the same shape and dtype as x.
der = np.zeros_like(x)
i = 0
# First the per-worker entries, negated ...
for worker in self.workers:
    der[i] = -self.dQalpha[worker] 
    i = i + 1
# ... then the per-example entries, negated ...
for example in self.examples:
    der[i] = -self.dQbeta[example] 
    i = i + 1
# ... then one entry per (worker, example) pair, workers in the outer loop.
for worker in self.workers:
    for example in self.examples:
        # The right-hand side is whatever the nested defaultdict holds or
        # creates for this pair; it is stored into a single array slot.
        der[i] = self.dQgamma[worker][example] #VALUE ERROR HERE
        i = i + 1

return der

UPDATE

If I say self.dQgamma=defaultdict(lambda: defaultdict(der.dtype)) , I get

NameError: global name 'der' is not defined

EDIT

  def gradientQ(self, dtype):
        """Fill self.dQbeta, self.dQalpha and self.dQgamma.

        Reads self.alpha, self.beta, self.gamma, the label maps self.e2wl and
        self.w2el, the label posteriors self.e2lpd, the label set
        self.prior.keys() and the priors self.priorbeta, self.prioralpha and
        self.priorgamma.

        dtype: the caller (optimize_df) passes x.dtype here; the body as
            written never reads this parameter.
        """

        # NOTE(review): `x` is not a name in this scope (the parameter is
        # `dtype`), and optimize_df itself calls gradientQ -- confirm whether
        # this call is meant to be here at all.
        self.optimize_df(x)
        self.dQalpha={}
        self.dQbeta={}
        # NOTE(review): same undefined `x`; the inner default factory is given
        # a dtype object rather than a callable (see the error reported below).
        self.dQgamma=defaultdict(lambda: defaultdict(x.dtype)) 
        #ERROR TypeError: first argument must be callable

        # dQbeta[example]: accumulate over every (worker, label) recorded for
        # the example and every candidate label, then subtract the deviation
        # of beta from its prior.
        for example, worker_label_set in self.e2wl.items():
            dQb = 0
            for (worker, label) in worker_label_set:
                for tlabel in self.prior.keys():
                    sigma = self.sigmoid(self.alpha[worker]*self.expbeta(self.beta[example]))
                    delta = self.kronecker_delta(label,tlabel)
                    dQb = dQb + self.e2lpd[example][tlabel]*(delta-sigma)*self.alpha[worker]*self.expbeta(self.beta[example])\
                          *self.expgamma(self.gamma[worker][example])
            self.dQbeta[example] = dQb - (self.beta[example] - self.priorbeta[example])

        # dQalpha[worker]: same accumulation, grouped by worker, without the
        # alpha factor in the summand.
        for worker, example_label_set in self.w2el.items():
            dQa = 0
            for (example, label) in example_label_set:
                for tlabel in self.prior.keys():
                    sigma = self.sigmoid(self.alpha[worker]*self.expbeta(self.beta[example]))
                    delta = self.kronecker_delta(label,tlabel)
                    dQa = dQa + self.e2lpd[example][tlabel]*(delta-sigma)*self.expbeta(self.beta[example])\
                          *self.expgamma(self.gamma[worker][example])

            self.dQalpha[worker] = dQa - (self.alpha[worker] - self.prioralpha[worker])


        # dQgamma: iterates every worker of w2el against every example of
        # e2wl; the two label sets unpacked here are not used in the body.
        for worker, example_label_set in self.w2el.items():
            for example, worker_label_set in self.e2wl.items():
                dQg = 0
                for tlabel in self.prior.keys():
                    sigma = self.sigmoid(self.alpha[worker]*self.expbeta(self.beta[example])*\
                                         self.expgamma(self.gamma[worker][example]))
                    # NOTE(review): `label` is not assigned anywhere in this
                    # loop nest; it still holds whatever the loops above left
                    # in it -- confirm which label is intended.
                    delta = self.kronecker_delta(label, tlabel)
                    dQg = dQg + self.e2lpd[example][tlabel]*(delta-sigma)*self.alpha[worker]*self.expbeta(self.beta[example])\
                          *self.expgamma(self.gamma[worker][example])

            # NOTE(review): this statement sits at the indentation of the
            # inner `for`, so it runs once per worker, after the inner loop,
            # with the last `example` and its `dQg` -- presumably it belongs
            # inside the inner loop; confirm.
            self.dQgamma[worker][example] = dQg - (self.gamma[worker][example] - self.priorgamma[worker][example])

def optimize_df(self, x):
    """Return the negated gradient, packed in the same layout as ``x``.

    ``x`` is a flat array laid out as one entry per worker (alpha), then one
    entry per example (beta), then one entry per (worker, example) pair
    (gamma) with workers in the outer loop.  The values are copied into
    ``self.alpha``, ``self.beta`` and ``self.gamma``; ``self.gradientQ`` is
    then called to refresh ``self.dQalpha``, ``self.dQbeta`` and
    ``self.dQgamma``; finally the derivatives are written back in the same
    order with their sign flipped.

    Args:
        x: flat NumPy array holding
            ``len(workers) + len(examples) + len(workers) * len(examples)``
            parameters.  ``self.gamma[worker]`` must already be subscriptable
            for every worker.

    Returns:
        A new array with the shape and dtype of ``x`` containing
        ``-dQalpha``, ``-dQbeta`` and ``-dQgamma``.
    """
    # Unpack x into the parameter dictionaries.
    i = 0
    for worker in self.workers:
        self.alpha[worker] = x[i]
        i += 1

    for example in self.examples:
        self.beta[example] = x[i]
        i += 1

    for worker in self.workers:
        for example in self.examples:
            self.gamma[worker][example] = x[i]
            i += 1

    self.gradientQ(x.dtype)

    # Pack the derivatives.  Every component is negated because the caller
    # minimizes, i.e. it needs the gradient of the negated objective.
    der = np.zeros_like(x)
    i = 0
    for worker in self.workers:
        der[i] = -self.dQalpha[worker]
        i += 1
    for example in self.examples:
        der[i] = -self.dQbeta[example]
        i += 1
    for worker in self.workers:
        for example in self.examples:
            # Previously stored without the minus sign, unlike the alpha and
            # beta components above.
            der[i] = -self.dQgamma[worker][example]
            i += 1
    return der

Solution

  • The value returned by self.dQgamma[worker][example] is either a dictionary or a list (depending on how you declare it).

    You are trying to assign it to an element of a NumPy array, which expects a scalar. That is why you get an error.

    You should declare dQgamma so that it returns a value compatible with your array:

    self.dQgamma=defaultdict(lambda: defaultdict(der.dtype.type))
    

    Edit

    After all the comments below, I update my answer.

    First, a numpy.dtype object is not itself callable; you have to retrieve its type attribute, which is callable. So I edited the code block above to use the right syntax.

    Then, here is a complete example showing how to use the type of your array inside your function (I changed some names to match PEP 8 conventions).

    from collections import defaultdict
    
    import numpy as np
    
    class MyClass:
        """Demo of a nested defaultdict whose leaf default is a NumPy scalar type."""
        def gradient(self, dtype):
            """Build self.d_qgamma from ``dtype`` and print how it behaves.

            dtype: a numpy.dtype object; only its ``type`` attribute (the
                callable scalar class) is used.
            """
            # Outer factory creates a fresh inner defaultdict per first-level
            # key; the inner factory is the scalar class itself.
            self.d_qgamma=defaultdict(lambda: defaultdict(dtype.type))
    
            # Merely reading a missing key creates it at each level.
            print("Unset first level value:", self.d_qgamma[0])
            print("Unset second level value:", self.d_qgamma[0][0])
    
            # Replace a whole first-level entry with a pre-populated inner dict.
            self.d_qgamma['a'] = defaultdict(dtype.type, {'z': dtype.type(42)})
            print("Set first level value:", self.d_qgamma['a'])
    
            # Assign a single leaf, converting the value through the scalar class.
            self.d_qgamma['b']['a'] = dtype.type("42")
            print("Set second level value:", self.d_qgamma['b']['a'])
    
            print("d_qgamma:", self.d_qgamma)
    
        def optimize_df(self, x):
            """Call gradient with x's dtype, then return a zeros_like(x) array
            whose first element is set to d_qgamma['b']['a']."""
            self.gradient(x.dtype)
    
            der = np.zeros_like(x)
            # The leaf is a scalar of x's dtype, so it fits in one array slot.
            der[0] = self.d_qgamma['b']['a']
            return der
    
    # Run the demo on a 4-element float32 array when executed as a script.
    if __name__ == '__main__':
        print("der:", MyClass().optimize_df(np.zeros(4, np.float32)))
    
    Unset first level value: defaultdict(<class 'numpy.float32'>, {})
    Unset second level value: 0.0
    Set first level value: defaultdict(<class 'numpy.float32'>, {'z': 42.0})
    Set second level value: 42.0
    d_qgamma: defaultdict(<function MyClass.gradient.<locals>.<lambda> at 0x7fcfd1663050>, {0: defaultdict(<class 'numpy.float32'>, {0: 0.0}), 'a': defaultdict(<class 'numpy.float32'>, {'z': 42.0}), 'b': defaultdict(<class 'numpy.float32'>, {'a': 42.0})})
    der: [42.  0.  0.  0.]
    

    As you can see, you pass x.dtype of type numpy.dtype to your gradient function. Then, you can use this dtype object and retrieve its type attribute, which is callable, to:

    • pass it to your defaultdict constructor

      self.d_qgamma=defaultdict(lambda: defaultdict(dtype.type))

    • cast any value that you would like to store

      self.d_qgamma['b']['a'] = dtype.type("42")

      Here the string "42" is converted to float with value 42.0.