Tags: python, tensorflow, tensor, loss-function, pde

Can I convert gradient to scalar within the loss function in tensorflow?


This is my first question on Stack Overflow. If I have formatted it wrong, please tell me! Thank you so much!

So I am trying to maximize a function (shown as an image in the original post; it is the expression that f in the code below returns, negated for minimization).

I have attached the code of my loss function:


import numpy as np
import tensorflow as tf
from scipy import optimize

# sigma, r, alpha, p, gamma are constants defined elsewhere in the notebook

def loss(model, y_interior, S_interior, y_terminal, S_terminal):
    ''' Compute total loss for training.

    Args:
        model:      DGM model object
        y_interior: sampled points in the interior of the function's domain
        S_interior: sampled space points in the interior of the function's domain
        y_terminal: sampled points at terminal time (vector of terminal times)
        S_terminal: sampled space points at terminal time
    '''

    # Loss term #1: PDE
    # compute function value and derivatives at current sampled points
    W = model(y_interior, S_interior)
    W_s = tf.gradients(W, S_interior)[0]
    W_y = tf.gradients(W, y_interior)[0]
    W_yy = tf.gradients(W_y, y_interior)[0]  # second derivative: differentiate W_y, not W

    def f(params):
        # print(params)  # <-- you'll see that params is a NumPy array
        v_1, v_2 = params  # <-- for readability you may wish to assign names to the component variables

        return -((v_1*y_interior*sigma)**2/2*W_yy
                 + ((1-v_1)*y_interior*r + v_1*y_interior*alpha)*W_y
                 + np.math.exp(-p*S_interior)*v_2**gamma)

    initial_guess = [1, 1]
    result = optimize.minimize(f, initial_guess)

    if result.success:
        fitted_params = result.x
        fitted_value = result.fun
        print(fitted_params)
    else:
        raise ValueError(result.message)
    # compute average L2-norm of differential operator
    L1 = tf.reduce_mean(tf.square(fitted_value)) 
    L2 = tf.reduce_mean(tf.square(W_s))

    # Loss term #2: boundary condition
    # no boundary condition for this problem

    # Loss term #3: initial/terminal condition
    target_payoff = tf.nn.relu(S_terminal - 100)
    fitted_payoff = model(y_terminal, S_terminal)

    L3 = tf.reduce_mean(tf.square(fitted_payoff - target_payoff))

    return L1, L2, L3

But the error message is: TypeError: must be real number, not Tensor

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-11-8c310392196a> in <module>
    156 S_terminal_tnsr = tf.placeholder(tf.float32, [None,1])
    157 # loss
--> 158 L1_tnsr, L2_tnsr, L3_tnsr = loss(model, y_interior_tnsr, S_interior_tnsr, y_terminal_tnsr, S_terminal_tnsr)
    159 loss_tnsr = L1_tnsr +L2_tnsr
    160 

<ipython-input-11-8c310392196a> in loss(model, y_interior, S_interior, y_terminal, S_terminal)
    109 
    110     initial_guess = [1, 1]
--> 111     result = optimize.minimize(f, initial_guess)
    112 
    113     if result.success:

~\Anaconda3\lib\site-packages\scipy\optimize\_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    602         return _minimize_cg(fun, x0, args, jac, callback, **options)
    603     elif meth == 'bfgs':
--> 604         return _minimize_bfgs(fun, x0, args, jac, callback, **options)
    605     elif meth == 'newton-cg':
    606         return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,

~\Anaconda3\lib\site-packages\scipy\optimize\optimize.py in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
   1001     func_calls, f = wrap_function(f, args)
   1002 
-> 1003     old_fval = f(x0)
   1004 
   1005     if fprime is None:

~\Anaconda3\lib\site-packages\scipy\optimize\optimize.py in function_wrapper(*wrapper_args)
    325     def function_wrapper(*wrapper_args):
    326         ncalls[0] += 1
--> 327         return function(*(wrapper_args + args))
    328 
    329     return ncalls, function_wrapper

<ipython-input-11-8c310392196a> in f(params)
    104         v_1, v_2 = params # <-- for readability you may wish to assign names to the component variables
    105 
--> 106         return -((v_1*y_interior*sigma)**2/2*W_yy+((1-v_1)*y_interior*r+v_1*y_interior*alpha)*W_y+np.math.exp(-p*S_interior)*v_2**gamma)
    107 
    108

I think the problem is that when I maximize the function, the gradients inside f are still tensors, not scalars. Let me know if you need more information. Thank you so much!


Solution

  • The issue here seems to be that

    
    def f(params):
        # print(params)  # <-- you'll see that params is a NumPy array
        v_1, v_2 = params
    
        return -((v_1*y_interior*sigma)**2/2*W_yy
                 + ((1-v_1)*y_interior*r + v_1*y_interior*alpha)*W_y
                 + np.math.exp(-p*S_interior)*v_2**gamma)
    

    returns a Tensor, while the caller (scipy.optimize.minimize) expects a plain scalar, which is exactly what you suspected.

    The NumPy arrays are cast to Tensors before the multiplication happens, so the result is a Tensor.

    Try this to see it in action

    
    tensor = tf.constant(10)
    np_type = np.array([10], dtype=np.int32)
    tensor * np_type
    
    >>> <tf.Tensor: shape=(1,), dtype=int32, numpy=array([100], dtype=int32)>
    
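    Conversely, an eager Tensor can be pulled back into plain Python, which is the form scipy.optimize.minimize expects. A minimal standalone sketch (assuming TF 2.x with eager execution on; the variable names here are made up for the demo):

    
    import numpy as np
    import tensorflow as tf
    
    t = tf.constant([[2.5]])     # a 1x1 Tensor, as a reduced loss might be
    as_array = t.numpy()         # eager Tensors expose .numpy()
    as_scalar = as_array.item()  # plain Python float, works for any size-1 array
    print(as_scalar)             # 2.5
    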

    This should do the trick if you're using TF 2.0, which it seems like you are: just cast back to a NumPy type with np.array().

    
    def f(params):
        # print(params)  # <-- you'll see that params is a NumPy array
        v_1, v_2 = params

        tensor_loss = -((v_1*y_interior*sigma)**2/2*W_yy
                        + ((1-v_1)*y_interior*r + v_1*y_interior*alpha)*W_y
                        + np.math.exp(-p*S_interior)*v_2**gamma)
        return np.array(tensor_loss)[0]  # pull the value out as a NumPy type
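
    Note that np.array(tensor_loss) only has a value to convert under eager execution; with a symbolic graph-mode Tensor (e.g. one built from tf.placeholder) the conversion will fail. As a quick sanity check that scipy is happy once f returns a plain float, here is a toy version with a made-up quadratic standing in for the PDE expression:

    
    import numpy as np
    from scipy import optimize
    
    def f(params):
        v_1, v_2 = params
        # stand-in for the PDE expression: any smooth scalar-valued function
        return (v_1 - 2.0)**2 + (v_2 + 1.0)**2
    
    result = optimize.minimize(f, [1.0, 1.0])
    print(result.success, result.x)  # True, x close to [2, -1]
    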