Tags: python, gpflow, gaussian-process

GPflow 2: VGP model with MOK and multiple inputs throws ValueError


I'm following the multi-output kernel notebook of the GPflow 2.5.2 documentation. I'm trying to replace the SVGP model with either a VGP or a GPR model, because I only have a little data and do not need the sparse aspect.

I'm using the SharedIndependent multi-output kernel.

For both models I get a ValueError saying that the dimensions in a matrix multiplication are incorrect. I suspect I need to change the format of the input data, but I don't know how, so I am using the same format as for the SVGP model.

Error message for VGP model:

ValueError: Dimensions must be equal, but are 2 and 100 for '{{node MatMul}} = BatchMatMulV2[T=DT_DOUBLE, adj_x=false, adj_y=false](Cholesky, MatMul/identity_CONSTRUCTED_AT_top_level/forward/ReadVariableOp)' with input shapes: [100,2,2], [100,2].

Error message for GPR model:

ValueError: Dimensions must be equal, but are 2 and 100 for '{{node triangular_solve/MatrixTriangularSolve}} = MatrixTriangularSolve[T=DT_DOUBLE, adjoint=false, lower=true](Cholesky, sub)' with input shapes: [100,2,2], [100,2].

I already tried setting the q_mu and q_sqrt values as suggested here, after initializing the VGP model like this (it didn't work):

m.q_mu = np.zeros((len(x_train) * len(y_train.T), 1), dtype=gpf.config.default_float())
m.q_sqrt = np.expand_dims(np.identity(len(x_train) * len(y_train.T), dtype=gpf.config.default_float()), axis=0)
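
(For reference, inspecting a plain VGP shows that it stores its variational parameters per latent GP rather than flattened; a quick check, with m the VGP model, N data points and P latent GPs:

print(m.q_mu.shape)    # (N, P) rather than (N*P, 1)
print(m.q_sqrt.shape)  # (P, N, N), one lower-triangular factor per latent GP

so the flattened shapes above are likely not what the model expects.)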

The code looks as follows:

import numpy as np
import gpflow as gpf
from gpflow.ci_utils import ci_niter

def generate_data(N=100):
    X1 = np.random.rand(N, 1)

    Y1 = np.sin(6 * X1) + np.random.randn(*X1.shape) * 0.03 + 2
    Y2 = np.sin(5 * X1 + 0.7) + np.random.randn(*X1.shape) * 0.1 + 0.5

    return X1, np.concatenate((Y1, Y2), axis=1)

N=100
M=15
P=2

data = (X, Y) = generate_data(N)

# create multi-output kernel
kernel = gpf.kernels.SharedIndependent(
    gpf.kernels.Matern52(active_dims=list(range(X.shape[1]))), output_dim=P
)

# initialization of inducing input locations (M evenly spaced points in [0, 1])
Zinit = np.linspace(0, 1, M)[:, None]
Z = Zinit.copy()
# create multi-output inducing variables from Z
iv = gpf.inducing_variables.SharedIndependentInducingVariables(
    gpf.inducing_variables.InducingPoints(Z)
)

m = gpf.models.SVGP(kernel, gpf.likelihoods.Gaussian(), inducing_variable=iv, num_latent_gps=P)

optimizer = gpf.optimizers.Scipy()
optimizer.minimize(
    m.training_loss_closure(data),
    variables=m.trainable_variables,
    method="l-bfgs-b",
    options={"iprint": 0, "maxiter": ci_niter(2000)},
)

#  implementation of VGP

m = gpf.models.VGP(data, kernel, gpf.likelihoods.Gaussian(), num_latent_gps=P)
optimizer = gpf.optimizers.Scipy()
optimizer.minimize(
    m.training_loss,
    variables=m.trainable_variables,
    method="l-bfgs-b",
    options={"iprint": 0, "maxiter": ci_niter(2000)},
)

## implementation of gpflow.models.GPR

m = gpf.models.GPR(data, kernel)
optimizer = gpf.optimizers.Scipy()
optimizer.minimize(
    m.training_loss,
    variables=m.trainable_variables,
    method="l-bfgs-b",
    options={"iprint": 0, "maxiter": ci_niter(2000)},
)

Solution

  • Unfortunately, currently only the SVGP model supports multi-output kernels.

    Supporting them in more models is a common request, but it is a surprisingly large amount of work, so it has not been done yet. (The dense models' internal linear algebra assumes a plain [N, N] kernel matrix, while a multi-output kernel carries extra output dimensions, which is where the [100, 2, 2] vs. [100, 2] shape mismatch in your errors comes from.)

    The good news is that the simpler models do support broadcasting a single kernel over multiple output dimensions, which is exactly the same as the SharedIndependent MOK. Just drop in a single-output kernel and it will broadcast. For example:

    import gpflow as gpf
    import numpy as np
    
    D = 1
    P = 2
    
    
    def generate_data(N=100):
        X1 = np.random.rand(N, D)
    
        Y1 = np.sin(6 * X1) + np.random.randn(*X1.shape) * 0.03 + 2
        Y2 = np.sin(5 * X1 + 0.7) + np.random.randn(*X1.shape) * 0.1 + 0.5
    
        return X1, np.concatenate((Y1, Y2), axis=1)
    
    
    N = 100
    M = 15
    
    data = generate_data(N)
    train_data = (data[0][:70], data[1][:70])
    test_data = (data[0][70:], data[1][70:])
    
    # create multi-output kernel
    kernel = gpf.kernels.SharedIndependent(gpf.kernels.Matern52(), output_dim=P)
    
    # initialization of inducing input locations (M evenly spaced points in [0, 1])
    Zinit = np.linspace(0, 1, M)[:, None]
    Z = Zinit.copy()
    # create multi-output inducing variables from Z
    iv = gpf.inducing_variables.SharedIndependentInducingVariables(
        gpf.inducing_variables.InducingPoints(Z)
    )
    
    m = gpf.models.SVGP(
        kernel, gpf.likelihoods.Gaussian(), inducing_variable=iv, num_latent_gps=P
    )
    
    optimizer = gpf.optimizers.Scipy()
    optimizer.minimize(
        m.training_loss_closure(train_data),
        variables=m.trainable_variables,
        method="l-bfgs-b",
        options={"iprint": 0, "maxiter": 2000},
    )
    print("svgp", np.mean(m.predict_log_density(test_data)))
    
    #  implementation of VGP
    
    m = gpf.models.VGP(
        train_data, gpf.kernels.Matern52(), gpf.likelihoods.Gaussian(), num_latent_gps=P
    )
    optimizer = gpf.optimizers.Scipy()
    optimizer.minimize(
        m.training_loss,
        variables=m.trainable_variables,
        method="l-bfgs-b",
        options={"iprint": 0, "maxiter": 2000},
    )
    print("vgp", np.mean(m.predict_log_density(test_data)))
    
    ## implementation of gpflow.models.GPR
    
    m = gpf.models.GPR(
        train_data,
        gpf.kernels.Matern52(),
    )
    optimizer = gpf.optimizers.Scipy()
    optimizer.minimize(
        m.training_loss,
        variables=m.trainable_variables,
        method="l-bfgs-b",
        options={"iprint": 0, "maxiter": 2000},
    )
    print("gpr", np.mean(m.predict_log_density(test_data)))