"""Script for Gaussian process regression of a 2D function."""
# Imports
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from sklearn.model_selection import train_test_split
np.random.seed(17) # So you can reproduce these results at home
# Lay out the test function
nv = 20 # Number of points in each direction
xv = np.linspace(-5.0, 5.0, nv) # x-vector
yv = np.linspace(-5.0, 5.0, nv) # y-vector
x, y = np.meshgrid(xv, yv) # x and y are (nv, nv) matrices
z = (x - 3.0) ** 2.0 + 2.0 * x * y + (2.0 * y + 3.0) ** 2.0 - 3.0
# Add noise
noise_level = 5.0
z_noisy = z + np.random.normal(size=x.shape) * noise_level
# Convert to column vectors for scikit-learn
X = np.column_stack((x.reshape(-1), y.reshape(-1)))
Z = z_noisy.reshape(-1, 1)
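# Quick shape check (illustrative, not part of the original pipeline): with
# nv = 20 there are 400 grid points, so X is (400, 2) and Z is (400, 1)
print(X.shape, Z.shape)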
# Set up the kernel:
# Radial basis function with two length scales for smooth variations
# and white kernel to account for noise
guess_l = (2.0, 1.0) # In general, x and y have different scales
bounds_l = ((1e-1, 100.0),) * 2 # Same bounds for x and y
guess_n = 1.0 # Amount of noise
bounds_n = (1e-20, 10.0) # Bounds for noise
# Kernel objects can simply be summed using +
kernel = RBF(
    length_scale=guess_l, length_scale_bounds=bounds_l
) + WhiteKernel(noise_level=guess_n, noise_level_bounds=bounds_n)
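# Optional sketch (not required for the fit): scikit-learn kernel objects are
# callable, so the prior covariance implied by the initial guesses can be
# inspected on a few input points before any training
K_prior = kernel(X[:5])  # (5, 5) prior covariance matrix at the first five points
print(K_prior.shape)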
# Fit to 5% of the data
X_train, X_test, Y_train, Y_test = train_test_split(X, Z, test_size=0.95)
gpr = GaussianProcessRegressor(kernel, normalize_y=True)
gpr.fit(X_train, Y_train)
print(gpr.kernel_)
# RBF(length_scale=[7.86, 3.71]) + WhiteKernel(noise_level=0.00678)
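# Optional sanity checks (assumed additions, not in the original script):
# R^2 of the posterior mean on the held-out points, and the fitted log marginal
# likelihood, which is useful when comparing candidate kernels
print(gpr.score(X_test, Y_test))
print(gpr.log_marginal_likelihood_value_)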
# Predict and reshape back to grid for plotting
Zfit, Zstd = gpr.predict(X, return_std=True)
zstd = Zstd.reshape(x.shape)
zfit = Zfit.reshape(x.shape)
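# Optional rough accuracy check (illustrative only; the exact value depends on
# the random split): RMSE of the posterior mean against the noise-free truth
rmse = np.sqrt(np.mean((zfit - z) ** 2))
print(f"RMSE of posterior mean vs. true z: {rmse:.2f}")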
# Plot posterior mean
# Set up figure
fig, ax = plt.subplots(figsize=(6.0, 4.0))
ax.set_xlabel("$x$")
ax.set_ylabel("$y$")
ax.set_xlim((-5, 5))
ax.set_ylim((-5, 5))
ax.set_aspect("equal")
ax.set_xticks((-5, 0, 5))
ax.set_yticks((-5, 0, 5))
ax.grid(False)
# Do the plotting
lev = np.linspace(0.0, 250.0, 6)
ax.contour(x, y, z, lev, colors="k") # Truth
ax.plot(*X_train.T, "o", color="C1") # Training samples
ax.contour(x, y, zfit, lev, colors="C0", linestyles="dashed") # Posterior mean
# Legend
truth_line = mlines.Line2D([], [], color="black", label="True $z(x,y)$")
sample_line = mlines.Line2D(
[], [], color="C1", marker="o", linestyle="none", label="Train samples"
)
mean_line = mlines.Line2D([], [], color="C0", linestyle="--", label="Posterior mean")
ax.legend(
handles=[truth_line, sample_line, mean_line],
bbox_to_anchor=(1.05, 1),
loc="upper left",
)
# Write out
plt.tight_layout()
plt.savefig("gpr_posterior_mean.svg")
# Plot posterior standard deviation
# Set up figure
fig, ax = plt.subplots(figsize=(6.0, 4.0))
ax.set_xlabel("$x$")
ax.set_ylabel("$y$")
ax.set_xlim((-5, 5))
ax.set_ylim((-5, 5))
ax.set_aspect("equal")
ax.set_xticks((-5, 0, 5))
ax.set_yticks((-5, 0, 5))
ax.grid(False)
# Do the plotting
ax.plot(*X_train.T, "o", color="C1") # Training samples
lev = np.linspace(6.0, 16.0, 11)
hc = ax.contourf(x, y, zstd, lev) # Posterior std
# Set edge colors to match face colors; this removes the thin white seams that
# can appear between filled contour levels in vector output such as SVG
for hci in hc.collections:
    hci.set_edgecolor("face")
# Colorbar
hcb = plt.colorbar(hc)
hcb.ax.grid(False)
hcb.set_label("Posterior standard deviation")
# Write out
plt.tight_layout()
plt.savefig("gpr_posterior_std.svg")