# Source code for bask.utils

import collections

import numpy as np
from scipy.spatial.distance import cdist, euclidean
from scipy.stats import halfnorm
from skopt.learning.gaussian_process.kernels import ConstantKernel, Matern

# We import r2_sequence here for backwards compatibility reasons:
from bask.init import r2_sequence
from bask.priors import make_roundflat

# Names exported by ``from bask.utils import *``. ``r2_sequence`` is defined in
# ``bask.init`` and only re-exported here for backwards compatibility.
# ``get_progress_bar`` is not listed, so it is not part of the star-import.
__all__ = [
    "geometric_median",
    "r2_sequence",
    "guess_priors",
    "construct_default_kernel",
    "validate_zeroone",
]


def geometric_median(X, eps=1e-5):
    """Compute the geometric median for the given array of points.

    The geometric median is the point minimizing the sum of euclidean (L2)
    distances to all points. It is approximated iteratively: starting from
    the mean, the estimate is repeatedly replaced by an average of the points
    weighted by their inverse distance to the current estimate.

    Parameters
    ----------
    X : array-like, shape (n_points, n_dim)
        The points for which the geometric median is computed. Anything
        accepted by ``numpy.asarray`` (e.g. a nested list) is allowed.
    eps : float
        Stop the computation if the euclidean distance of the last two
        computed points is smaller than eps

    Returns
    -------
    y : numpy array, shape (n_dim,)
        The estimate of the geometric median.
    """
    # Boolean-mask indexing below requires an ndarray, not a plain list.
    X = np.asarray(X)
    y = np.mean(X, 0)

    while True:
        # Distances of every point to the current estimate, shape (n_points, 1).
        D = cdist(X, [y])
        # Points coinciding with the estimate would cause a division by zero
        # and are therefore excluded from the weighted average.
        nonzeros = (D != 0)[:, 0]

        Dinv = 1 / D[nonzeros]
        Dinvs = np.sum(Dinv)
        W = Dinv / Dinvs
        T = np.sum(W * X[nonzeros], 0)

        num_zeros = len(X) - np.sum(nonzeros)
        if num_zeros == 0:
            y1 = T
        elif num_zeros == len(X):
            # Every point coincides with the estimate: it is the median.
            return y
        else:
            # The estimate sits on top of ``num_zeros`` data points. Blend the
            # weighted average T with the current estimate; the more points
            # coincide with y (relative to the pull r of the others), the
            # more weight y keeps.
            R = (T - y) * Dinvs
            r = np.linalg.norm(R)
            rinv = 0 if r == 0 else num_zeros / r
            y1 = max(0, 1 - rinv) * T + min(1, rinv) * y

        if euclidean(y, y1) < eps:
            return y1

        y = y1


def _recursive_priors(kernel, prior_list):
    if hasattr(kernel, "kernel"):  # Unary operations
        _recursive_priors(kernel.kernel, prior_list)
    elif hasattr(kernel, "k1"):  # Binary operations
        _recursive_priors(kernel.k1, prior_list)
        _recursive_priors(kernel.k2, prior_list)
    elif hasattr(kernel, "kernels"):  # CompoundKernel
        # It seems that the skopt kernels are not compatible with the
        # CompoundKernel. This is therefore not officially supported.
        for k in kernel.kernels:
            _recursive_priors(k, prior_list)
    else:
        name = type(kernel).__name__
        if name in ["ConstantKernel", "WhiteKernel"]:
            if name == "ConstantKernel" and kernel.constant_value_bounds == "fixed":
                return
            if name == "WhiteKernel" and kernel.noise_level_bounds == "fixed":
                return
            # We use a half-normal prior distribution on the signal variance and
            # noise. The input x is sampled in log-space, which is why the
            # change of variables is necessary.
            # This prior assumes that the function values are standardized.
            # Note, that we do not know the structure of the kernel, which is
            # why this is just only a best guess.
            prior_list.append(
                lambda x: halfnorm(scale=2.0).logpdf(np.sqrt(np.exp(x)))
                + x / 2.0
                - np.log(2.0),
            )
        elif name in ["Matern", "RBF"]:
            # Here we apply a round-flat prior distribution to any lengthscale
            # parameter we find. We assume the input variables are normalized
            # to lie in [0, 1].
            # For common optimization problems, we expect the lengthscales to
            # lie in the range [0.1, 0.6]. The round-flat prior allows values
            # outside the range, if supported by enough datapoints.
            if isinstance(kernel.length_scale, (collections.Sequence, np.ndarray)):
                n_priors = len(kernel.length_scale)
            else:
                n_priors = 1
            roundflat = make_roundflat(
                lower_bound=0.1,
                upper_bound=0.6,
                lower_steepness=2.0,
                upper_steepness=8.0,
            )
            for _ in range(n_priors):
                prior_list.append(lambda x: roundflat(np.exp(x)) + x)
        else:
            raise NotImplementedError(
                f"Unable to guess priors for this kernel: {kernel}."
            )


def construct_default_kernel(dimensions):
    """Construct a Matern kernel as default kernel to be used in the optimizer.

    The kernel is the product of a ConstantKernel (initial value 1.0, bounds
    (0.1, 2.0)) and a Matern kernel with ``nu=2.5`` that has one lengthscale
    per dimension (initial value 0.3, bounds (0.2, 0.5)).

    Parameters
    ----------
    dimensions : list of dimensions
        Elements are skopt.space.Dimension instances (Real, Integer
        or Categorical) or any other valid value that defines skopt
        dimension (see skopt.Optimizer docs). Only the number of elements is
        used here.

    Returns
    -------
    kernel : kernel object
        The kernel specifying the covariance function of the GP used in the
        optimization.
    """
    n_parameters = len(dimensions)
    kernel = ConstantKernel(
        constant_value=1.0, constant_value_bounds=(0.1, 2.0)
    ) * Matern(
        length_scale=[0.3] * n_parameters, length_scale_bounds=(0.2, 0.5), nu=2.5
    )
    return kernel


def guess_priors(kernel):
    """Guess suitable priors for the hyperparameters of a given kernel.

    This function recursively explores the given (composite) kernel and
    adds a suitable prior for each encountered hyperparameter.

    Here we use a half-Normal(0, 2.0) prior for all ConstantKernels and
    WhiteKernels, and a round-flat(0.1, 0.6) prior for all lengthscales.
    Change of variables is applied, since inference is done in log-space.

    Parameters
    ----------
    kernel : Kernel object.
        Can be a single kernel (e.g. Matern), a Product or Sum kernel, or a
        CompoundKernel.

    Returns
    -------
    priors : list of functions.
        The function returns the list of priors in the same order as the vector
        theta provided by the kernel. Each prior evaluates the logpdf of its
        argument.

    Raises
    ------
    NotImplementedError
        If the kernel contains a component for which no prior can be guessed.
    """
    priors = []
    # The helper appends to ``priors`` in place while traversing the kernel.
    _recursive_priors(kernel, priors)
    return priors


class _NoOpPBar:
    """This class implements the progress bar interface but does nothing"""

    def __init__(self):
        pass

    def __enter__(self, *args, **kwargs):
        return self

    def __exit__(self, *args, **kwargs):
        pass

    def update(self, count):
        pass


def get_progress_bar(display, total):
    """Get a progress bar interface with given properties

    If the tqdm library is not installed, this will always return a "progress
    bar" that does nothing.

    Args:
        display (bool): Should the bar actually show the progress? Only the
                        value ``True`` enables the bar; any other value
                        yields the no-op bar.
        total (int): The total size of the progress bar.

    Returns:
        A ``tqdm.tqdm`` instance if ``display is True`` and tqdm can be
        imported, otherwise a ``_NoOpPBar`` instance.
    """
    if display is not True:
        return _NoOpPBar()
    # tqdm is an optional dependency: import it lazily and degrade to the
    # no-op bar instead of failing when it is missing.
    try:
        import tqdm
    except ImportError:
        return _NoOpPBar()
    return tqdm.tqdm(total=total)


def validate_zeroone(arr):
    """Ensure that every entry of the input lies in the interval [0, 1].

    Parameters
    ----------
    arr : ndarray or array-like
        Array containing arbitrary values. Inputs that are not already numpy
        arrays are converted first.

    Raises
    ------
    ValueError
        If the values of the array are not between 0 and 1 (inclusive).
    """
    # Leaves ndarrays (and their subclasses) untouched, converts the rest.
    values = np.asanyarray(arr)
    below_zero = np.any(values < 0)
    if below_zero or np.any(values > 1):
        raise ValueError("Not all values of the array are between 0 and 1.")
# Source code for bask.utils

# Bayes-skopt

# Navigation

# Related Topics