from typing import Callable, Optional
import numpy as np
import numpy.typing as npt
from scipy.interpolate import PchipInterpolator
# NOTE: a stray "[docs]" Sphinx cross-reference artifact was removed here (HTML scrape residue, not code)
def generate_scaler(
    input_list: list[npt.NDArray], interpolation_points: Optional[int] = None
) -> Callable:
    """
    Generate the scaler function that applies feature scaling to the input data.

    The concatenated input x-grids are mapped by rank onto an (approximately)
    evenly spaced grid in [0, 1]; a monotonicity-preserving PCHIP interpolator
    through a subset of those points then defines a smooth map
    ``log(x) -> [0, 1]``, which the returned scaler rescales to [-1, 1].

    Parameters
    ----------
    input_list : list of numpy.ndarray
        The list of input data arrays.
    interpolation_points : int, optional
        Number of points used to build the interpolator. If ``None`` (default),
        every unique input x-value is used.

    Returns
    -------
    _scaler : Callable
        The scaler function that applies feature scaling to the input data.
    """
    input_arr = np.concatenate(input_list, axis=1)
    input_arr = np.sort(input_arr)
    input_arr_size = input_arr.size
    # Define an evenly spaced grid in the domain [0,1]
    # force_set_smallest is used to make sure the smallest point included in the scaling is 1e-9, to
    # prevent trouble when saving it to the LHAPDF grid
    force_set_smallest = input_arr.min() > 1e-9
    # if 1.0 is in the xgrid it should also be 1.0 in the output xgrid
    include_endpoint = 1.0 in input_arr
    if force_set_smallest:
        new_xgrid = np.linspace(
            start=1 / input_arr_size, stop=1.0, endpoint=include_endpoint, num=input_arr_size
        )
    else:
        new_xgrid = np.linspace(start=0, stop=1.0, endpoint=include_endpoint, num=input_arr_size)
    # When mapping the FK xgrids onto our new grid, we need to consider degeneracies among
    # the x-values in the FK grids: every unique x maps to a single target point.
    unique, counts = np.unique(input_arr, return_counts=True)
    # The shift by counts[0] makes sure the smallest new_xgrid value is included,
    # such that we have a point at x<=1e-9
    map_to = np.array([new_xgrid[cumsum_ - counts[0]] for cumsum_ in np.cumsum(counts)])
    map_from = unique
    # If needed, set feature_scaling(x=1e-9)=0
    if force_set_smallest:
        map_from = np.insert(map_from, 0, 1e-9)
        map_to = np.insert(map_to, 0, 0.0)
    if interpolation_points is None:
        # Use every available point. (The signature advertises this argument as
        # optional, but previously the default None crashed with a TypeError in
        # the arithmetic below.)
        mask = np.ones(map_from.size, dtype=bool)
    else:
        # Select the indices of the points that will be used by the interpolator
        onein = map_from.size / (int(interpolation_points) - 1)
        selected_points = [round(i * onein - 1) for i in range(1, int(interpolation_points))]
        if selected_points[0] != 0:
            selected_points = [0] + selected_points
        selected_points += [1]  # add also this one since 1e-9 is just an outlier
        # make a mask of which points to keep
        mask = np.zeros(map_from.size, dtype=bool)
        mask[selected_points] = True
    # apply the mask and log the input side of the map
    masked_map_from = map_from[mask]
    log_masked_map_from = np.log(masked_map_from)
    masked_map_to = map_to[mask]
    # construct the scaler (PCHIP keeps the map monotonic between the knots)
    try:
        scaler = PchipInterpolator(log_masked_map_from, masked_map_to)
    except ValueError as e:
        raise ValueError(
            "interpolation_points is larger than the number of unique input x-values"
        ) from e

    def _scaler(x):
        # Interpolate in log(x) to [0, 1], rescale to [-1, 1], and append the
        # original x as an extra feature along the last axis
        x_scaled = scaler(np.log(x))
        x_scaled = 2 * x_scaled - 1
        return np.concatenate([x_scaled, x], axis=-1)

    return _scaler