Source code for leaspy.algo.algo_with_samplers
import warnings
from typing import Optional
from leaspy.exceptions import LeaspyAlgoInputError
from leaspy.io.data import Dataset
from leaspy.samplers import AbstractSampler, sampler_factory
from leaspy.variables.specs import IndividualLatentVariable, PopulationLatentVariable
from leaspy.variables.state import State
from .settings import AlgorithmSettings
__all__ = ["AlgorithmWithSamplersMixin"]
class AlgorithmWithSamplersMixin:
"""Mixin class to use in algorithms that require `samplers`.
    Note that this mixin should be used with a class inheriting from `AbstractAlgo`,
    which must have an `algo_parameters` attribute.
Parameters
----------
settings : :class:`.AlgorithmSettings`
The specifications of the algorithm as a :class:`.AlgorithmSettings` instance.
        Please note that you can customize the number of memory-less (burn-in) iterations by setting either:
            * `n_burn_in_iter_frac`, so that the burn-in phase lasts a fraction of the algorithm's `n_iter` (default: 90%), or
            * `n_burn_in_iter`, an absolute number of burn-in iterations (deprecated; it takes priority over `n_burn_in_iter_frac` when both are set).
Attributes
----------
samplers : :obj:`dict` [:obj:`str`, :class:`~.algo.samplers.abstract_sampler.AbstractSampler` ]
        Dictionary of samplers, one per latent variable name.
current_iteration : :obj:`int`, default 0
Current iteration of the algorithm.
The first iteration will be 1 and the last one `n_iter`.
    random_order_variables : :obj:`bool`, default True
        Whether to randomize the order in which variables are sampled at each iteration.
        The article https://proceedings.neurips.cc/paper/2016/hash/e4da3b7fbbce2345d7772b0674a318d5-Abstract.html
        gives a rationale for activating this flag.
"""
def __init__(self, settings: AlgorithmSettings):
super().__init__(settings)
        self.samplers: Optional[dict[str, AbstractSampler]] = None
self.random_order_variables: bool = self.algo_parameters.get(
"random_order_variables", True
)
self.current_iteration: int = 0
# Dynamic number of iterations for burn-in phase
n_burn_in_iter_frac: Optional[float] = self.algo_parameters[
"n_burn_in_iter_frac"
]
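        # Resolution logic (see below): an explicit `n_burn_in_iter` (absolute number of iterations)
        # takes priority; otherwise it is derived from `n_burn_in_iter_frac` as a fraction of `n_iter`.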
if self.algo_parameters.get("n_burn_in_iter", None) is None:
if n_burn_in_iter_frac is None:
raise LeaspyAlgoInputError(
"You should NOT have both `n_burn_in_iter_frac` and `n_burn_in_iter` None."
"\nPlease set a value for at least one of those settings."
)
self.algo_parameters["n_burn_in_iter"] = int(
n_burn_in_iter_frac * self.algo_parameters["n_iter"]
)
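            # e.g. with n_iter=1000 and n_burn_in_iter_frac=0.9, burn-in covers the first 900 iterations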
elif n_burn_in_iter_frac is not None:
warnings.warn(
"`n_burn_in_iter` setting is deprecated in favour of `n_burn_in_iter_frac` - "
"which defines the duration of the burn-in phase as a ratio of the total number of iterations."
"\nPlease use the new setting to suppress this warning or explicitly set `n_burn_in_iter_frac=None`."
"\nNote that while `n_burn_in_iter` is supported "
"it will always have priority over `n_burn_in_iter_frac`.",
FutureWarning,
)
def _is_burn_in(self) -> bool:
"""
        Check whether the current iteration is in the burn-in (= memory-less) phase.
        Returns
        -------
        bool
            ``True`` if the current iteration is within the burn-in phase, ``False`` otherwise.
"""
return self.current_iteration <= self.algo_parameters["n_burn_in_iter"]
def _get_progress_str(self) -> str:
        # The parent algorithm builds the base progress string (from `self.current_iteration`);
        # we only append the current phase here.
iter_str = super()._get_progress_str()
if self._is_burn_in():
iter_str += " (memory-less phase)"
else:
iter_str += " (with memory)"
return iter_str
def __str__(self):
out = super().__str__()
out += "\n= Samplers ="
for sampler in self.samplers.values():
out += f"\n {str(sampler)}"
return out
def _initialize_samplers(self, state: State, dataset: Dataset) -> None:
"""
        Instantiate samplers as a dictionary {variable_name: sampler}.
Parameters
----------
state : :class:`.State`
dataset : :class:`.Dataset`
"""
self.samplers = {}
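        # Samplers are keyed by latent variable name; population and individual latent variables
        # are configured separately (via the `sampler_pop` / `sampler_ind` settings used below).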
self._initialize_population_samplers(state)
self._initialize_individual_samplers(state, dataset.n_individuals)
def _initialize_individual_samplers(self, state: State, n_individuals: int) -> None:
sampler = self.algo_parameters.get("sampler_ind", None)
if sampler is None:
return
# TODO: per variable and not just per type of variable?
sampler_kws = self.algo_parameters.get("sampler_ind_params", {})
for var_name, var in state.dag.sorted_variables_by_type[
IndividualLatentVariable
].items():
var: IndividualLatentVariable # for type-hint only
            # Build the sampler kwargs from the variable's own sampling kwargs,
            # adding its name and prior shape (a default scale is set below).
var_kws = dict(
var.sampling_kws or {},
name=var_name,
shape=var.get_prior_shape(state.dag),
)
            # To enforce a fixed scale for a given variable, set it in the random variable specs.
            # Note that for individual variables the model parameters `***_std` should always be valid (> 0),
            # so the prior standard deviation is a safe default.
var_kws.setdefault("scale", var.prior.stddev.call(state))
self.samplers[var_name] = sampler_factory(
sampler,
IndividualLatentVariable,
n_patients=n_individuals,
**var_kws,
**sampler_kws,
)
def _initialize_population_samplers(self, state: State) -> None:
sampler = self.algo_parameters.get("sampler_pop", None)
if sampler is None:
return
# TODO: per variable and not just per type of variable?
sampler_kws = self.algo_parameters.get("sampler_pop_params", {})
for var_name, var in state.dag.sorted_variables_by_type[
PopulationLatentVariable
].items():
var: PopulationLatentVariable # for type-hint only
            # Build the sampler kwargs from the variable's own sampling kwargs,
            # adding its name and prior shape (a default scale is set below).
var_kws = dict(
var.sampling_kws or {},
name=var_name,
shape=var.get_prior_shape(state.dag),
)
            # To enforce a fixed scale for a given variable, set it in the random variable specs.
            # For instance, for betas & deltas it is a good idea to define the scale this way,
            # since their values will probably be 0 right after initialization!
var_kws.setdefault("scale", state[var_name].abs())
# TODO: after functional test passed we could change the previous line with the following one (more consistent)
# var_kws.setdefault("scale", var.prior.stddev.call(state))
# TODO: mask logic?
self.samplers[var_name] = sampler_factory(
sampler, PopulationLatentVariable, **var_kws, **sampler_kws
)
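# A rough sketch (illustrative only, not part of this module) of how a concrete algorithm built
# on this mixin might drive the samplers in its main loop; `MyFitAlgo`, `SomeAbstractFitAlgo`,
# `_run` and the exact `sample(...)` call are assumptions, not the actual leaspy API:
#
#     class MyFitAlgo(AlgorithmWithSamplersMixin, SomeAbstractFitAlgo):
#         def _run(self, state: State, dataset: Dataset) -> None:
#             self._initialize_samplers(state, dataset)
#             for self.current_iteration in range(1, self.algo_parameters["n_iter"] + 1):
#                 var_names = list(self.samplers)
#                 if self.random_order_variables:
#                     random.shuffle(var_names)  # cf. the rationale cited in the class docstring
#                 for var_name in var_names:
#                     self.samplers[var_name].sample(...)  # exact signature depends on the sampler
#                 # self._is_burn_in() can then drive phase-specific behaviour (memory-less vs. with memory)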