import pandas as pd
import torch
from leaspy.io.data.dataset import Dataset
from leaspy.utils.functional import Exp, OrthoBasis, Sqr
from leaspy.utils.weighted_tensor import (
unsqueeze_right,
)
from leaspy.variables.distributions import Normal
from leaspy.variables.specs import (
Hyperparameter,
ModelParameter,
NamedVariables,
PopulationLatentVariable,
VariableNameToValueMapping,
)
from .obs_models import FullGaussianObservationModel
from .riemanian_manifold import RiemanianManifoldModel
__all__ = [
"LinearInitializationMixin",
"LinearModel",
]
[docs]
class LinearInitializationMixin:
"""Compute initial values for model parameters."""
def _compute_initial_values_for_model_parameters(
self,
dataset: Dataset,
) -> VariableNameToValueMapping:
"""
Compute and return initial values for the model parameters using linear regression.
Parameters
----------
dataset : :class:`Dataset`
The dataset from which to extract observations and masks.
Returns
-------
:class:`~leaspy.variables.specs.VariableNameToValueMapping`
A dictionary mapping parameter names (as strings) to their initialized
torch.Tensor values.
Notes
-----
- The initial positions are computed using `intercept + t0 * slope` where `t0`
is the mean of all observation times.
- Velocities are averaged per feature, and log-transformed using
`get_log_velocities`.
- Time parameters (`tau_mean`, `tau_std`) and variability parameters
(`xi_std`) are also initialized.
- If `self.source_dimension >= 1`, zero initialization is used for `betas_mean`.
- If the observation model is a `FullGaussianObservationModel`, the
`noise_std` parameter is also added, expanded to the correct shape.
"""
from leaspy.models.utilities import (
compute_linear_regression_subjects,
get_log_velocities,
torch_round,
)
df = dataset.to_pandas(apply_headers=True)
times = df.index.get_level_values("TIME").values
t0 = times.mean()
d_regress_params = compute_linear_regression_subjects(df, max_inds=None)
df_all_regress_params = pd.concat(d_regress_params, names=["feature"])
df_all_regress_params["position"] = (
df_all_regress_params["intercept"] + t0 * df_all_regress_params["slope"]
)
df_grp = df_all_regress_params.groupby("feature", sort=False)
positions = torch.tensor(df_grp["position"].mean().values)
velocities = torch.tensor(df_grp["slope"].mean().values)
parameters = {
"g_mean": positions,
"log_v0_mean": get_log_velocities(velocities, self.features),
"tau_mean": torch.tensor(t0),
"tau_std": self.tau_std,
"xi_std": self.xi_std,
}
if self.source_dimension >= 1:
parameters["betas_mean"] = torch.zeros(
(self.dimension - 1, self.source_dimension)
)
rounded_parameters = {
str(p): torch_round(v.to(torch.float32)) for p, v in parameters.items()
}
obs_model = next(iter(self.obs_models)) # WIP: multiple obs models...
if isinstance(obs_model, FullGaussianObservationModel):
rounded_parameters["noise_std"] = self.noise_std.expand(
obs_model.extra_vars["noise_std"].shape
)
return rounded_parameters
[docs]
class LinearModel(LinearInitializationMixin, RiemanianManifoldModel):
"""Manifold model for multiple variables of interest (linear formulation)."""
def __init__(self, name: str, **kwargs):
super().__init__(name, **kwargs)
[docs]
def get_variables_specs(self) -> NamedVariables:
"""
Return the specifications of the variables (latent variables, derived variables,
model 'parameters') that are part of the model.
Returns
-------
:class:`~leaspy.variables.specs.NamedVariables :
A dictionary-like object mapping variable names to their specifications.
"""
d = super().get_variables_specs()
d.update(
g_mean=ModelParameter.for_pop_mean("g", shape=(self.dimension,)),
g_std=Hyperparameter(0.01),
g=PopulationLatentVariable(Normal("g_mean", "g_std")),
)
return d
[docs]
@staticmethod
def metric(*, g: torch.Tensor) -> torch.Tensor:
"""
Compute the metric tensor for the model.
Parameters
----------
g : :class:`torch.Tensor`
Input tensor with values of the population parameter `g` for each feature.
Returns
-------
:class:`torch.Tensor`
A tensor of ones with the same shape as `g`.
"""
return torch.ones_like(g)
[docs]
@classmethod
def model_with_sources(
cls,
*,
rt: torch.Tensor,
space_shifts: torch.Tensor,
metric,
v0,
g,
) -> torch.Tensor:
"""
Return the model output when sources(spatial components) are present.
Parameters
----------
rt : :class:`torch.Tensor`
The reparametrized time.
space_shifts : :class:`torch.Tensor`
The values of the space-shifts
metric : Any
The metric tensor used for computing the spatial/temporal influence.
v0 : Any
The values of the population parameter `v0` for each feature.
g : Any
The values of the population parameter `g` for each feature.
Returns
-------
:class:`torch.Tensor`
The model output with contribution from sources.
"""
pop_s = (None, None, ...)
rt = unsqueeze_right(rt, ndim=1) # .filled(float('nan'))
return (g[pop_s] + v0[pop_s] * rt + space_shifts[:, None, ...]).weighted_value