import numpy as np
import warnings
from scipy.linalg import qr, eigh
from numpy.linalg import multi_dot
from .scatter import Scatter, cov, covW, covAxis, cov4, mcd, tM, tcov, tcovAxis
from .comp_select import ComponentSelect, normal_crit, unimodal_crit, median_crit
from .utils import sort_eigenvalues_eigenvectors, sqrt_symmetric_matrix, _sign_max, _check_gen_kurtosis
from .plot import _plot_kurtosis
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, validate_data, check_array
from sklearn.utils._param_validation import StrOptions
_SCATTER_MAP = {
"cov": cov,
"cov4": cov4,
"covAxis": covAxis,
"covW": covW,
"mcd": mcd,
"tM": tM,
"tcov": tcov,
"tcovAxis": tcovAxis,
}
_SELECT_MAP = {
"normal": normal_crit,
"unimodal": unimodal_crit,
"median": median_crit,
}
[docs]
class ICS(TransformerMixin, BaseEstimator):
"""
Invariant Coordinate Selection (ICS) Class and associated methods.
This class implements the ICS algorithm: it transforms the data, via the simultaneous diagonalization of two scatter
matrices, into an invariant coordinate system or independent components, depending on the underlying assumptions.
It supports various scatter matrix calculations and offers multiple algorithms for applying ICS.
Parameters:
S1 (callable or str, default='cov'): First scatter estimator. If a string is provided, it must be one of the predefined scatter estimators (see the "Available scatter estimators" section below). Otherwise, it must be a callable returning a Scatter object.
S2 (callable or str, default='cov4'): Second scatter estimator. If a string is provided, it must be one of the predefined scatter estimators (see the "Available scatter estimators" section below). Otherwise, it must be a callable returning a Scatter object.
algorithm ({'eigh', 'standard', 'whiten', 'QR'}, default='eigh'): The algorithm used for computing the invariant coordinates.
center (bool, default=False): A logical indicating whether the invariant coordinates should be centered with respect to the first locattion or not. Centering is only applicable if the first scatter object contains a location component, otherwise this is set to False. Note that this only affects the scores of the invariant components (attribute `self.scores_`), but not the generalized kurtosis values (attribute `self.kurtosis_`).
fix_signs({'scores', 'W'}, default='scores') How to fix the signs of the invariant coordinates. Possible values are 'scores' to fix the signs based on (generalized) skewness values of the coordinates, or 'W' to fix the signs based on the coefficient matrix of the linear transformation.
S1_args (dict or None, default=None): Additional arguments for S1.
S2_args (dict or None, default=None): Additional arguments for S2.
method_select ({'median', 'normal', 'unimodal'} or callable or None, default=None): The criteria to select the invariant components. If None (default), all components are kept. If a string is provided, it must be either "median" to use the median eigenvalue criterion, "normal" to apply normality tests to the components, or "unimodal" to apply unimodality tests to the components. If callable, it must return a ComponentSelect object. For more information, refer to :mod:`icspylab.comp_select`.
select_args (dict or None, default=None): Additional arguments for method_select.
Attributes:
components_ (ndarray): Invariant axes in feature space: the transformation matrix in which each row contains the coefficients of the linear transformation to the corresponding invariant coordinate. The components are sorted by decreasing kurtosis.
n_components_ (int): Number of components kept.
component_names_ (list): Names of components kept.
kurtosis_ (ndarray): Generalized kurtosis values.
skewness_ (ndarray): Skewness values.
n_features_in_ (int): Number of features seen during fit.
feature_names_in_ (ndarray): Names of features seen during fit. Defined only when X has feature names that are all strings.
S1_X_ (ndarray): Fitted scatter S1. Defined only when center=True.
criteria_out_ (dict or None): Summary of the component selection step. Defined only when method_select is not None.
Available scatter estimators are (see :mod:`icspylab.scatter` for a full description of the available scatters):
- ``'cov'``: classical covariance matrix and mean
- ``'cov4'``: fourth-moment estimator
- ``'covAxis'``: one-step Tyler shape estimator
- ``'covW'``: one-step M-estimator using mean and covariance matrix as starting point
- ``'mcd'``: Minimum Covariance Determinant
- ``'tM'``: location and scatter for a multivariate t-distribution
- ``'tcov'``: one-step pairwise M-estimator
- ``'tcovAxis'``: one-step pairwise M-estimator with the same weights as covAxis
Supported algorithms:
1. eigh: performs directly the simultaneous diagonalization of the two scatter matrices using scipy.linalg's function eigh(:math:`S_2(X)`, :math:`S_1(X)`)
2. standard: performs the spectral decomposition of the symmetric matrix :math:`S_1(X)^{-1/2}S_2(X)S_1(X)^{-1/2}`
3. whiten: whitens the data with respect to the first scatter matrix before computing the second scatter matrix.
4. QR: numerically stable algorithm based on the QR algorithm for a common family of scatter pairs: if S1 is cov(), and if S2 is one of cov4, covW, or covAxis. See Archimbaud et al. (2023) for details.
References:
- Tyler, D.E., Critchley, F., Dumbgen, L. and Oja, H. (2009) Invariant Co-ordinate Selection. Journal of the Royal Statistical Society, Series B, 71(3), 549–592. doi:10.1111/j.14679868.2009.00706.x.
- Nordhausen, K., Oja, H., & Tyler, D. E. (2008). Tools for exploring multivariate data: The package ICS. Journal of Statistical Software, 28, 1-31.
- For algorithm = 'QR', refer to Archimbaud, A., Drmac, Z., Nordhausen, K., Radojcic, U. and Ruiz-Gazen, A. (2023) Numerical Considerations and a New Implementation for Invariant Coordinate Selection. SIAM Journal on Mathematics of Data Science, 5(1), 97–121. doi:10.1137/22M1498759.
Example:
>>> from sklearn.datasets import load_iris
>>> from icspylab import ICS
>>> iris = load_iris()
>>> X = iris.data
>>> ics = ICS()
>>> ics.fit(X)
>>> print(ics.kurtosis_)
[1.20739878 1.0269412 0.9292235 0.74046722]
"""
_parameter_constraints = {
"S1": [StrOptions(set(_SCATTER_MAP.keys())), callable],
"S2": [StrOptions(set(_SCATTER_MAP.keys())), callable],
"algorithm": [StrOptions({"eigh", "standard", "whiten", "QR"})],
"center": ["boolean"],
"fix_signs": [StrOptions({"scores", "W"})],
"S1_args": [dict, None],
"S2_args": [dict, None],
"method_select": [StrOptions(set(_SELECT_MAP.keys())), callable, None],
"select_args": [dict, None],
}
def __init__(
self,
S1="cov",
S2="cov4",
algorithm="eigh",
center=False,
fix_signs="scores",
S1_args=None,
S2_args=None,
method_select=None,
select_args=None
):
self.S1 = S1
self.S2 = S2
self.algorithm = algorithm
self.center = center
self.fix_signs = fix_signs
self.S1_args = S1_args
self.S2_args = S2_args
self.method_select = method_select
self.select_args = select_args
[docs]
def fit(self, X, y=None):
"""
Fit the ICS model to the data.
This function relies on several helper methods to perform the ICS fit:
_compute_first_scatter, _compute_second_scatter, _transform_second_scatter,
_compute_transformation, _compute_transformation_qr, _fix_component_signs.
Parameters:
X (array-like): Data to fit the ICS model, where rows are samples and columns are features.
y (Ignored): Not used, present for API consistency by convention.
Returns:
self:The fitted ICS object.
"""
# Check inputs
self._validate_params()
if isinstance(self.S1, str):
self.S1_ = _SCATTER_MAP[self.S1]
else:
self.S1_ = self.S1
if isinstance(self.S2, str):
self.S2_ = _SCATTER_MAP[self.S2]
else:
self.S2_ = self.S2
if isinstance(self.method_select, str):
self.method_select_ = _SELECT_MAP[self.method_select]
else:
self.method_select_ = self.method_select
if self.algorithm == "QR":
if not (self.S1_ == cov and self.S2_ in (covW, covAxis, cov4)):
warnings.warn("QR algorithm is not applicable; proceeding with the standard algorithm")
self.algorithm = "standard"
if hasattr(X, "columns"):
feature_names_in = np.array(X.columns)
else:
feature_names_in = None
X = validate_data(
self,
X,
reset=True,
ensure_all_finite=True,
ensure_2d=True,
ensure_min_features=2,
ensure_min_samples=2,
copy=False,
)
self.feature_names_in_ = feature_names_in
self.n_features_in_ = X.shape[1]
# ICS method
S1_X, S1_X_inv_sqrt = self._compute_first_scatter(X)
if self.algorithm == "whiten":
# Whiten the data using the inverse square root of S1
Y = np.dot(X, S1_X_inv_sqrt)
S2_Y = self._compute_second_scatter(Y)
W, gen_kurtosis = self._compute_transformation(S1_X_inv_sqrt, S2_Y)
elif self.algorithm == "QR":
# Use the QR decomposition for transformation
W, gen_kurtosis = self._compute_transformation_qr(X, S1_X)
elif self.algorithm == "standard":
# Use the standard algorithm
S2_X = self._compute_second_scatter(X)
S2_Y = self._transform_second_scatter(S1_X_inv_sqrt, S2_X)
W, gen_kurtosis = self._compute_transformation(S1_X_inv_sqrt, S2_Y)
else:
S2_X = self._compute_second_scatter(X)
evals, evecs = eigh(S2_X.scatter, S1_X.scatter)
evals, evecs = sort_eigenvalues_eigenvectors(evals, evecs)
gen_kurtosis = evals
W = evecs.T
W_final, gen_skewness = self._fix_component_signs(X, W)
if self.center:
self.S1_X_ = S1_X
self.kurtosis_ = gen_kurtosis
self.skewness_ = gen_skewness
# Component selection
if self.method_select_ is None:
self.components_ = W_final
self.n_components_ = self.components_.shape[0]
self.component_names_ = [f"IC_{i + 1}" for i in range(self.n_components_)]
self.criteria_out_ = None
else:
selection_res = self._component_selection(X, W_final)
self.components_ = selection_res.components
self.n_components_ = selection_res.n_components
self.component_names_ = selection_res.component_names
self.criteria_out_ = selection_res.info
return self
[docs]
def get_feature_names_out(self, input_features=None):
check_is_fitted(self, "feature_names_in_")
return self.feature_names_in_
[docs]
def plot_kurtosis(self, **kwargs):
"""Plot the generated kurtosis."""
check_is_fitted(self, "components_")
_plot_kurtosis(self.kurtosis_, **kwargs)
[docs]
def describe(self):
"""
Print a summary of the ICS model.
This includes the algorithm used, whether data was centered, how signs were fixed;
and displays the generalized kurtosis, transformation matrix, transformed data, and
the skewness of the data.
"""
if self.feature_names_in_ is None:
feature_names = np.array([f'Feature_{i+1}' for i in range(self.n_features_in_)])
else:
feature_names = self.feature_names_in_
print("\nICS based on two scatter matrices:")
print(f"S1: {self.S1_.__name__}")
print(f"S1_args: {self.S1_args}")
print(f"S2: {self.S2_.__name__}")
print(f"S2_args: {self.S2_args}")
print("\nInformation on the algorithm:")
print(f"algorithm: {self.algorithm}")
print(f"center: {self.center}")
print(f"fix_signs: {self.fix_signs}")
# Print the kurtosis values
print("\nThe generalized kurtosis measures of the components are:")
if self.kurtosis_ is not None:
for idx, val in enumerate(self.kurtosis_):
print(f"IC_{idx + 1}: {val:.4f}")
else:
print("None")
# Print component selection information
print("\nInformation on the component selection:")
print(f"method_select: {self.method_select_}")
print(f"select_args: {self.select_args}")
if self.criteria_out_ is not None:
print(f"component selection info: {self.criteria_out_}")
if self.n_components_ < 2:
print(f"{self.n_components_} component is kept: {self.component_names_}")
elif self.n_components_ < self.n_features_in_:
print(f"{self.n_components_} components are kept: {self.component_names_}")
else:
print(f"All components are kept: {self.component_names_}")
# Print the coefficient matrix
print("\nThe coefficient matrix of the linear transformation is:")
if self.components_ is not None:
header = " " + " ".join(f"{name:>12}" for name in feature_names)
print(header)
for idx, row in enumerate(self.components_, start=1):
row_str = " ".join(f"{val:>12.5f}" for val in row)
print(f"IC_{idx:<3} {row_str}")
else:
print("None")
def _compute_first_scatter(self, X):
"""
Apply the first scatter matrix to the data and compute the inverse square root.
This step is the first step of computing ICS and is common across all 3 algorithms.
Parameters:
X (ndarray): The data matrix
Returns:
tuple: The first scatter matrix applied to the data, and its inverse square root
Algorithms:
eigh, standard, whiten, QR
"""
S1_args = {} if self.S1_args is None else self.S1_args
S1_X = self.S1_(X, **S1_args)
if not isinstance(S1_X, Scatter):
raise ValueError("S1 must return a Scatter object")
S1_X_inv_sqrt = sqrt_symmetric_matrix(S1_X.scatter, inverse=True)
return S1_X, S1_X_inv_sqrt
def _compute_second_scatter(self, X):
"""
Apply the second scatter matrix either directly on the data (eigh and standard algorithm) or on the whitened data
(whiten algorithm).
Parameters:
X (ndarray): The data matrix or whitened data.
Returns:
Scatter: The second scatter matrix applied to the passed data.
Algorithm:
eigh, standard, whiten
"""
S2_args = {} if self.S2_args is None else self.S2_args
S2_X = self.S2_(X, **S2_args)
if not isinstance(S2_X, Scatter):
raise ValueError("S2 must return a Scatter object")
return S2_X
def _transform_second_scatter(self, S1_X_inv_sqrt, S2_X):
"""
Transform the second scatter matrix using the inverse square root of the first scatter matrix
Parameters:
S1_X_inv_sqrt (ndarray): The inverse square root of the first scatter matrix
S2_X (Scatter): The second scatter matrix
Returns:
Scatter: The transformed scatter matrix.
Algorithm:
standard
"""
S2_Y = Scatter(
location=None,
scatter=multi_dot([S1_X_inv_sqrt, S2_X.scatter, S1_X_inv_sqrt]),
label=S2_X.label
)
return S2_Y
def _compute_transformation(self, S1_X_inv_sqrt, S2_Y):
"""
Compute the transformation matrix and generalized kurtosis.
This method performs the eigen decomposition of the transformed second scatter matrix (S2_Y),
sorts the eigenvalues and eigenvectors, and computes the transformation matrix W by multiplying
the transpose of the eigenvector matrix with the inverse square root of the first scatter matrix (S1_X_inv_sqrt).
Parameters:
S1_X_inv_sqrt (ndarray): The inverse square root of the first scatter matrix.
S2_Y (Scatter): The second scatter matrix transformed (standard algorithm) or applied to the whitened data.
Returns:
tuple: The transformation matrix and generalized kurtosis.
Algorithms:
standard, whiten
"""
S2_Y_eigenval, S2_Y_eigenvect = np.linalg.eigh(S2_Y.scatter)
S2_Y_eigenval, S2_Y_eigenvect = sort_eigenvalues_eigenvectors(S2_Y_eigenval, S2_Y_eigenvect)
gen_kurtosis = S2_Y_eigenval
W = np.dot(S2_Y_eigenvect.T, S1_X_inv_sqrt)
return W, gen_kurtosis
def _compute_transformation_qr(self, X, S1_X):
"""
Compute the transformation matrix and generalized kurtosis using QR decomposition.
This method follows these steps:
1. Center the data matrix X.
2. Reorder rows by decreasing infinity norm for numerical stability.
3. Perform QR decomposition with column pivoting on the reordered matrix.
4. Compute the leverage scores for further calculations.
5. Apply the second scatter matrix function and compute S2_Y.
6. Perform eigenvalue decomposition on S2_Y.
7. Compute the transformation matrix W using the solution of a linear system involving R and eigenvectors.
8. Reorder W according to the original pivoting order.
Parameters:
X (ndarray): The data matrix.
S1_X (Scatter): The first scatter matrix.
Algorithm:
QR
Returns:
tuple: The transformation matrix and generalized kurtosis.
"""
n, p = X.shape
# Center the data
T1_X = S1_X.location
if T1_X is None:
X_centered = X
else:
X_centered = X - T1_X
# Reorder rows by decreasing infinity norm for numerical stability
norm_inf = np.max(np.abs(X_centered), axis=1)
order_rows = np.argsort(norm_inf)[::-1]
X_reordered = X_centered[order_rows,]
# Perform QR decomposition with column pivoting on the reordered and scaled matrix
Q, R, P = qr(X_reordered / np.sqrt(n - 1), mode='economic', pivoting=True)
# Compute leverage scores
d = (n - 1) * np.sum(Q ** 2, axis=1)
# Determine alpha and cf values based on the second scatter matrix function
if self.S2_ == cov4:
alpha = 1
cf = 1 / (p + 2)
elif self.S2_ == covAxis:
alpha = -1
cf = p
elif self.S2_ == covW:
alpha = self.S2_args.get('alpha', 1)
cf = self.S2_args.get('cf', 1)
# Apply the weighting function to leverage scores and compute S2_Y
d = d[:, np.newaxis]
S2_Y = cf * (n - 1) / n * (Q * (d ** alpha)).T @ Q
# Perform eigenvalue decomposition on S2_Y
eigenvalues, eigenvectors = np.linalg.eigh(S2_Y)
_check_gen_kurtosis(eigenvalues)
eigenvalues, eigenvectors = sort_eigenvalues_eigenvectors(eigenvalues, eigenvectors)
# Compute the transformation matrix W and reorder it according to the original pivoting order
W = np.linalg.solve(R, eigenvectors).T
W = W[:, np.argsort(P)]
return W, eigenvalues
def _center_data(self, X, S1_X):
"""
Center the data matrix using the location component of the first scatter matrix.
Parameters:
X (ndarray): The data matrix
S1_X (Scatter): The first scatter matrix
Returns:
ndarray: The centered data matrix
Algorithm:
eig, standard, whiten, QR
"""
T1_X = S1_X.location
if T1_X is None:
warnings.warn("Location component in S1 is required for centering the data. Proceeding without centering")
self.center = False
else:
X = X - T1_X
return X
def _fix_component_signs(self, X, W):
"""
Fix the signs of the components based on the specified method.
Parameters:
X (ndarray): The data matrix.
W (ndarray): The transformation matrix.
Returns:
tuple: The final transformation matrix and skewness values.
Algorithm:
eigh, standard, whiten, QR
"""
if self.fix_signs == "scores":
Z = np.dot(X, W.T)
gen_skewness = np.mean(Z, axis=0) - np.median(Z, axis=0)
skewness_signs = np.where(gen_skewness >= 0, 1, -1)
gen_skewness = skewness_signs * gen_skewness
W_final = W * skewness_signs[:, np.newaxis]
else:
# Calculate row signs based on the maximum absolute value
row_signs = np.apply_along_axis(_sign_max, 1, W)
row_norms = np.sqrt(np.sum(W ** 2, axis=1))
W_final = (W.T / (row_signs * row_norms)).T
gen_skewness = None
return W_final, gen_skewness
def _component_selection(self, X, W_final):
"""
Implement the component selection step based on the specified method.
Parameters:
X (ndarray): Data to fit the ICS model, where rows are samples and columns are features.
W_final (ndarray): The transformation matrix.
Returns:
ndarray: Transformed matrix in which columns contain the scores of the selected invariant coordinates.
Algorithm:
eigh, standard, whiten, QR
"""
select_args = {} if self.select_args is None else self.select_args
selection_res = self.method_select_(
X=X,
W = W_final,
kurtosis=self.kurtosis_,
skewness=self.skewness_,
**select_args)
if not isinstance(selection_res, ComponentSelect):
raise ValueError("method_select must return a ComponentSelect object")
return selection_res