# Source code for mvlearn.embed.mvmds

```
# Copyright 2019 NeuroData (http://neurodata.io)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .base import BaseEmbed
from ..utils.utils import check_Xs
import warnings
import numpy as np
from sklearn.metrics import euclidean_distances
class MVMDS(BaseEmbed):
    r"""
    An implementation of Classical Multiview Multidimensional Scaling for
    jointly reducing the dimensions of multiple views of data [#1MVMDS]_.
    A Euclidean distance matrix is created for each view, double centered,
    and the k largest common eigenvectors between the matrices are found
    based on the stepwise estimation of common principal components. Using
    these common principal components, the views are jointly reduced and
    a single view of k-dimensions is returned.

    MVMDS is often a better alternative to PCA for multi-view data.
    See the ``tutorials`` in the documentation.

    Parameters
    ----------
    n_components : int (positive), default=2
        Represents the number of components that the user would like to
        be returned from the algorithm. This value must be greater than
        0 and less than the number of samples within each view. If it
        exceeds the number of samples, ``fit`` lowers it to the number of
        samples and issues a warning.

    num_iter: int (positive), default=15
        Number of iterations stepwise estimation goes through.

    dissimilarity : {'euclidean', 'precomputed'}, default='euclidean'
        Dissimilarity measure to use:

        'euclidean':
            Pairwise Euclidean distances between points in the dataset.

        'precomputed':
            Xs is treated as pre-computed dissimilarity matrices.

    Attributes
    ----------
    components_: numpy.ndarray, shape(n_samples, n_components)
        Joint transformed MVMDS components of the input views.
        ``None`` until ``fit`` has been called.

    Notes
    -----
    Classical Multiview Multidimensional Scaling can be broken down into two
    steps. The first step involves calculating the Euclidean Distance matrices,
    :math:`Z_i`, for each of the :math:`k` views and double-centering
    these matrices through the following calculations:

    .. math::
        \Sigma_{i}=-\frac{1}{2}J_iZ_iJ_i

    .. math::
        \text{where }J_i=I_i-{\frac {1}{n}}\mathbb{1}\mathbb{1}^T

    The second step involves finding the common principal components of the
    :math:`\Sigma` matrices. These can be thought of as multiview
    generalizations of the principal components found in principal component
    analysis (PCA) given several covariance matrices. The central hypothesis of
    the common principal component model states that given k normal populations
    (views), their :math:`p` x :math:`p` covariance matrices
    :math:`\Sigma_{i}`, for :math:`i = 1,2,...,k` are simultaneously
    diagonalizable as:

    .. math::
        \Sigma_{i} = QD_i^2Q^T

    where :math:`Q` is the common :math:`p` x :math:`p` orthogonal matrix and
    :math:`D_i^2` are positive :math:`p` x :math:`p` diagonal matrices. The
    :math:`Q` matrix contains all the common principal components. The common
    principal component, :math:`q_j`, is found by solving the minimization
    problem:

    .. math::
        \text{Minimize} \sum_{i=1}^{k}n_i\log(q_j^TS_iq_j)

    .. math::
        \text{Subject to } q_j^Tq_j = 1

    where :math:`n_i` represent the degrees of freedom and :math:`S_i`
    represent sample covariance matrices.

    This class does not support ``MVMDS.transform()`` due to the iterative
    nature of the algorithm and the fact that the transformation is done
    during iterative fitting. Use ``MVMDS.fit_transform()`` to do both
    fitting and transforming at once.

    Examples
    --------
    >>> from mvlearn.embed import MVMDS
    >>> from mvlearn.datasets import load_UCImultifeature
    >>> Xs, _ = load_UCImultifeature()
    >>> print(len(Xs)) # number of views
    6
    >>> print(Xs[0].shape) # (n_samples, n_features) of the first view
    (2000, 76)
    >>> mvmds = MVMDS(n_components=5)
    >>> Xs_reduced = mvmds.fit_transform(Xs)
    >>> print(Xs_reduced.shape)
    (2000, 5)

    References
    ----------
    .. [#1MVMDS] Trendafilov, Nickolay T. “Stepwise Estimation of Common
            Principal Components.” Computational Statistics & Data
            Analysis, vol. 54, no. 12, 2010, pp. 3446–3457.,
            doi:10.1016/j.csda.2010.03.010.

    .. [#2MVMDS] Samir Kanaan-Izquierdo, Andrey Ziyatdinov,
            Maria Araceli Burgueño, Alexandre Perera-Lluna, Multiview: a software
            package for multiview pattern recognition methods, Bioinformatics,
            Volume 35, Issue 16, 15 August 2019, Pages 2877–2879
    """

    def __init__(self, n_components=2, num_iter=15, dissimilarity='euclidean'):
        """
        Store the hyper-parameters and validate them.

        Raises
        ------
        ValueError
            If ``num_iter`` or ``n_components`` is not positive, or if
            ``dissimilarity`` is not one of the supported options.
        """
        super().__init__()
        self.components_ = None
        self.n_components = n_components
        self.num_iter = num_iter
        self.dissimilarity = dissimilarity

        if self.num_iter <= 0:
            raise ValueError('The number of iterations must be greater than 0')

        if self.n_components <= 0:
            raise ValueError('The number of components must be greater than 0 '
                             + 'and less than the number of features')

        if self.dissimilarity not in ('euclidean', 'precomputed'):
            # Implicit concatenation (rather than a backslash inside the
            # literal) keeps source indentation out of the message.
            raise ValueError('The parameter `dissimilarity` must be one of '
                             '{`euclidean`, `precomputed`}')

    @staticmethod
    def _double_center(dissimilarities):
        """
        Square a dissimilarity matrix and double center it.

        Computes ``-1/2 * J @ D**2 @ J`` with ``J = I - (1/n) * ones``,
        as in single-view classical MDS.

        Parameters
        ----------
        dissimilarities : array-like, shape (n_samples, n_samples)
            Square matrix of pairwise dissimilarities.

        Returns
        -------
        B : numpy.ndarray, shape (n_samples, n_samples)
            The double-centered matrix of squared dissimilarities.
        """
        squared = np.power(np.array(dissimilarities), 2)
        n = len(squared)
        J = np.eye(n) - (1/n)*np.ones(squared.shape)
        return -(1/2) * J @ squared @ J

    def _commonpcs(self, Xs):
        """
        Finds Stepwise Estimation of Common Principal Components as described
        by common Trendafilov implementations based on the following paper:

        https://www.sciencedirect.com/science/article/pii/S016794731000112X

        Parameters
        ----------
        Xs: numpy.ndarray, shape (n_views, n_samples, n_samples)
            Stack of the double-centered matrices, one per view, as
            built by ``fit``.

        Returns
        -------
        components: numpy.ndarray, shape(n_samples, n_components)
            Joint transformed MVMDS components of the input views.
        """
        n = p = Xs.shape[1]
        views = len(Xs)

        # Every view contributes the same n, so the weights are uniform.
        # ``total`` and ``scales`` are loop invariants, computed once.
        total = np.sum(np.array([n] * views))
        n_num = np.array([n] * views)/total
        scales = n_num * total

        components = np.zeros((p, self.n_components))

        # Initialized by paper
        pi = np.eye(p)

        s = np.zeros((p, p))
        for weight, view in zip(n_num, Xs):
            s = s + (weight * view)

        _, e2 = np.linalg.eigh(s)

        # eigh orders eigenvalues ascending; reverse the columns so that the
        # eigenvectors of the largest eigenvalues come first.
        q0 = e2[:, ::-1]

        for i in range(self.n_components):

            # Each q starts from one eigenvector of the pooled matrix
            q = q0[:, i]

            # Represents mu from the paper: q^T S_j q for every view j.
            d = np.array([q @ view @ q for view in Xs])

            # stepwise iterations
            for _ in np.arange(self.num_iter):
                s2 = np.zeros((p, p))

                for scale, view, mu in zip(scales, Xs, d):
                    # Substituting .0001 is to prevent divide by 0 error;
                    # otherwise mu is the d value from the previous iteration
                    denom = mu if mu != 0 else .0001
                    s2 = s2 + (scale * view / denom)

                # eigenvector dotted with S matrix and pi
                w = pi @ (s2 @ q)
                norm = np.sqrt(w @ w)

                if norm == 0:
                    # Nothing left to refine (e.g. all-zero views); keep the
                    # current q rather than dividing by zero and emitting NaN.
                    break

                q = w / norm
                d = np.array([q @ view @ q for view in Xs])

            # creates next component
            components[:, i] = q
            # initializes pi for next iteration: project out the found q
            pi = pi - np.outer(q, q)

        return components

    def fit(self, Xs, y=None):
        """
        Calculates dimensionally reduced components by inputting the Euclidean
        distances of each view, double centering them, and using the _commonpcs
        function to find common components between views. Works similarly to
        traditional, single-view Multidimensional Scaling.

        Parameters
        ----------
        Xs: list of array-likes or numpy.ndarray
                - Xs length: n_views
                - Xs[i] shape: (n_samples, n_features_i)

        y : ignored
            Included for API compliance.

        Returns
        -------
        self : MVMDS
            The fitted estimator, with ``components_`` set.

        Raises
        ------
        ValueError
            If ``dissimilarity`` is 'precomputed' and a view is not a square
            matrix, or if ``dissimilarity`` is not a supported option.

        Warns
        -----
        UserWarning
            If ``n_components`` exceeds the number of samples; in that case
            ``n_components`` is lowered to the number of samples.
        """
        # Validate before indexing into Xs.
        Xs = check_Xs(Xs, multiview=True)
        n_samples = len(Xs[0])

        if self.n_components > n_samples:
            self.n_components = n_samples
            warnings.warn('The number of components you have requested is '
                          + 'greater than the number of samples in the '
                          + 'dataset. ' + str(self.n_components)
                          + ' components were computed instead.')

        # Re-checked here because the attribute may have been changed after
        # construction.
        if self.dissimilarity not in ('euclidean', 'precomputed'):
            raise ValueError('The parameter `dissimilarity` must be one of '
                             '{`euclidean`, `precomputed`}')

        mat = np.empty((len(Xs), n_samples, n_samples))

        # Double centering each view as in single-view MDS
        for i, view in enumerate(Xs):
            if self.dissimilarity == 'euclidean':
                distances = euclidean_distances(view)
            else:
                # User supplied their own dissimilarity matrix
                if view.shape[0] != view.shape[1]:
                    raise ValueError('The input distance matrix must be '
                                     + 'a square matrix')
                distances = view

            mat[i] = self._double_center(distances)

        self.components_ = self._commonpcs(mat)

        return self

    def fit_transform(self, Xs, y=None):
        """
        Fits the model to the views and returns the joint embedding.

        Parameters
        ----------
        Xs: list of array-likes or numpy.ndarray
                - Xs length: n_views
                - Xs[i] shape: (n_samples, n_features_i)
            The data to embed based on the fit function.

        y : ignored
            Included for API compliance.

        Returns
        -------
        X_transformed: numpy.ndarray, shape(n_samples, n_components)
            Joint transformed MVMDS components of the input views.
        """
        # Validated here and again, with the multiview requirement, in fit.
        Xs = check_Xs(Xs)
        self.fit(Xs)

        return self.components_
```