add sparsity-constrained ot functionality and example

liutianlin0121 · liutianlin0121 · commit eeaca579ed98 · 2023-04-14T19:24:32.000+02:00
diff --git a/README.md b/README.md
@@ -308,3 +308,5 @@ Dictionary Learning](https://arxiv.org/pdf/2102.06555.pdf), International Confer
 [48] Cédric Vincent-Cuaz, Rémi Flamary, Marco Corneli, Titouan Vayer, Nicolas Courty (2022). [Semi-relaxed Gromov-Wasserstein divergence and applications on graphs](https://openreview.net/pdf?id=RShaMexjc-x). International Conference on Learning Representations (ICLR), 2022.
 
 [49] Redko, I., Vayer, T., Flamary, R., and Courty, N. (2020). [CO-Optimal Transport](https://proceedings.neurips.cc/paper/2020/file/cc384c68ad503482fb24e6d1e3b512ae-Paper.pdf). Advances in Neural Information Processing Systems, 33.
+
+[50] Liu, T., Puigcerver, J., & Blondel, M. (2023). [Sparsity-constrained optimal transport](https://openreview.net/forum?id=yHY9NbQJ5BP). Proceedings of the Eleventh International Conference on Learning Representations (ICLR).
diff --git a/examples/plot_OT_1D_smooth.py b/examples/plot_OT_1D_smooth.py
@@ -101,7 +101,7 @@
 pl.show()
 
 
-#%% Smooth OT with KL regularization
+#%% Smooth OT with squared l2 regularization
 
 lambd = 1e-1
 Gsm = ot.smooth.smooth_ot_dual(a, b, M, lambd, reg_type='l2')
diff --git a/examples/plot_OT_1D_sparsity_constrained.py b/examples/plot_OT_1D_sparsity_constrained.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+"""
+==============================================
+Sparsity-constrained optimal transport example
+==============================================
+
+This example illustrates EMD, squared l2 regularized OT, and sparsity-constrained OT plans.
+The sparsity-constrained OT can be considered as a middle ground between EMD and squared l2 regularized OT.
+
+"""
+
+# Author: Tianlin Liu <t.liu@unibas.ch>
+#
+# License: MIT License
+
+# sphinx_gallery_thumbnail_number = 5
+
+import numpy as np
+import matplotlib.pylab as pl
+import ot
+import ot.plot
+from ot.datasets import make_1D_gauss as gauss
+
+##############################################################################
+# Generate data
+# -------------
+
+
+#%% parameters
+
+n = 100  # nb bins
+
+# bin positions
+x = np.arange(n, dtype=np.float64)
+
+# Gaussian distributions
+a = gauss(n, m=20, s=5)  # m= mean, s= std
+b = gauss(n, m=60, s=10)
+
+# loss matrix, rescaled so that its largest entry is 1
+M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+M /= M.max()
+
+
+##############################################################################
+# Plot distributions and loss matrix
+# ----------------------------------
+
+#%% plot the distributions
+
+pl.figure(1, figsize=(6.4, 3))
+pl.plot(x, a, 'b', label='Source distribution')
+pl.plot(x, b, 'r', label='Target distribution')
+pl.legend()
+
+#%% plot distributions and loss matrix
+
+pl.figure(2, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
+
+
+#%% EMD
+
+# use fast 1D solver
+G0 = ot.emd_1d(x, x, a, b)
+
+# Equivalent to
+# G0 = ot.emd(a, b, M)
+
+pl.figure(3, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, G0, 'OT matrix G0')
+
+
+#%% Smooth OT with squared l2 regularization
+
+lambd = 1e-1
+Gsm = ot.smooth.smooth_ot_dual(a, b, M, lambd, reg_type='l2')
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, Gsm, 'OT matrix Smooth OT l2 reg.')
+
+pl.show()
+
+
+#%% Sparsity-constrained OT
+
+lambd = 1e-1
+# max_nz caps the number of non-zero entries allowed in each column of the
+# transport plan (here at most 2 per column).
+Gsc = ot.sparse.sparsity_constrained_ot_dual(a, b, M, lambd, max_nz=2)
+
+pl.figure(5, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, Gsc, 'Sparsity constrained OT matrix; k=2.')
+
+pl.show()
+
+# %%
diff --git a/ot/__init__.py b/ot/__init__.py
@@ -27,6 +27,7 @@
 from . import gromov
 from . import smooth
 from . import stochastic
+from . import sparse
 from . import unbalanced
 from . import partial
 from . import backend
diff --git a/ot/sparse.py b/ot/sparse.py
@@ -0,0 +1,229 @@
+"""
+Sparsity-constrained optimal transport solvers.
+
+Implementation of :
+Sparsity-Constrained Optimal Transport.
+Tianlin Liu, Joan Puigcerver, Mathieu Blondel.
+In Proc. of ICLR 2023.
+https://openreview.net/forum?id=yHY9NbQJ5BP
+
+[50] Liu, T., Puigcerver, J., & Blondel, M. (2023).
+Sparsity-constrained optimal transport.
+Proceedings of the Eleventh International Conference on
+Learning Representations (ICLR).
+"""
+
+# Author: Tianlin Liu <t.liu@unibas.ch>
+#
+# License: MIT License
+
+
+import numpy as np
+import ot
+from .backend import get_backend
+
+
+class SparsityConstrained(ot.smooth.Regularization):
+    """Squared L2 regularization with a per-column sparsity constraint.
+
+    Parameters
+    ----------
+    max_nz : int
+        Maximum number of entries of each column that may be non-zero.
+    gamma : float, optional
+        Regularization strength (default 1.0).
+    """
+
+    def __init__(self, max_nz, gamma=1.0):
+        self.max_nz = max_nz
+        self.gamma = gamma
+
+    def delta_Omega(self, X):
+        """Evaluate the column-wise value and gradient on the top entries.
+
+        For each column of ``X`` only the ``max_nz`` largest entries are
+        kept; the others are treated as ``-inf`` and therefore contribute
+        zero after the positive-part clipping.
+
+        Parameters
+        ----------
+        X : np.ndarray (n_rows, n_cols)
+            Input matrix. It is not modified.
+
+        Returns
+        -------
+        val : np.ndarray (n_cols,)
+            ``sum(max(X, 0) ** 2, axis=0) / (2 * gamma)`` on the kept entries.
+        G : np.ndarray (n_rows, n_cols)
+            ``max(X, 0) / gamma`` on the kept entries, zero elsewhere.
+        """
+        n_rows = X.shape[0]
+        # When max_nz >= n_rows every entry is among the top max_nz, so
+        # there is nothing to mask. This also avoids calling argpartition
+        # with kth == n_rows, which is out of bounds and raises ValueError.
+        if self.max_nz < n_rows:
+            # Mask a copy so the caller's array is left untouched.
+            X = X.copy()
+            # For each column, find entries not among the top max_nz.
+            non_top_indices = np.argpartition(
+                -X, self.max_nz, axis=0)[self.max_nz:]
+            # Set these entries to -inf.
+            X[non_top_indices, np.arange(X.shape[1])] = -np.inf
+        max_X = np.maximum(X, 0)
+        val = np.sum(max_X ** 2, axis=0) / (2 * self.gamma)
+        G = max_X / self.gamma
+        return val, G
+
+    def max_Omega(self, X, b):
+        """Evaluate the column-wise maximum over sparse simplex points.
+
+        For each column of ``X`` the ``max_nz`` largest entries are scaled
+        by ``1 / (b * gamma)`` and projected onto the simplex with
+        ``ot.smooth.projection_simplex``; all other entries are set to zero.
+
+        Parameters
+        ----------
+        X : np.ndarray (n_rows, n_cols)
+            Input matrix.
+        b : np.ndarray
+            Per-column weights; must broadcast against the columns of ``X``
+            (assumed to have shape (n_cols,) -- confirm against the caller).
+
+        Returns
+        -------
+        val : np.ndarray (n_cols,)
+            ``sum(X * G, axis=0) - 0.5 * gamma * b * sum(G * G, axis=0)``.
+        G : np.ndarray (n_rows, n_cols)
+            Projected values at the positions of the top entries, zero
+            elsewhere.
+        """
+        # Never ask for more entries than a column has; argpartition would
+        # otherwise raise because kth is out of bounds.
+        k = min(self.max_nz, X.shape[0])
+        cols = np.arange(X.shape[1])
+
+        # For each column of X, find the top k values and their indices.
+        max_nz_indices = np.argpartition(X, kth=-k, axis=0)[-k:]
+        max_nz_values = X[max_nz_indices, cols]
+
+        # Project the top k values onto the simplex.
+        G_nz_values = ot.smooth.projection_simplex(
+            max_nz_values / (b * self.gamma), axis=0)
+
+        # Put the projected values back at their original indices and set
+        # all other values to zero.
+        G = np.zeros_like(X)
+        G[max_nz_indices, cols] = G_nz_values
+        val = np.sum(X * G, axis=0)
+        val -= 0.5 * self.gamma * b * np.sum(G * G, axis=0)
+        return val, G
+
+    def Omega(self, T):
+        """Return the regularization value ``0.5 * gamma * sum(T ** 2)``."""
+        return 0.5 * self.gamma * np.sum(T ** 2)
+
+
+def sparsity_constrained_ot_dual(
+        a, b, M, reg, max_nz,
+        method="L-BFGS-B", stopThr=1e-9,
+        numItermax=500, verbose=False, log=False):
+    r"""
+    Solve the sparsity-constrained OT problem in the dual and return the OT matrix.
+
+    The function solves the sparsity-constrained OT in dual formulation in
+    :ref:`[50] <references-sparsity-constrained-ot-dual>`.
+
+
+    Parameters
+    ----------
+    a : np.ndarray (ns,)
+        samples weights in the source domain
+    b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+        samples in the target domain, compute sinkhorn with multiple targets
+        and fixed :math:`\mathbf{M}` if :math:`\mathbf{b}` is a matrix
+        (return OT loss + dual variables in log)
+    M : np.ndarray (ns,nt)
+        loss matrix
+    reg : float
+        Regularization term >0
+    max_nz: int
+        Maximum number of non-zero entries permitted in each column of the
+        optimal transport matrix. Values larger than ns are clipped to ns.
+    method : str
+        Solver to use for scipy.optimize.minimize
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+
+    Returns
+    -------
+    gamma : (ns, nt) ndarray
+        Optimal transportation matrix for the given parameters
+    log : dict
+        log dictionary return only if log==True in parameters. It holds the
+        dual variables under 'alpha' and 'beta' and the solver result
+        under 'res'.
+
+
+    .. _references-sparsity-constrained-ot-dual:
+    References
+    ----------
+    .. [50] Liu, T., Puigcerver, J., & Blondel, M. (2023). Sparsity-constrained optimal transport. Proceedings of the Eleventh International Conference on Learning Representations (ICLR).
+
+    See Also
+    --------
+    ot.lp.emd : Unregularized OT
+    ot.bregman.sinkhorn : Entropic regularized OT
+    ot.smooth : Entropic regularized and squared l2 regularized OT
+    ot.optim.cg : General regularized OT
+
+    """
+
+    nx = get_backend(a, b, M)
+    # A column cannot hold more non-zero entries than it has rows.
+    max_nz = min(max_nz, M.shape[0])
+    regul = SparsityConstrained(gamma=reg, max_nz=max_nz)
+
+    # keep the original inputs to restore their array type on output
+    a0, b0, M0 = a, b, M
+
+    # convert to numpy
+    a, b, M = nx.to_numpy(a, b, M)
+
+    # solve dual, forwarding the solver requested by the caller
+    alpha, beta, res = ot.smooth.solve_dual(
+        a, b, M, regul,
+        method=method,
+        max_iter=numItermax,
+        tol=stopThr, verbose=verbose)
+
+    # reconstruct transport matrix
+    G = nx.from_numpy(ot.smooth.get_plan_from_dual(alpha, beta, M, regul),
+                      type_as=M0)
+
+    if log:
+        log = {'alpha': nx.from_numpy(alpha, type_as=a0),
+               'beta': nx.from_numpy(beta, type_as=b0), 'res': res}
+        return G, log
+    else:
+        return G
+
+
+def sparsity_constrained_ot_semi_dual(
+        a, b, M, reg, max_nz,
+        method="L-BFGS-B", stopThr=1e-9,
+        numItermax=500, verbose=False, log=False):
+    r"""
+    Solve the sparsity-constrained OT problem in the semi-dual and return the OT matrix.
+
+    The function solves the sparsity-constrained OT in semi-dual formulation in
+    :ref:`[50] <references-sparsity-constrained-ot-semi-dual>`.
+
+
+    Parameters
+    ----------
+    a : np.ndarray (ns,)
+        samples weights in the source domain
+    b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+        samples in the target domain, compute sinkhorn with multiple targets
+        and fixed :math:`\mathbf{M}` if :math:`\mathbf{b}` is a matrix
+        (return OT loss + dual variables in log)
+    M : np.ndarray (ns,nt)
+        loss matrix
+    reg : float
+        Regularization term >0
+    max_nz: int
+        Maximum number of non-zero entries permitted in each column of the
+        optimal transport matrix. Values larger than ns are clipped to ns.
+    method : str
+        Solver to use for scipy.optimize.minimize
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+
+    Returns
+    -------
+    gamma : (ns, nt) ndarray
+        Optimal transportation matrix for the given parameters
+    log : dict
+        log dictionary return only if log==True in parameters. It holds the
+        semi-dual variable under 'alpha' and the solver result under 'res'.
+
+
+    .. _references-sparsity-constrained-ot-semi-dual:
+    References
+    ----------
+    .. [50] Liu, T., Puigcerver, J., & Blondel, M. (2023). Sparsity-constrained optimal transport. Proceedings of the Eleventh International Conference on Learning Representations (ICLR).
+
+    See Also
+    --------
+    ot.lp.emd : Unregularized OT
+    ot.bregman.sinkhorn : Entropic regularized OT
+    ot.smooth : Entropic regularized and squared l2 regularized OT
+    ot.optim.cg : General regularized OT
+
+    """
+
+    # A column cannot hold more non-zero entries than it has rows.
+    max_nz = min(max_nz, M.shape[0])
+    regul = SparsityConstrained(gamma=reg, max_nz=max_nz)
+
+    # solve semi-dual, forwarding the solver requested by the caller
+    alpha, res = ot.smooth.solve_semi_dual(
+        a, b, M, regul,
+        method=method,
+        max_iter=numItermax,
+        tol=stopThr, verbose=verbose)
+
+    # reconstruct transport matrix
+    G = ot.smooth.get_plan_from_semi_dual(alpha, b, M, regul)
+
+    if log:
+        log = {'alpha': alpha, 'res': res}
+        return G, log
+    else:
+        return G
diff --git a/test/test_sparse.py b/test/test_sparse.py