Source code for pylops.optimization.solver

import time
import numpy as np

from pylops.utils.backend import get_array_module


[docs]def cg(Op, y, x0, niter=10, damp=0., tol=1e-4, show=False, callback=None):
    r"""Conjugate gradient

    Solve a square system of equations given an operator ``Op`` and
    data ``y`` using conjugate gradient iterations.

    Parameters
    ----------
    Op : :obj:`pylops.LinearOperator`
        Operator to invert of size :math:`[N \times N]`
    y : :obj:`np.ndarray`
        Data of size :math:`[N \times 1]`
    x0 : :obj:`np.ndarray`, optional
        Initial guess
    niter : :obj:`int`, optional
        Number of iterations
    damp : :obj:`float`, optional
        *Deprecated*, will be removed in v2.0.0
    tol : :obj:`float`, optional
        Tolerance on residual norm
    show : :obj:`bool`, optional
        Display iterations log
    callback : :obj:`callable`, optional
        Function with signature (``callback(x)``) to call after each iteration
        where ``x`` is the current model vector

    Returns
    -------
    x : :obj:`np.ndarray`
        Estimated model of size :math:`[N \times 1]`
    iit : :obj:`int`
        Number of executed iterations
    cost : :obj:`numpy.ndarray`, optional
        History of the L2 norm of the residual

    Notes
    -----
    Solve the :math:`\mathbf{y} = \mathbf{Opx}` problem
    using conjugate gradient iterations.

    """
    ncp = get_array_module(y)

    if show:
        tstart = time.time()
        print('CG\n'
              '-----------------------------------------------------------\n'
              'The Operator Op has %d rows and %d cols\n'
              'tol = %10e\tniter = %d' % (Op.shape[0],
                                          Op.shape[1],
                                          tol, niter))
        print('-----------------------------------------------------------')
        head1 = '    Itn           x[0]              r2norm'
        print(head1)

    if x0 is None:
        x = ncp.zeros(Op.shape[1], dtype=y.dtype)
        r = y.copy()
    else:
        x = x0.copy()
        r = y - Op.matvec(x)
    c = r.copy()
    kold = ncp.abs(r.dot(r.conj()))

    cost = np.zeros(niter + 1)
    cost[0] = np.sqrt(kold)
    iiter = 0
    while iiter < niter and kold > tol:
        Opc = Op.matvec(c)
        cOpc = ncp.abs(c.dot(Opc.conj()))
        a = kold / cOpc
        x += a * c
        r -= a * Opc
        k = ncp.abs(r.dot(r.conj()))
        b = k / kold
        c = r + b * c
        kold = k
        iiter += 1
        cost[iiter] = np.sqrt(kold)

        # run callback
        if callback is not None:
            callback(x)

        if show:
            if iiter < 10 or niter - iiter < 10 or iiter % 10 == 0:
                if not np.iscomplex(x[0]):
                    msg = '%6g        %11.4e        %11.4e' % \
                          (iiter, x[0], cost[iiter])
                else:
                    msg = '%6g     %4.1e+%4.1ej     %11.4e' % \
                          (iiter, np.real(x[0]), np.imag(x[0]), cost[iiter])
                print(msg)
    if show:
        print('\nIterations = %d        Total time (s) = %.2f'
              % (iiter, time.time() - tstart))
        print(
            '-----------------------------------------------------------------\n')
    return x, iiter, cost[:iiter]


[docs]def cgls(Op, y, x0, niter=10, damp=0., tol=1e-4,
         show=False, callback=None):
    r"""Conjugate gradient least squares

    Solve an overdetermined system of equations given an operator ``Op`` and
    data ``y`` using conjugate gradient iterations.

    Parameters
    ----------
    Op : :obj:`pylops.LinearOperator`
        Operator to invert of size :math:`[N \times M]`
    y : :obj:`np.ndarray`
        Data of size :math:`[N \times 1]`
    x0 : :obj:`np.ndarray`, optional
        Initial guess
    niter : :obj:`int`, optional
        Number of iterations
    damp : :obj:`float`, optional
        Damping coefficient
    tol : :obj:`float`, optional
        Tolerance on residual norm
    show : :obj:`bool`, optional
        Display iterations log
    callback : :obj:`callable`, optional
        Function with signature (``callback(x)``) to call after each iteration
        where ``x`` is the current model vector

    Returns
    -------
    x : :obj:`np.ndarray`
        Estimated model of size :math:`[M \times 1]`
    istop : :obj:`int`
        Gives the reason for termination

        ``1`` means :math:`\mathbf{x}` is an approximate solution to
        :math:`\mathbf{d} = \mathbf{Op}\mathbf{x}`

        ``2`` means :math:`\mathbf{x}` approximately solves the least-squares
        problem
    iit : :obj:`int`
        Iteration number upon termination
    r1norm : :obj:`float`
        :math:`||\mathbf{r}||_2`, where
        :math:`\mathbf{r} = \mathbf{d} - \mathbf{Op}\mathbf{x}`
    r2norm : :obj:`float`
        :math:`\sqrt{\mathbf{r}^T\mathbf{r}  +
        \epsilon^2 \mathbf{x}^T\mathbf{x}}`.
        Equal to ``r1norm`` if :math:`\epsilon=0`
    cost : :obj:`numpy.ndarray`, optional
        History of r1norm through iterations

    Notes
    -----
    Minimize the following functional using conjugate gradient iterations:

    .. math::
        J = || \mathbf{y} -  \mathbf{Opx} ||_2^2 +
        \epsilon^2 || \mathbf{x} ||_2^2

    where :math:`\epsilon` is the damping coefficient.

    """
    ncp = get_array_module(y)

    if show:
        tstart = time.time()
        print('CGLS\n'
              '-----------------------------------------------------------\n'
              'The Operator Op has %d rows and %d cols\n'
              'damp = %10e\ttol = %10e\tniter = %d' % (Op.shape[0],
                                                       Op.shape[1],
                                                       damp, tol, niter))
        print('-----------------------------------------------------------')
        head1 = '    Itn           x[0]              r1norm          r2norm'
        print(head1)

    damp = damp ** 2
    if x0 is None:
        x = ncp.zeros(Op.shape[1], dtype=y.dtype)
        s = y.copy()
        r = Op.rmatvec(s)
    else:
        x = x0.copy()
        s = y - Op.matvec(x)
        r = Op.rmatvec(s) - damp * x
    c = r.copy()
    q = Op.matvec(c)
    kold = ncp.abs(r.dot(r.conj()))

    cost = np.zeros(niter + 1)
    cost1 = np.zeros(niter + 1)
    cost[0] = ncp.linalg.norm(s)
    cost1[0] = ncp.sqrt(cost[0] ** 2 + damp * ncp.abs(x.dot(x.conj())))

    iiter = 0
    while iiter < niter and kold > tol:
        a = kold / (q.dot(q.conj()) + damp * c.dot(c.conj()))
        x = x + a * c
        s = s - a * q
        r = Op.rmatvec(s) - damp * x
        k = ncp.abs(r.dot(r.conj()))
        b = k / kold
        c = r + b * c
        q = Op.matvec(c)
        kold = k
        iiter += 1
        cost[iiter] = ncp.linalg.norm(s)
        cost1[iiter] = ncp.sqrt(cost[iiter] ** 2 + damp * ncp.abs(x.dot(x.conj())))

        # run callback
        if callback is not None:
            callback(x)

        if show:
            if iiter < 10 or niter - iiter < 10 or iiter % 10 == 0:
                if not np.iscomplex(x[0]):
                    msg = '%6g        %11.4e        %11.4e     %11.4e' % \
                          (iiter, x[0], cost[iiter], cost1[iiter])
                else:
                    msg = '%6g     %4.1e+%4.1ej     %11.4e     %11.4e' % \
                          (iiter, np.real(x[0]), np.imag(x[0]), cost[iiter], cost1[iiter])
                print(msg)
    if show:
        print('\nIterations = %d        Total time (s) = %.2f'
              % (iiter, time.time() - tstart))
        print(
            '-----------------------------------------------------------------\n')

    # reason for termination
    istop = 1 if kold < tol else 2
    r1norm = kold
    r2norm = cost[iiter]
    return x, istop, iiter, r1norm, r2norm, cost[:iiter]


[docs]def lsqr(Op, y, x0, damp=0., atol=1e-08, btol=1e-08, conlim=100000000.0,
         niter=10, calc_var=True, show=False, callback=None):
    r"""LSQR

    Solve an overdetermined system of equations given an operator ``Op`` and
    data ``y`` using LSQR iterations.

    Parameters
    ----------
    Op : :obj:`pylops.LinearOperator`
        Operator to invert of size :math:`[N \times M]`
    y : :obj:`np.ndarray`
        Data of size :math:`[N \times 1]`
    x0 : :obj:`np.ndarray`, optional
        Initial guess
    damp : :obj:`float`, optional
        Damping coefficient
    atol, btol : :obj:`float`, optional
        Stopping tolerances. If both are 1.0e-9, the final residual norm
        should be accurate to about 9 digits. (The final x will usually
        have fewer correct digits, depending on cond(A) and the size of damp.)
    conlim : :obj:`float`, optional
        Stopping tolerance on :math:`cond(\mathbf{Op})`
        exceeds conlim. For square, ``conlim`` could be as large as 1.0e+12.
        For least-squares problems, ``conlim`` should be less than 1.0e+8.
        Maximum precision can be obtained by setting
        ``atol = btol = conlim = 0``, but the number of iterations may
        then be excessive.
    niter : :obj:`int`, optional
        Number of iterations
    calc_var : :obj:`bool`, optional
        Estimate diagonals of :math:`(\mathbf{Op}^H\mathbf{Op} +
        \epsilon^2\mathbf{I})^{-1}`.
    show : :obj:`bool`, optional
        Display iterations log
    callback : :obj:`callable`, optional
        Function with signature (``callback(x)``) to call after each iteration
        where ``x`` is the current model vector

    Returns
    -------
    x : :obj:`np.ndarray`
        Estimated model of size :math:`[M \times 1]`
    istop : :obj:`int`
        Gives the reason for termination

        ``0`` means the exact solution is :math:`\mathbf{x}=0`

        ``1`` means :math:`\mathbf{x}` is an approximate solution to
        :math:`\mathbf{y} = \mathbf{Op}\mathbf{x}`

        ``2`` means :math:`\mathbf{x}` approximately solves the least-squares
        problem

        ``3`` means the estimate of :math:`cond(\bar{\mathbf{Op}})`
        has exceeded conlim

        ``4`` means :math:`\mathbf{y} - \mathbf{Op}\mathbf{x}` is small enough
        for this machine

        ``5`` means the least-squares solution is good enough for this machine

        ``6`` means :math:`cond(\bar{\mathbf{Op}})` seems to be too large for
        this machine

        ``7`` means the iteration limit has been reached

    r1norm : :obj:`float`
        :math:`||\mathbf{r}||_2^2`, where
        :math:`\mathbf{r} = \mathbf{y} - \mathbf{Op}\mathbf{x}`
    r2norm : :obj:`float`
        :math:`\sqrt{\mathbf{r}^T\mathbf{r}  +
        \epsilon^2 \mathbf{x}^T\mathbf{x}}`.
        Equal to ``r1norm`` if :math:`\epsilon=0`
    anorm : :obj:`float`
        Estimate of Frobenius norm of :math:`\bar{\mathbf{Op}} =
        [\mathbf{Op} \; \epsilon \mathbf{I}]`
    acond : :obj:`float`
        Estimate of :math:`cond(\bar{\mathbf{Op}})`
    arnorm : :obj:`float`
        Estimate of norm of :math:`cond(\mathbf{Op}^H\mathbf{r}-
        \epsilon^2\mathbf{x})`
    var : :obj:`float`
        Diagonals of :math:`(\mathbf{Op}^H\mathbf{Op})^{-1}` (if ``damp=0``)
        or more generally :math:`(\mathbf{Op}^H\mathbf{Op} +
        \epsilon^2\mathbf{I})^{-1}`.
    cost : :obj:`numpy.ndarray`, optional
        History of r1norm through iterations

    Notes
    -----
    Minimize the following functional using LSQR iterations [1]_:

    .. math::
        J = || \mathbf{y} -  \mathbf{Opx} ||_2^2 +
        \epsilon^2 || \mathbf{x} ||_2^2

    where :math:`\epsilon` is the damping coefficient.

    .. [1] Paige, C. C., and Saunders, M. A. "LSQR: An algorithm for sparse
        linear equations and sparse least squares", ACM TOMS, vol. 8, pp. 43-71,
        1982.

    """
    # Return messages.
    msg = ('The exact solution is x = 0                               ',
           'Opx - b is small enough, given atol, btol                  ',
           'The least-squares solution is good enough, given atol     ',
           'The estimate of cond(Opbar) has exceeded conlim            ',
           'Opx - b is small enough for this machine                   ',
           'The least-squares solution is good enough for this machine',
           'Cond(Opbar) seems to be too large for this machine         ',
           'The iteration limit has been reached                      ')

    ncp = get_array_module(y)
    m, n = Op.shape

    var = None
    if calc_var:
        var = ncp.zeros(n)

    if show:
        tstart = time.time()
        print('LSQR')
        print('-------------------------------------------------')
        str1 = 'The Operator Op has %d rows and %d cols' % (m, n)
        str2 = 'damp = %20.14e     calc_var = %6g' %(damp, calc_var)
        str3 = 'atol = %8.2e                 conlim = %8.2e' %(atol, conlim)
        str4 = 'btol = %8.2e                 niter = %8g' %(btol, niter)
        print(str1)
        print(str2)
        print(str3)
        print(str4)
        print('-------------------------------------------------')

    itn = 0
    istop = 0
    ctol = 0
    if conlim > 0:
        ctol = 1. / conlim
    anorm = 0
    acond = 0
    dampsq = damp ** 2
    ddnorm = 0
    res2 = 0
    xnorm = 0
    xxnorm = 0
    z = 0
    cs2 = -1
    sn2 = 0

    # set up the first vectors u and v for the bidiagonalization.
    # These satisfy beta*u=b-Op(x0), alfa*v=Op'u
    if x0 is None:
        x = ncp.zeros(Op.shape[1], dtype=y.dtype)
        u = y.copy()
    else:
        x = x0.copy()
        u = y - Op.matvec(x0)
    alfa = 0.
    beta = ncp.linalg.norm(u)
    if beta > 0.:
        u = u / beta
        v = Op.rmatvec(u)
        alfa = ncp.linalg.norm(v)
        if alfa > 0:
            v = v / alfa
            w = v.copy()

    arnorm = alfa * beta
    if arnorm == 0:
        print(' ')
        print('LSQR finished')
        print(msg[istop])
        return x, istop, itn, 0, 0, anorm, acond, arnorm, xnorm, var
    arnorm0 = arnorm

    rhobar = alfa
    phibar = beta
    bnorm = beta
    rnorm = beta
    r1norm = rnorm
    r2norm = rnorm
    cost = np.zeros(niter + 1)
    cost[0] = rnorm
    head1 = '   Itn      x[0]       r1norm     r2norm '
    head2 = ' Compatible   LS      Norm A   Cond A'

    if show:
        print(' ')
        print(head1 + head2)
        test1 = 1
        test2 = alfa / beta
        str1 = '%6g %12.5e' %(itn,   x[0])
        str2 = ' %10.3e %10.3e' %(r1norm, r2norm)
        str3 = '  %8.1e %8.1e' %(test1,  test2)
        print(str1 + str2 + str3)

    # main iteration loop
    while itn < niter:
        itn = itn + 1
        # perform the next step of the bidiagonalization to obtain the
        # next beta, u, alfa, v. These satisfy the relations
        # beta*u = Op*v - alfa*u,
        # alfa*v = Op'*u - beta*v'
        u = Op.matvec(v) - alfa * u
        beta = ncp.linalg.norm(u)
        if beta > 0:
            u = u / beta
            anorm = np.linalg.norm([anorm, alfa, beta, damp])
            v = Op.rmatvec(u) - beta * v
            alfa = ncp.linalg.norm(v)
            if alfa > 0:
                v = v / alfa

        # use a plane rotation to eliminate the damping parameter.
        # This alters the diagonal (rhobar) of the lower-bidiagonal matrix.
        rhobar1 = np.linalg.norm([rhobar, damp])
        cs1 = rhobar / rhobar1
        sn1 = damp / rhobar1
        psi = sn1 * phibar
        phibar = cs1 * phibar

        # use a plane rotation to eliminate the subdiagonal element (beta)
        # of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
        rho = np.linalg.norm([rhobar1, beta])
        cs = rhobar1 / rho
        sn = beta / rho
        theta = sn * alfa
        rhobar = -cs * alfa
        phi = cs * phibar
        phibar = sn * phibar
        tau = sn * phi

        # update x and w.
        t1 = phi / rho
        t2 = -theta / rho
        dk = w / rho
        x = x + t1 * w
        w = v + t2 * w
        ddnorm = ddnorm + ncp.linalg.norm(dk) ** 2
        if calc_var:
            var = var + ncp.dot(dk, dk)

        # use a plane rotation on the right to eliminate the
        # super-diagonal element (theta) of the upper-bidiagonal matrix.
        # Then use the result to estimate norm(x).
        delta = sn2 * rho
        gambar = - cs2 * rho
        rhs = phi - delta * z
        zbar = rhs / gambar
        xnorm = ncp.sqrt(xxnorm + zbar**2)
        gamma = np.linalg.norm([gambar, theta])
        cs2 = gambar / gamma
        sn2 = theta / gamma
        z = rhs / gamma
        xxnorm = xxnorm + z ** 2.

        # test for convergence. First, estimate the condition of the matrix
        # Opbar, and the norms of rbar and Opbar'rbar
        acond = anorm * ncp.sqrt(ddnorm)
        res1 = phibar ** 2
        res2 = res2 + psi ** 2
        rnorm = ncp.sqrt(res1 + res2)
        arnorm = alfa * abs(tau)

        # distinguish between r1norm = ||b - Ax|| and
        # r2norm = sqrt(r1norm^2 + damp^2*||x||^2).
        # Estimate r1norm = sqrt(r2norm^2 - damp^2*||x||^2).
        # Although there is cancellation, it might be accurate enough.
        r1sq = rnorm ** 2 - dampsq * xxnorm
        r1norm = ncp.sqrt(ncp.abs(r1sq))
        cost[itn] = r1norm
        if r1sq < 0:
            r1norm = - r1norm
        r2norm = rnorm.copy()

        # use these norms to estimate certain other quantities,
        # some of which will be small near a solution.
        test1 = rnorm / bnorm
        test2 = arnorm / arnorm0
        test3 = 1. / acond
        t1 = test1 / (1. + anorm * xnorm / bnorm)
        rtol = btol + atol * anorm * xnorm / bnorm

        # set reason for termination.
        # The following tests guard against extremely small values of
        # atol, btol  or ctol. The effect is equivalent to the normal tests
        # using atol = eps,  btol = eps, conlim = 1/eps.
        if itn >= niter:
            istop = 7
        if 1 + test3 <= 1:
            istop = 6
        if 1 + test2 <= 1:
            istop = 5
        if 1 + t1 <= 1:
            istop = 4

        # allow for tolerances set by the user.
        if test3 <= ctol:
            istop = 3
        if test2 <= atol:
            istop = 2
        if test1 <= rtol:
            istop = 1

        # run callback
        if callback is not None:
            callback(x)

        # print status
        if show:
            if n <= 40 or itn <= 10 or itn >= niter-10 or itn % 10 == 0 or \
                    test3 <= 2*ctol or test2 <= 10*atol or \
                    test1 <= 10*rtol or istop != 0:
                str1 = '%6g %12.5e' %(itn, x[0])
                str2 = ' %10.3e %10.3e' %(r1norm, r2norm)
                str3 = '  %8.1e %8.1e' %(test1, test2)
                str4 = ' %8.1e %8.1e' %(anorm, acond)
                print(str1 + str2 + str3 + str4)
        if istop > 0:
            break

    # Print the stopping condition.
    if show:
        print(' ')
        print('LSQR finished, %s' % msg[istop])
        print(' ')
        str1 = 'istop =%8g   r1norm =%8.1e' %(istop, r1norm)
        str2 = 'anorm =%8.1e   arnorm =%8.1e' %(anorm, arnorm)
        str3 = 'itn   =%8g   r2norm =%8.1e' %(itn, r2norm)
        str4 = 'acond =%8.1e   xnorm  =%8.1e' %( acond, xnorm)
        str5 = 'Total time (s) = %.2f' % (time.time() - tstart)
        print(str1 +'   '+ str2)
        print(str3 +'   '+ str4)
        print(str5)
        print('-----------------------------------------------------------------------\n')

    return x, istop, itn, r1norm, r2norm, anorm, acond, \
           arnorm, xnorm, var, cost[:itn]