[sdpb] 126/233: Finished commenting SDPSolver.cpp
Tobias Hansen
thansen at moszumanska.debian.org
Thu Mar 9 04:06:28 UTC 2017
This is an automated email from the git hooks/post-receive script.
thansen pushed a commit to branch master
in repository sdpb.
commit b2804385dcecbd23c082a3339b32ef077d7cfefe
Author: David Simmons-Duffin <dsd at minerva.sns.ias.edu>
Date: Sat Jan 10 01:29:05 2015 -0500
Finished commenting SDPSolver.cpp
---
src/SDP.h | 24 +--
src/SDPSolver.cpp | 562 +++++++++++++++++++++++++++++++++++++++---------------
src/SDPSolver.h | 20 +-
3 files changed, 430 insertions(+), 176 deletions(-)
diff --git a/src/SDP.h b/src/SDP.h
index 70637b1..f3f900f 100644
--- a/src/SDP.h
+++ b/src/SDP.h
@@ -66,12 +66,12 @@ using std::ostream;
// constraint matrices A_p are given by
//
// A_(j,r,s,k) = \sum_{b \in blocks[j]}
-// Block_b(\chi_{b,k} \chi_{b,k}^T \otimes E^{rs}),
+// Block_b(v_{b,k} v_{b,k}^T \otimes E^{rs}),
//
// where
// - E^{rs} is the symmetrization of the m_j x m_j matrix with a
// 1 in entry (r,s) and zeros elsewhere.
-// - \chi_{b,k} is a vector of length (delta_b+1)
+// - v_{b,k} is a vector of length (delta_b+1)
// - \otimes denotes a tensor product
// - each block above thus has dimension (delta_b+1)*m_j.
// - blocks[j_1] and blocks[j_2] are disjoint if j_1 != j_2. Thus,
@@ -92,20 +92,20 @@ class IndexTuple {
int p; // overall index of the constraint
int r; // first index for E^{rs}
int s; // second index for E^{rs}
- int k; // index for \chi_{b,k}
+ int k; // index for v_{b,k}
IndexTuple(int p, int r, int s, int k): p(p), r(r), s(s), k(k) {}
IndexTuple() {}
};
class SDP {
public:
- // bilinearBases is a vector of Matrices encoding the \chi_{b,k}
- // that enter the constraint matrices. Specifically, \chi_{b,k} are
+ // bilinearBases is a vector of Matrices encoding the v_{b,k}
+ // that enter the constraint matrices. Specifically, v_{b,k} are
// the columns of bilinearBases[b],
//
- // bilinearBases[b].elt(m,k) = (\chi_{b,k})_m (0 <= b < bMax,
- // 0 <= k <= d_j,
- // 0 <= m <= delta_b)
+ // bilinearBases[b].elt(m,k) = (v_{b,k})_m (0 <= b < bMax,
+ // 0 <= k <= d_j,
+ // 0 <= m <= delta_b)
//
vector<Matrix> bilinearBases;
@@ -170,7 +170,7 @@ class SDP {
// Dimensions of the blocks of X,Y (0 <= b < bMax)
//
- // psdMatrixBlockDims()[b] = (delta_b+1)*m_j = length(\chi_{b,*})*m_j
+ // psdMatrixBlockDims()[b] = (delta_b+1)*m_j = length(v_{b,*})*m_j
//
vector<int> psdMatrixBlockDims() const {
vector<int> dims;
@@ -254,7 +254,7 @@ class DualConstraintGroup {
// constraintConstants = c, a vector of length P'
Vector constraintConstants;
- // bilinearBases is a vector of Matrices encoding the \chi_{b,k}
+ // bilinearBases is a vector of Matrices encoding the v_{b,k}
// entering the constraint matrices A_p, as described
// above. `bilinearBases' here has the structure of
// `bilinearBases[j]' above for some fixed j.
@@ -294,11 +294,11 @@ class PolynomialVectorMatrix {
vector<vector<Polynomial> > elements;
// A list of real numbers x_k (0 <= k <= degree(M)) at which to
- // sample M(x) to construct the \chi_{b,k}.
+ // sample M(x) to construct the v_{b,k}.
vector<Real> samplePoints;
// A list of real numbers s_k (0 <= k <= degree(M)) to scale M(x_k)
- // and the corresponding \chi_{b,k}.
+ // and the corresponding v_{b,k}.
vector<Real> sampleScalings;
// bilinearBasis[m] = q_m(x) (0 <= m <= degree/2), where q_m is a
diff --git a/src/SDPSolver.cpp b/src/SDPSolver.cpp
index 7742c29..210e0c0 100644
--- a/src/SDPSolver.cpp
+++ b/src/SDPSolver.cpp
@@ -18,6 +18,26 @@ using boost::filesystem::path;
using boost::timer::nanosecond_type;
using std::cout;
+// A note on function conventions: For functions which return large
+// objects, we usually use references to the returned objects as
+// arguments to a void function, which are then modified in place:
+//
+// void myFunction(const InputType1 &input1,
+// const InputType2 &input2,
+// ...
+// OutputType1 &output1,
+// OutputType2 &output2,
+// ...) {
+// use input1, etc. to modify output1, etc.
+// }
+//
+// We try to mark large inputs as `const' references. However,
+// several MBLAS and MLAPACK functions are not correctly annotated
+// with `const's, so we occasionally have input arguments which cannot
+// be marked as const because they're used in MBLAS/MLAPACK functions.
+// We try to use comments to distinguish which arguments should be
+// considered inputs and which should be considered outputs.
+
/***********************************************************************/
// Create and initialize an SDPSolver for the given SDP and
// SDPSolverParameters
@@ -41,12 +61,12 @@ SDPSolver::SDPSolver(const SDP &sdp, const SDPSolverParameters ¶meters):
R(X),
BilinearPairingsXInv(sdp.bilinearPairingBlockDims()),
BilinearPairingsY(BilinearPairingsXInv),
- SchurBlocks(sdp.schurBlockDims()),
- SchurBlocksCholesky(SchurBlocks),
+ SchurComplement(sdp.schurBlockDims()),
+ SchurComplementCholesky(SchurComplement),
SchurOffDiagonal(sdp.FreeVarMatrix),
- schurStabilizeIndices(SchurBlocks.blocks.size()),
- schurStabilizeLambdas(SchurBlocks.blocks.size()),
- stabilizeBlocks(SchurBlocks.blocks.size()),
+ schurStabilizeIndices(SchurComplement.blocks.size()),
+ schurStabilizeLambdas(SchurComplement.blocks.size()),
+ stabilizeBlocks(SchurComplement.blocks.size()),
Q(sdp.FreeVarMatrix.cols, sdp.FreeVarMatrix.cols),
Qpivots(sdp.FreeVarMatrix.cols),
dyExtended(Q.rows),
@@ -75,19 +95,21 @@ SDPSolver::SDPSolver(const SDP &sdp, const SDPSolverParameters ¶meters):
// Inputs:
// - A : l*m x l*m symmetric matrix
// - Q : l x n matrix
+// Workspace:
// - Work : l*m x n*m matrix, intermediate workspace (overwritten)
+// Output:
// - Result : n*m x n*m symmetric matrix (overwritten)
//
// An explanation of the name: a 'congruence' refers to the action M
-// -> A M A^T. We use 'congruence transpose' to refer to a congruence
+// -> A M A^T. We use 'transpose congruence' to refer to a congruence
// with the transposed matrix M -> A^T M A. 'tensor' here refers to
// the fact that we're performing a congruence with the tensor product
// Q \otimes 1.
//
-void tensorMatrixCongruenceTranspose(const Matrix &A,
- const Matrix &Q,
- Matrix &Work,
- Matrix &Result) {
+void tensorTransposeCongruence(const Matrix &A,
+ const Matrix &Q,
+ Matrix &Work,
+ Matrix &Result) {
int m = A.rows / Q.rows;
assert(Result.rows == Q.cols * m);
@@ -133,18 +155,36 @@ void tensorMatrixCongruenceTranspose(const Matrix &A,
}
}
+// Result_b = Q[b]'^T A_b Q[b]' for each block 0 <= b < Q.size()
+// - Result_b, A_b denote the b-th blocks of Result, A, resp.
+// - Q[b]' = Q[b] \otimes 1, where \otimes denotes tensor product
+// - for each b, L.blocks[b], Q[b], Work[b], and Result.blocks[b] must
+// have the structure described above for
+// `tensorTransposeCongruence'
+//
+void blockTensorTransposeCongruence(const BlockDiagonalMatrix &A,
+ const vector<Matrix> &Q,
+ vector<Matrix> &Work,
+ BlockDiagonalMatrix &Result) {
+ #pragma omp parallel for schedule(dynamic)
+ for (unsigned int b = 0; b < Q.size(); b++)
+ tensorTransposeCongruence(A.blocks[b], Q[b], Work[b], Result.blocks[b]);
+}
+
// Result = Q'^T A^{-1} Q', where Q' = Q \otimes 1, where \otimes
// denotes tensor product and 1 is an mxm idenity matrix.
// Inputs:
// - L : l*m x l*m cholesky decomposition of A
// - Q : l x n matrix
+// Workspace:
// - Work : l*m x n*m matrix, intermediate workspace (overwritten)
+// Output:
// - Result : n*m x n*m symmetric matrix (overwritten)
//
-void tensorMatrixInvCongruenceTransposeWithCholesky(const Matrix &L,
- const Matrix &Q,
- Matrix &Work,
- Matrix &Result) {
+void tensorInvTransposeCongruenceWithCholesky(const Matrix &L,
+ const Matrix &Q,
+ Matrix &Work,
+ Matrix &Result) {
// Work = L^{-1} (Q \otimes 1);
for (int cw = 0; cw < Work.cols; cw++) {
int mc = cw / Q.cols;
@@ -178,6 +218,22 @@ void tensorMatrixInvCongruenceTransposeWithCholesky(const Matrix &L,
}
}
+// Result_b = Q[b]'^T A_b^{-1} Q[b]' for each block 0 <= b < Q.size()
+// - Result_b, A_b denote the b-th blocks of Result, A, resp.
+// - Q[b]' = Q[b] \otimes 1, where \otimes denotes tensor product
+// - for each b, L.blocks[b], Q[b], Work[b], and Result.blocks[b] must
+// have the structure described above for
+// `tensorInvTransposeCongruenceWithCholesky'
+//
+void blockTensorInvTransposeCongruenceWithCholesky(const BlockDiagonalMatrix &L,
+ const vector<Matrix> &Q,
+ vector<Matrix> &Work,
+ BlockDiagonalMatrix &Result) {
+ #pragma omp parallel for schedule(dynamic)
+ for (unsigned int b = 0; b < Q.size(); b++)
+ tensorInvTransposeCongruenceWithCholesky(L.blocks[b], Q[b],
+ Work[b], Result.blocks[b]);
+}
// Result^(blockRow,blockCol) = V D V^T, where D=diag(d) is a diagonal
// matrix.
@@ -192,6 +248,7 @@ void tensorMatrixInvCongruenceTransposeWithCholesky(const Matrix &L,
// - V : V.rows x V.cols Matrix
// - blockRow : integer < k
// - blockCol : integer < k
+// Output:
// - Result : (k*V.rows) x (k*V.rows) square Matrix (overwritten)
//
void diagonalCongruence(Real const *d,
@@ -213,8 +270,8 @@ void diagonalCongruence(Real const *d,
}
}
-// v^T A' v, where A' is the (blockRow,blockCol)-th dim x dim block
-// inside A.
+// v^T A^(blockRow, blockCol) v, where A^(r,s) is the (r,s)-th dim x
+// dim block inside A.
//
// Input:
// - v : pointer to the beginning of a vector of length dim
@@ -222,6 +279,7 @@ void diagonalCongruence(Real const *d,
// - A : (k*dim) x (k*dim) matrix, where k > blockRow, blockCol
// - blockRow : integer labeling block of A
// - blockCol : integer labeling block of A
+// Output: v^T A^(blockRow, blockCol) v (returned)
//
Real bilinearBlockPairing(const Real *v,
const int dim,
@@ -241,48 +299,25 @@ Real bilinearBlockPairing(const Real *v,
}
/***********************************************************************/
-// Subroutines needed for each iteration
+// Subroutines needed for each solver iteration
-// Result_b = Q[b]'^T A_b Q[b]' for each block 0 <= b < Q.size()
-// - Result_b, A_b denote the b-th blocks of Result, A, resp.
-// - Q[b]' = Q[b] \otimes 1, where \otimes denotes tensor product
-//
-// This is just tensorMatrixCongruenceTranspose for each block of a
-// BlockDiagonalMatrix.
+// Compute the SchurComplement matrix using BilinearPairingsXInv and
+// BilinearPairingsY and the formula
//
-void computeBilinearPairings(const BlockDiagonalMatrix &A,
- const vector<Matrix> &Q,
- vector<Matrix> &workspace,
- BlockDiagonalMatrix &Result) {
- #pragma omp parallel for schedule(dynamic)
- for (unsigned int b = 0; b < Q.size(); b++)
- tensorMatrixCongruenceTranspose(A.blocks[b], Q[b],
- workspace[b], Result.blocks[b]);
-}
-
-// Result_b = Q[b]'^T A_b^{-1} Q[b]' for each block 0 <= b < Q.size()
-// - Result_b, A_b denote the b-th blocks of Result, A, resp.
-// - Q[b]' = Q[b] \otimes 1, where \otimes denotes tensor product
+// S_{(j,r1,s1,k1), (j,r2,s2,k2)} = \sum_{b \in blocks[j]}
+// (1/4) (BilinearPairingsXInv_{ej s1 + k1, ej r2 + k2}*
+// BilinearPairingsY_{ej s2 + k2, ej r1 + k1} +
+// swaps (r1 <-> s1) and (r2 <-> s2))
//
-// This is just tensorMatrixInvCongruenceTransposeWithCholesky for
-// each block of a BlockDiagonalMatrix.
+// where ej = d_j + 1.
//
-void computeInvBilinearPairingsWithCholesky(const BlockDiagonalMatrix &L,
- const vector<Matrix> &Q,
- vector<Matrix> &workspace,
- BlockDiagonalMatrix &Result) {
- #pragma omp parallel for schedule(dynamic)
- for (unsigned int b = 0; b < Q.size(); b++)
- tensorMatrixInvCongruenceTransposeWithCholesky(L.blocks[b],
- Q[b],
- workspace[b],
- Result.blocks[b]);
-}
-
-void computeSchurBlocks(const SDP &sdp,
- const BlockDiagonalMatrix &BilinearPairingsXInv,
- const BlockDiagonalMatrix &BilinearPairingsY,
- BlockDiagonalMatrix &SchurBlocks) {
+// Inputs: sdp, BilinearPairingsXInv, BilinearPairingsY
+// Output: SchurComplement (overwritten)
+//
+void computeSchurComplement(const SDP &sdp,
+ const BlockDiagonalMatrix &BilinearPairingsXInv,
+ const BlockDiagonalMatrix &BilinearPairingsY,
+ BlockDiagonalMatrix &SchurComplement) {
#pragma omp parallel for schedule(dynamic)
for (unsigned int j = 0; j < sdp.dimensions.size(); j++) {
const int ej = sdp.degrees[j] + 1;
@@ -309,14 +344,27 @@ void computeSchurBlocks(const SDP &sdp,
BilinearPairingsXInv.blocks[*b].elt(ej_r1 + k1, ej_s2 + k2) *
BilinearPairingsY .blocks[*b].elt(ej_r2 + k2, ej_s1 + k1))/4;
}
- SchurBlocks.blocks[j].elt(u1, u2) = tmp;
+ SchurComplement.blocks[j].elt(u1, u2) = tmp;
if (u2 != u1)
- SchurBlocks.blocks[j].elt(u2, u1) = tmp;
+ SchurComplement.blocks[j].elt(u2, u1) = tmp;
}
}
}
}
+// dualResidues[p] = primalObjective[p] - Tr(A_p Y) - (FreeVarMatrix y)_p,
+// for 0 <= p < primalObjective.size()
+//
+// The pairings Tr(A_p Y) can be written in terms of BilinearPairingsY:
+//
+// Tr(A_(j,r,s,k) Y) = \sum_{b \in blocks[j]}
+// (1/2) (BilinearPairingsY_{ej r + k, ej s + k} +
+// swap (r <-> s))
+// where ej = d_j + 1.
+//
+// Inputs: sdp, y, BilinearPairingsY
+// Output: dualResidues (overwriten)
+//
void computeDualResidues(const SDP &sdp,
const Vector &y,
const BlockDiagonalMatrix &BilinearPairingsY,
@@ -333,6 +381,7 @@ void computeDualResidues(const SDP &sdp,
const int ej_s = t->s * ej;
const int k = t->k;
+ // dualResidues[p] = -Tr(A_p Y)
dualResidues[p] = 0;
for (vector<int>::const_iterator b = sdp.blocks[j].begin();
b != sdp.blocks[j].end(); b++) {
@@ -341,20 +390,38 @@ void computeDualResidues(const SDP &sdp,
}
dualResidues[p] /= 2;
+ // dualResidues[p] = -Tr(A_p Y) - (FreeVarMatrix y)_p
for (int n = 0; n < sdp.FreeVarMatrix.cols; n++)
dualResidues[p] -= sdp.FreeVarMatrix.elt(p, n)*y[n];
+
+ // dualResidues[p] = primalObjective[p] - Tr(A_p Y) - (FreeVarMatrix y)_p
dualResidues[p] += sdp.primalObjective[p];
}
}
}
+// Result = \sum_p a[p] A_p,
+//
+// where a[p] is a vector of length primalObjective.size() and the
+// constraint matrices A_p are given by
+//
+// A_(j,r,s,k) = \sum_{b \in blocks[j]}
+// Block_b(v_{b,k} v_{b,k}^T \otimes E^{rs}),
+//
+// where v_{b,k} is the k-th column of bilinearBases[b], as described
+// in SDP.h.
+//
+// Inputs: sdp, a
+// Output: Result (overwritten)
+//
void constraintMatrixWeightedSum(const SDP &sdp,
- const Vector x,
- BlockDiagonalMatrix &result) {
+ const Vector a,
+ BlockDiagonalMatrix &Result) {
#pragma omp parallel for schedule(dynamic)
for (unsigned int j = 0; j < sdp.dimensions.size(); j++) {
const int ej = sdp.degrees[j] + 1;
+ // For each j, t points to the first IndexTuple corresponding to j
for (vector<IndexTuple>::const_iterator t = sdp.constraintIndices[j].begin();
t != sdp.constraintIndices[j].end();
t += ej) {
@@ -365,14 +432,22 @@ void constraintMatrixWeightedSum(const SDP &sdp,
for (vector<int>::const_iterator b = sdp.blocks[j].begin();
b != sdp.blocks[j].end(); b++) {
- diagonalCongruence(&x[p], sdp.bilinearBases[*b], r, s,
- result.blocks[*b]);
+
+ // Result.blocks[b]^(r,s) = V diag(a') V^T, where
+ // V=sdp.bilinearBases[b], a' denotes the subvector of a
+ // corresponding to j, and M^(r,s) denotes the (r,s)-th block
+ // of M.
+ diagonalCongruence(&a[p], sdp.bilinearBases[*b], r, s, Result.blocks[*b]);
+
+ // Result should be symmetric, so if r != s, we must divide
+ // the (r,s)-th block of Result.blocks[b] by 2 and copy its
+ // transpose to the (s,r)-th block.
if (r != s) {
const int u = sdp.bilinearBases[*b].rows;
for (int m = r*u; m < (r+1)*u; m++) {
for (int n = s*u; n < (s+1)*u; n++) {
- result.blocks[*b].elt(m, n) /= 2;
- result.blocks[*b].elt(n, m) = result.blocks[*b].elt(m, n);
+ Result.blocks[*b].elt(m, n) /= 2;
+ Result.blocks[*b].elt(n, m) = Result.blocks[*b].elt(m, n);
}
}
}
@@ -381,15 +456,40 @@ void constraintMatrixWeightedSum(const SDP &sdp,
}
}
+// Compute the vectors r and s on the right-hand side of the Schur
+// complement equation:
+//
+// {{S, -B}, {B^T, 0}} . {dx, dy} = {r_x, r_y}
+//
+// where S = SchurComplement and B = FreeVarMatrix. Specifically,
+//
+// r_x[p] = -dualResidues[p] - Tr(A_p Z) for 0 <= p < P
+// r_y[n] = dualObjective[n] - (FreeVarMatrix^T x)_n for 0 <= n < N
+//
+// where P = primalObjective.size(), N = dualObjective.size()
+//
+// Inputs:
+// - sdp, an SDP
+// - dualResidues, a Vector of length P
+// - Z = X^{-1} (PrimalResidues Y - R)
+// - x, a vector of length P
+// Outputs:
+// - r_x, a Vector of length P
+// - sy, a Vector of length N
+//
void computeSchurRHS(const SDP &sdp,
const Vector &dualResidues,
const BlockDiagonalMatrix &Z,
const Vector &x,
- Vector &r,
- Vector &s) {
- for (unsigned int p = 0; p < r.size(); p++)
- r[p] = -dualResidues[p];
-
+ Vector &r_x,
+ Vector &r_y) {
+ // r_x[p] = -dualResidues[p]
+ for (unsigned int p = 0; p < r_x.size(); p++)
+ r_x[p] = -dualResidues[p];
+
+ // r_x[p] = -dualResidues[p] - Tr(A_p Z), where A_p are as described
+ // in SDP.h. The trace can be computed in terms of bilinearBases
+ // using bilinearBlockPairing.
#pragma omp parallel for schedule(dynamic)
for (unsigned int j = 0; j < sdp.dimensions.size(); j++) {
for (vector<IndexTuple>::const_iterator t = sdp.constraintIndices[j].begin();
@@ -397,25 +497,29 @@ void computeSchurRHS(const SDP &sdp,
t++) {
for (vector<int>::const_iterator b = sdp.blocks[j].begin();
b != sdp.blocks[j].end(); b++) {
- const int delta = sdp.bilinearBases[*b].rows;
+ const int h = sdp.bilinearBases[*b].rows;
// Pointer to the k-th column of sdp.bilinearBases[*b]
- const Real *q = &sdp.bilinearBases[*b].elements[(t->k) * delta];
+ const Real *q = &sdp.bilinearBases[*b].elements[(t->k) * h];
- r[t->p] -= bilinearBlockPairing(q, delta, Z.blocks[*b], t->r, t->s);
+ r_x[t->p] -= bilinearBlockPairing(q, h, Z.blocks[*b], t->r, t->s);
}
}
}
+ // r_y[n] = dualObjective[n] - (FreeVarMatrix^T x)_n
#pragma omp parallel for schedule(static)
for (unsigned int n = 0; n < sdp.dualObjective.size(); n++) {
- s[n] = sdp.dualObjective[n];
+ r_y[n] = sdp.dualObjective[n];
for (int p = 0; p < sdp.FreeVarMatrix.rows; p++) {
- s[n] -= sdp.FreeVarMatrix.elt(p, n)*x[p];
+ r_y[n] -= sdp.FreeVarMatrix.elt(p, n)*x[p];
}
}
}
-// PrimalResidues = sum_p F_p x_p - X - F_0
+// PrimalResidues = \sum_p A_p x[p] - X
+//
+// Inputs: sdp, x, X
+// Output: PrimalResidues (overwritten)
//
void computePrimalResidues(const SDP &sdp,
const Vector x,
@@ -425,11 +529,13 @@ void computePrimalResidues(const SDP &sdp,
PrimalResidues -= X;
}
+// Centering parameter \beta_p for the predictor step
Real predictorCenteringParameter(const SDPSolverParameters ¶meters,
const bool isPrimalDualFeasible) {
return isPrimalDualFeasible ? Real(0) : parameters.infeasibleCenteringParameter;
}
+// Centering parameter \beta_c for the corrector step
Real correctorCenteringParameter(const SDPSolverParameters ¶meters,
const BlockDiagonalMatrix &X,
const BlockDiagonalMatrix &dX,
@@ -446,91 +552,206 @@ Real correctorCenteringParameter(const SDPSolverParameters ¶meters,
return max(parameters.infeasibleCenteringParameter, beta);
}
-Real stepLength(BlockDiagonalMatrix &XCholesky,
- BlockDiagonalMatrix &dX,
- BlockDiagonalMatrix &XInvDX,
+// min(gamma \alpha(M, dM), 1), where \alpha(M, dM) denotes the
+// largest positive real number such that M + \alpha dM is positive
+// semidefinite.
+//
+// \alpha(M, dM) is computed with a Cholesky decomposition M = L L^T.
+// The eigenvalues of M + \alpha dM are equal to the eigenvalues of 1
+// + \alpha L^{-1} dM L^{-T}. The correct \alpha is then -1/lambda,
+// where lambda is the smallest eigenvalue of L^{-1} dM L^{-T}.
+//
+// Inputs:
+// - MCholesky = L, the Cholesky decomposition of M (M itself is not needed)
+// - dM, a BlockDiagonalMatrix with the same structure as M
+// Workspace:
+// - MInvDM
+// - eigenvalues, a Vector of eigenvalues for each block of M
+// - workspace, a vector of Vectors needed by the minEigenvalue function
+// Output:
+// - min(\gamma \alpha(M, dM), 1) (returned)
+//
+Real stepLength(BlockDiagonalMatrix &MCholesky,
+ BlockDiagonalMatrix &dM,
+ BlockDiagonalMatrix &MInvDM,
vector<Vector> &eigenvalues,
vector<Vector> &workspace,
- const SDPSolverParameters ¶meters) {
- // XInvDX = L^{-1} dX L^{-1 T}, where X = L L^T
- XInvDX.copyFrom(dX);
- lowerTriangularInverseCongruence(XInvDX, XCholesky);
+ const Real gamma) {
+ // MInvDM = L^{-1} dM L^{-T}, where M = L L^T
+ MInvDM.copyFrom(dM);
+ lowerTriangularInverseCongruence(MInvDM, MCholesky);
- const Real lambda = minEigenvalue(XInvDX, workspace, eigenvalues);
- const Real gamma = parameters.stepLengthReduction;
+ const Real lambda = minEigenvalue(MInvDM, workspace, eigenvalues);
if (lambda > -gamma)
return 1;
else
return -gamma/lambda;
}
+// Compute the quantities needed to solve the Schur complement
+// equation
+//
+// {{S, -B}, {B^T, 0}} . {dx, dy} = {r, s}
+//
+// (where S = SchurComplement, B = FreeVarMatrix), using the method
+// described in the manual:
+//
+// - Compute S using BilinearPairingsXInv and BilinearPairingsY.
+//
+// - Stabilize S by adding a low-rank update S' = S + U U^T and
+// compute the Cholesky decomposition S' = L' L'^T.
+//
+// - Form B' = (B U) and compute
+//
+// - SchurOffDiagonal = L'^{-1} B
+// - L'^{-1} U (this is stored implicitly in the stabilizeBlock* variables)
+// - Q = (L'^{-1} B')^T (L'^{-1} B') - {{0, 0}, {0, 1}}
+//
+// - Compute the LU decomposition of Q.
+//
+// This data is sufficient to efficiently solve the above equation for
+// a given r,s.
+//
+// Inputs:
+// - BilinearPairingsXInv, BilinearPairingsY (these are members of
+// SDPSolver, but we include them as arguments to emphasize that
+// they must be computed first)
+// - choleskyStabilizeThreshold: the real constant \theta used in
+// stabilizing S
+// Workspace (members of SDPSolver which are modified by this method
+// and not used later):
+// - SchurComplement
+// - schurStabilizeIndices
+// - schurStabilizeLambdas
+// Outputs (members of SDPSolver which are modified by this method and
+// used later):
+// - SchurComplementCholesky
+// - SchurOffDiagonal
+// - stabilizeBlockIndices
+// - stabilizeBlockUpdateRow
+// - stabilizeBlockUpdateColumn
+// - stabilizeBlocks
+// - Q, Qpivots
+//
void SDPSolver::initializeSchurComplementSolver(const BlockDiagonalMatrix &BilinearPairingsXInv,
const BlockDiagonalMatrix &BilinearPairingsY,
const Real &choleskyStabilizeThreshold) {
- computeSchurBlocks(sdp, BilinearPairingsXInv, BilinearPairingsY, SchurBlocks);
- choleskyDecompositionStabilized(SchurBlocks, SchurBlocksCholesky,
+ computeSchurComplement(sdp, BilinearPairingsXInv, BilinearPairingsY, SchurComplement);
+
+ // compute SchurComplementCholesky = L', where
+ //
+ // L' L'^T = S' = SchurComplement + U U^T
+ //
+ // Here, the 'update' matrix U has columns given by
+ //
+ // U = ( Lambda_{p_1} e_{p_1}, ..., Lambda_{p_M} e_{p_M} )
+ //
+ // where e_p is a unit vector in the p-th direction and the
+ // Lambda_{p_m} are constants. The p_i are given by
+ // schurStabilizeIndices and the corresponding Lambda_i by
+ // schurStabilizeLambdas.
+ //
+ choleskyDecompositionStabilized(SchurComplement, SchurComplementCholesky,
schurStabilizeIndices,
schurStabilizeLambdas,
cast2double(choleskyStabilizeThreshold));
- // SchurOffDiagonal = {{- 1, 0}, {E, G}}
+ // SchurOffDiagonal = L'^{-1} FreeVarMatrix
SchurOffDiagonal.copyFrom(sdp.FreeVarMatrix);
- blockMatrixLowerTriangularSolve(SchurBlocksCholesky, SchurOffDiagonal);
- int updateColumns = SchurOffDiagonal.cols;
+ blockMatrixLowerTriangularSolve(SchurComplementCholesky, SchurOffDiagonal);
+
+ // Next we compute L'^{-1} U, which is stored implicitly in terms of
+ // its nonzero submatrices in the stabilizeBlock* variables.
+
+ // total number of columns in the off-diagonal part B' = (B U)
+ // (currently just B; will accumulate the rest shortly)
+ int offDiagonalColumns = SchurOffDiagonal.cols;
stabilizeBlockIndices.resize(0);
stabilizeBlockUpdateRow.resize(0);
stabilizeBlockUpdateColumn.resize(0);
- for (unsigned int j = 0; j < SchurBlocks.blocks.size(); j++) {
+ // j runs over blocks of SchurComplement
+ for (unsigned int j = 0; j < SchurComplement.blocks.size(); j++) {
if (schurStabilizeIndices[j].size() > 0) {
- int startIndex = schurStabilizeIndices[j][0];
- int blockRows = SchurBlocks.blocks[j].rows - startIndex;
- int blockCols = schurStabilizeIndices[j].size();
-
+ // the j-th block of S contains stabilized directions. We have a
+ // block submatrix
+ //
+ // U_j = stabilizeBlocks[j]
+ //
+ // of U for each such j.
stabilizeBlockIndices.push_back(j);
- stabilizeBlockUpdateRow.push_back(SchurBlocks.blockStartIndices[j] + startIndex);
- stabilizeBlockUpdateColumn.push_back(updateColumns);
- updateColumns += blockCols;
+
+ // index of the first stabilized direction within the j-th block
+ int startIndex = schurStabilizeIndices[j][0];
- stabilizeBlocks[j].setRowsCols(blockRows, blockCols);
+ // set the dimensions of U_j
+ stabilizeBlocks[j].setRowsCols(SchurComplement.blocks[j].rows - startIndex,
+ schurStabilizeIndices[j].size());
+ // set U_j = 0
stabilizeBlocks[j].setZero();
+ // for each column of U_j add Lambda_p in the row (p - startIndex)
for (unsigned int c = 0; c < schurStabilizeIndices[j].size(); c++) {
int r = schurStabilizeIndices[j][c] - startIndex;
stabilizeBlocks[j].elt(r, c) = schurStabilizeLambdas[j][c];
}
+
+ // append the row of U corresponding to the top-left of U_j
+ stabilizeBlockUpdateRow.push_back(SchurComplement.blockStartIndices[j] + startIndex);
+ // append the column of U corresponding to the top-left of U_j
+ stabilizeBlockUpdateColumn.push_back(offDiagonalColumns);
+ // update the number of off-diagonal columns
+ offDiagonalColumns += stabilizeBlocks[j].cols;
}
}
- // G := SchurBlocksCholesky^{-1} G
+ // Set U = L'^{-1} U
+ //
+ // We do this by modifying the blocks U_j = stabilizeBlocks[j]
+ // in-place, multiplying by the inverse of the appropriate submatrix
+ // of SchurComplementCholesky. We henceforth refer to L'^{-1} U as
+ // V to avoid confusion.
#pragma omp parallel for schedule(dynamic)
for (unsigned int j = 0; j < stabilizeBlockIndices.size(); j++) {
int b = stabilizeBlockIndices[j];
int startIndex = schurStabilizeIndices[b][0];
Rtrsm("Left", "Lower", "NoTranspose", "NonUnitDiagonal",
stabilizeBlocks[b].rows, stabilizeBlocks[b].cols, 1,
- &SchurBlocksCholesky.blocks[b].elt(startIndex, startIndex),
- SchurBlocksCholesky.blocks[b].rows,
+ &SchurComplementCholesky.blocks[b].elt(startIndex, startIndex),
+ SchurComplementCholesky.blocks[b].rows,
&stabilizeBlocks[b].elt(0, 0),
stabilizeBlocks[b].rows);
}
- // Q = SchurOffDiagonal^T SchurOffDiagonal - {{0,0},{0,1}}
- Q.setRowsCols(updateColumns, updateColumns);
+ // Next, we compute
+ //
+ // Q = (L'^{-1} B')^T (L'^{-1} B') - {{0, 0}, {0, 1}}
+ //
+ // Where B' = (B U). We think of Q as containing four-blocks called
+ // Upper/Lower-Left/Right.
+
+ // Set the dimensions of Q
+ Q.setRowsCols(offDiagonalColumns, offDiagonalColumns);
Q.setZero();
+ // At this point, SchurOffDiagonal = L'^{-1} B.
+ //
+ // UpperLeft(Q) = SchurOffDiagonal^T SchurOffDiagonal
matrixSquareIntoBlock(SchurOffDiagonal, Q, 0, 0);
- // LowerRight(Q) = G^T G - 1
+ // At this point, stabilizeBlocks the blocks of V = L'^{-1} U.
+ //
+ // LowerRight(Q) = V^T V - 1
for (unsigned int j = 0; j < stabilizeBlockIndices.size(); j++) {
int b = stabilizeBlockIndices[j];
int c = stabilizeBlockUpdateColumn[j];
matrixSquareIntoBlock(stabilizeBlocks[b], Q, c, c);
+ // subtract the identity matrix from this block
for (int i = c; i < c + stabilizeBlocks[b].cols; i++)
Q.elt(i, i) -= 1;
}
- // LowerLeft(Q) = G^T U
+ // LowerLeft(Q) = V^T SchurOffDiagonal
# pragma omp parallel for schedule(dynamic)
for (unsigned int j = 0; j < stabilizeBlockIndices.size(); j++) {
int b = stabilizeBlockIndices[j];
@@ -556,27 +777,36 @@ void SDPSolver::initializeSchurComplementSolver(const BlockDiagonalMatrix &Bilin
for (int r = SchurOffDiagonal.cols; r < Q.rows; r++)
Q.elt(c, r) = Q.elt(r, c);
+ // Resize Qpivots appropriately and compute the LU decomposition of Q
Qpivots.resize(Q.rows);
LUDecomposition(Q, Qpivots);
}
-// As inputs, dx and dy are the residues r_x and r_y on the right-hand
-// side of the Schur complement equation. As outputs, they are the
-// values for dx and dy.
+// Solve the Schur complement equation for dx, dy.
+//
+// - As inputs, dx and dy are the residues r_x and r_y on the
+// right-hand side of the Schur complement equation.
+// - As outputs, dx and dy are overwritten with the solutions of the
+// Schur complement equation.
+//
+// The equation is solved using the block-decomposition described in
+// the manual.
//
void SDPSolver::solveSchurComplementEquation(Vector &dx, Vector &dy) {
- // dx = SchurBlocksCholesky^{-1} dx
- blockMatrixLowerTriangularSolve(SchurBlocksCholesky, dx);
+ // dx = SchurComplementCholesky^{-1} dx
+ blockMatrixLowerTriangularSolve(SchurComplementCholesky, dx);
+ // extend dy by additional coordinates z needed for stabilizing
+ // dyExtended = (dy, z)
dyExtended.resize(Q.rows);
- // k_1 = r_y - SchurOffDiagonal^T dx
+
+ // dy = r_y - SchurOffDiagonal^T dx
for (unsigned int n = 0; n < dy.size(); n++)
dyExtended[n] = dy[n];
-
vectorScaleMatrixMultiplyTransposeAdd(-1, SchurOffDiagonal, dx, 1, dyExtended);
- // k_2 = -G^T dx
+ // z = -V^T dx
for (unsigned int j = 0; j < stabilizeBlockIndices.size(); j++) {
int b = stabilizeBlockIndices[j];
int pTopLeft = stabilizeBlockUpdateRow[j];
@@ -589,12 +819,13 @@ void SDPSolver::solveSchurComplementEquation(Vector &dx, Vector &dy) {
}
}
- // k = Q^{-1} k
+ // dyExtended = Q^{-1} dyExtended
solveWithLUDecomposition(Q, Qpivots, dyExtended);
- // dx = dx + SchurOffDiagonal k_1
+ // dx += SchurOffDiagonal dy
vectorScaleMatrixMultiplyAdd(1, SchurOffDiagonal, dyExtended, 1, dx);
- // dx += G k_2
+
+ // dx += V z
for (unsigned int j = 0; j < stabilizeBlockIndices.size(); j++) {
int b = stabilizeBlockIndices[j];
int pTopLeft = stabilizeBlockUpdateRow[j];
@@ -605,16 +836,33 @@ void SDPSolver::solveSchurComplementEquation(Vector &dx, Vector &dy) {
dx[pTopLeft + r] += dyExtended[cTopLeft + c] * stabilizeBlocks[b].elt(r, c);
}
- // dx = SchurBlocksCholesky^{-T} dx
- blockMatrixLowerTriangularTransposeSolve(SchurBlocksCholesky, dx);
- // dy = k_1
+ // dx = SchurComplementCholesky^{-T} dx
+ blockMatrixLowerTriangularTransposeSolve(SchurComplementCholesky, dx);
+
+ // dy = first few entries of dyExtended
for (unsigned int n = 0; n < dy.size(); n++)
dy[n] = dyExtended[n];
}
+// Compute the search direction (dx, dX, dy, dY) for the predictor and
+// corrector phases.
+//
+// Inputs:
+// - beta, the centering parameter
+// - mu = Tr(X Y) / X.cols
+// - correctorPhase: boolean indicating whether we're in the corrector
+// phase or predictor phase.
+// Workspace (members of SDPSolver which are modified in-place but not
+// used elsewhere):
+// - Z, R
+// Outputs (members of SDPSolver which are modified in-place):
+// - dx, dX, dy, dY
+//
void SDPSolver::computeSearchDirection(const Real &beta,
const Real &mu,
const bool correctorPhase) {
+ // R = beta mu I - X Y (predictor phase)
+ // R = beta mu I - X Y - dX dY (corrector phase)
blockDiagonalMatrixScaleMultiplyAdd(-1, X, Y, 0, R);
if (correctorPhase)
blockDiagonalMatrixScaleMultiplyAdd(-1, dX, dY, 1, R);
@@ -626,14 +874,15 @@ void SDPSolver::computeSearchDirection(const Real &beta,
blockMatrixSolveWithCholesky(XCholesky, Z);
Z.symmetrize();
- // (dx_RHS)_k = -d_k + Tr(F_k Z)
- // dz_RHS = f - D^T x
+ // r_x[p] = -dualResidues[p] - Tr(A_p Z)
+ // r_y[n] = dualObjective[n] - (FreeVarMatrix^T x)_n
+ // Here, dx = r_x, dy = r_y.
computeSchurRHS(sdp, dualResidues, Z, x, dx, dy);
- // dx_N = B_{NN}^{-1} dx_N, dx_B = E^T dx_N
+ // Solve for dx, dy in-place
solveSchurComplementEquation(dx, dy);
- // dX = R_p + sum_p F_p dx_p
+ // dX = PrimalResidues + \sum_p A_p dx[p]
constraintMatrixWeightedSum(sdp, dx, dX);
dX += PrimalResidues;
@@ -665,18 +914,21 @@ SDPSolverTerminateReason SDPSolver::run(const path checkpointFile) {
choleskyDecomposition(X, XCholesky);
choleskyDecomposition(Y, YCholesky);
- computeInvBilinearPairingsWithCholesky(XCholesky, sdp.bilinearBases,
- bilinearPairingsWorkspace,
- BilinearPairingsXInv);
- computeBilinearPairings(Y, sdp.bilinearBases,
- bilinearPairingsWorkspace,
- BilinearPairingsY);
-
- // d_k = c_k - Tr(F_k Y) - (D y)_k
+ // Compute the bilinear pairings BilinearPairingsXInv and
+ // BilinearPairingsY needed for the dualResidues and the Schur
+ // complement matrix
+ blockTensorInvTransposeCongruenceWithCholesky(XCholesky, sdp.bilinearBases,
+ bilinearPairingsWorkspace,
+ BilinearPairingsXInv);
+ blockTensorTransposeCongruence(Y, sdp.bilinearBases,
+ bilinearPairingsWorkspace,
+ BilinearPairingsY);
+
+ // dualResidues[p] = primalObjective[p] - Tr(A_p Y) - (FreeVarMatrix y)_p,
computeDualResidues(sdp, y, BilinearPairingsY, dualResidues);
dualError = maxAbsVector(dualResidues);
- // PrimalResidues = sum_p F_p x_p - X - F_0 (F_0 is zero for now)
+ // PrimalResidues = \sum_p A_p x[p] - X
computePrimalResidues(sdp, x, X, PrimalResidues);
primalError = PrimalResidues.maxAbs();
@@ -684,44 +936,44 @@ SDPSolverTerminateReason SDPSolver::run(const path checkpointFile) {
const bool isDualFeasible = dualError < parameters.dualErrorThreshold;
const bool isOptimal = dualityGap < parameters.dualityGapThreshold;
- if (isPrimalFeasible && isDualFeasible && isOptimal)
- return PrimalDualOptimal;
- else if (isPrimalFeasible && parameters.findPrimalFeasible)
- return PrimalFeasible;
- else if (isDualFeasible && parameters.findDualFeasible)
- return DualFeasible;
- else if (primalStepLength == 1 && parameters.detectPrimalFeasibleJump)
- return PrimalFeasibleJumpDetected;
- else if (dualStepLength == 1 && parameters.detectDualFeasibleJump)
- return DualFeasibleJumpDetected;
- // Detect max iterations after computing errors and objective
- // functions for the current point
- else if (iteration > parameters.maxIterations)
- return MaxIterationsExceeded;
+ if (isPrimalFeasible && isDualFeasible && isOptimal) return PrimalDualOptimal;
+ else if (isPrimalFeasible && parameters.findPrimalFeasible) return PrimalFeasible;
+ else if (isDualFeasible && parameters.findDualFeasible) return DualFeasible;
+ else if (primalStepLength == 1 && parameters.detectPrimalFeasibleJump) return PrimalFeasibleJumpDetected;
+ else if (dualStepLength == 1 && parameters.detectDualFeasibleJump) return DualFeasibleJumpDetected;
+ else if (iteration > parameters.maxIterations) return MaxIterationsExceeded;
+ // Compute SchurComplement and prepare to solve the Schur
+ // complement equation for dx, dy
initializeSchurComplementSolver(BilinearPairingsXInv, BilinearPairingsY,
parameters.choleskyStabilizeThreshold);
+ // Compute the complementarity mu = Tr(X Y)/X.dim
Real mu = frobeniusProductSymmetric(X, Y)/X.dim;
if (mu > parameters.maxComplementarity)
return MaxComplementarityExceeded;
- // Mehrotra predictor solution for (dx, dX, dY)
- Real betaPredictor = predictorCenteringParameter(parameters,
- isPrimalFeasible && isDualFeasible);
+ // Mehrotra predictor solution for (dx, dX, dy, dY)
+ Real betaPredictor =
+ predictorCenteringParameter(parameters, isPrimalFeasible && isDualFeasible);
computeSearchDirection(betaPredictor, mu, false);
- // Mehrotra corrector solution for (dx, dX, dY)
- Real betaCorrector = correctorCenteringParameter(parameters, X, dX, Y, dY, mu,
- isPrimalFeasible && isDualFeasible);
+ // Mehrotra corrector solution for (dx, dX, dy, dY)
+ Real betaCorrector =
+ correctorCenteringParameter(parameters, X, dX, Y, dY, mu,
+ isPrimalFeasible && isDualFeasible);
computeSearchDirection(betaCorrector, mu, true);
- // Step length to preserve positive definiteness
+ // Compute step-lengths that preserve positive definiteness of X, Y
primalStepLength = stepLength(XCholesky, dX, StepMatrixWorkspace,
- eigenvaluesWorkspace, QRWorkspace, parameters);
+ eigenvaluesWorkspace, QRWorkspace,
+ parameters.stepLengthReduction);
dualStepLength = stepLength(YCholesky, dY, StepMatrixWorkspace,
- eigenvaluesWorkspace, QRWorkspace, parameters);
+ eigenvaluesWorkspace, QRWorkspace,
+ parameters.stepLengthReduction);
+ // If our problem is both dual-feasible and primal-feasible,
+ // ensure we're following the true Newton direction.
if (isPrimalFeasible && isDualFeasible) {
primalStepLength = min(primalStepLength, dualStepLength);
dualStepLength = primalStepLength;
@@ -729,11 +981,13 @@ SDPSolverTerminateReason SDPSolver::run(const path checkpointFile) {
printIteration(iteration, mu, primalStepLength, dualStepLength, betaCorrector);
- // Update current point
+ // Update the primal point (x, X) += primalStepLength (dx, dX)
scaleVector(dx, primalStepLength);
addVector(x, dx);
dX *= primalStepLength;
X += dX;
+
+ // Update the dual point (y, Y) += dualStepLength (dy, dY)
scaleVector(dy, dualStepLength);
addVector(y, dy);
dY *= dualStepLength;
diff --git a/src/SDPSolver.h b/src/SDPSolver.h
index 222d911..19849b0 100644
--- a/src/SDPSolver.h
+++ b/src/SDPSolver.h
@@ -178,7 +178,7 @@ public:
// BilinearPairingsXInv.blocks[b].elt(
// (d_j+1) s + k1,
// (d_j+1) r + k2
- // ) = \chi_{b,k1}^T (X.blocks[b]^{-1})^{(s,r)} \chi_{b,k2}
+ // ) = v_{b,k1}^T (X.blocks[b]^{-1})^{(s,r)} v_{b,k2}
//
// 0 <= k1,k2 <= sdp.degrees[j] = d_j
// 0 <= s,r < sdp.dimensions[j] = m_j
@@ -198,14 +198,14 @@ public:
BlockDiagonalMatrix BilinearPairingsY;
// The Schur complement matrix S: a BlockDiagonalMatrix with one
- // block for each 0 <= j < J. SchurBlocks.blocks[j] has dimension
+ // block for each 0 <= j < J. SchurComplement.blocks[j] has dimension
// (d_j+1)*m_j*(m_j+1)/2
//
- BlockDiagonalMatrix SchurBlocks;
+ BlockDiagonalMatrix SchurComplement;
- // SchurBlocksCholesky = L', the Cholesky decomposition of the
+ // SchurComplementCholesky = L', the Cholesky decomposition of the
// stabilized Schur complement matrix S' = S + U U^T.
- BlockDiagonalMatrix SchurBlocksCholesky;
+ BlockDiagonalMatrix SchurComplementCholesky;
// SchurOffDiagonal = L'^{-1} FreeVarMatrix, needed in solving the
// Schur complement equation.
@@ -225,7 +225,7 @@ public:
// Recall: S is block diagonal, with blocks labeled by 0 <= j < J.
//
// schurStabilizeIndices[j] = a list of which directions of
- // SchurBlocks.blocks[j] have been stabilized (counting from 0 at
+ // SchurComplement.blocks[j] have been stabilized (counting from 0 at
// the upper left of each block), for 0 <= j < J.
vector<vector<int> > schurStabilizeIndices;
//
@@ -241,20 +241,20 @@ public:
//
// For each 0 <= m < M, stabilizeBlockUpdateRow records the row of
// U corresponding to the first stabilized direction in the j_m'th
- // block of SchurBlocks:
+ // block of SchurComplement:
//
// stabilizeBlockUpdateRow[m] = schurStabilizeIndices[j_m][0] +
- // SchurBlocks.blockStartIndices[j_m]
+ // SchurComplement.blockStartIndices[j_m]
//
vector<int> stabilizeBlockUpdateRow;
//
// For each 0 <= m < M, stabilizeBlockUpdateColumn records the
// column of U corresponding to the first stabilized direction in
- // the j_m'th block of SchurBlocks.
+ // the j_m'th block of SchurComplement.
vector<int> stabilizeBlockUpdateColumn;
//
// For each 0 <= m < M, stabilizeBlocks is the submatrix of U
- // corresponding to the j_m'th block of SchurBlocks.
+ // corresponding to the j_m'th block of SchurComplement.
vector<Matrix> stabilizeBlocks;
// Q = B' L'^{-T} L'^{-1} B' - {{0, 0}, {0, 1}}, where B' =
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/sdpb.git
More information about the debian-science-commits
mailing list