PMatrix contains the main data structure and the computational routine for the parallel selected inversion. More...
#include <pselinv.hpp>
Classes
struct | SuperNodeBufferType |
Public Member Functions
PMatrix (const GridType *g, const SuperNodeType *s, const PEXSI::SuperLUOptions *o) | |
void | deallocate () |
PMatrix (const PMatrix &C) | |
PMatrix & | operator= (const PMatrix &C) |
void | Setup (const GridType *g, const SuperNodeType *s, const PEXSI::SuperLUOptions *o) |
Int | NumCol () const |
Int | NumSuper () const |
Int | NumLocalBlockCol () const |
NumLocalBlockCol returns the total number of local block columns.
Int | NumLocalBlockRow () const |
NumLocalBlockRow returns the total number of local block rows.
std::vector< std::vector< Int > > & | ColBlockIdx () |
std::vector< std::vector< Int > > & | RowBlockIdx () |
std::vector< Int > & | ColBlockIdx (Int jLocal) |
std::vector< Int > & | RowBlockIdx (Int iLocal) |
Int | NumBlockL (Int jLocal) const |
NumBlockL returns the number of nonzero L blocks for the local block column jLocal. | |
Int | NumBlockU (Int iLocal) const |
NumBlockU returns the number of nonzero U blocks for the local block row iLocal. | |
const GridType * | Grid () const |
Grid returns the GridType structure of the current PMatrix. | |
const SuperNodeType * | SuperNode () const |
SuperNode returns the supernodal partition of the current PMatrix. | |
std::vector< LBlock< T > > & | L (Int jLocal) |
L returns the vector of nonzero L blocks for the local block column jLocal. | |
std::vector< UBlock< T > > & | U (Int iLocal) |
U returns the vector of nonzero U blocks for the local block row iLocal. | |
std::vector< std::vector< int > > & | WorkingSet () |
WorkingSet returns the ordered list of supernodes which could be done in parallel. | |
Int | CountSendToRight (Int ksup)
CountSendToRight returns the number of processors to the right of the current processor with which it has to communicate.
Int | CountSendToBelow (Int ksup)
CountSendToBelow returns the number of processors below the current processor with which it has to communicate.
Int | CountRecvFromBelow (Int ksup)
CountRecvFromBelow returns the number of processors below the current processor from which it receives data.
Int | CountSendToCrossDiagonal (Int ksup)
CountSendToCrossDiagonal returns the number of cross-diagonal processors with which the current processor has to communicate.
Int | CountRecvFromCrossDiagonal (Int ksup)
CountRecvFromCrossDiagonal returns the number of cross-diagonal processors with which the current processor has to communicate.
void | GetEtree (std::vector< Int > &etree_supno) |
GetEtree computes the supernodal elimination tree to be used later in the pipelined selected inversion stage. | |
virtual void | ConstructCommunicationPattern () |
ConstructCommunicationPattern constructs the communication pattern to be used later in the selected inversion stage. The supernodal elimination tree is used to add an additional level of parallelism between supernodes. ConstructCommunicationPattern_P2p is called by default. | |
void | ConstructCommunicationPattern_P2p () |
ConstructCommunicationPattern_P2p constructs the communication pattern to be used later in the selected inversion stage. The supernodal elimination tree is used to add an additional level of parallelism between supernodes. | |
virtual void | PreSelInv () |
PreSelInv prepares the structure in L_ and U_ so that SelInv only involves matrix-matrix multiplication. More... | |
virtual void | SelInv () |
SelInv is the main function for the selected inversion. More... | |
void | SelInv_P2p () |
Point-to-point version of the selected inversion. | |
void | GetDiagonal (NumVec< T > &diag) |
GetDiagonal extracts the diagonal elements of the PMatrix. More... | |
void | GetColumn (Int colIdx, NumVec< T > &col) |
void | PMatrixToDistSparseMatrix (DistSparseMatrix< T > &A) |
PMatrixToDistSparseMatrix converts the PMatrix into a distributed compressed sparse column matrix format. The DistSparseMatrix follows the natural order. More... | |
void | PMatrixToDistSparseMatrix (const DistSparseMatrix< T > &A, DistSparseMatrix< T > &B) |
PMatrixToDistSparseMatrix converts the PMatrix into a distributed compressed sparse column matrix format B, which has the same sparsity pattern as the DistSparseMatrix A. More... | |
void | PMatrixToDistSparseMatrix2 (const DistSparseMatrix< T > &A, DistSparseMatrix< T > &B) |
PMatrixToDistSparseMatrix2 is a more efficient version that performs the same job as PMatrixToDistSparseMatrix(A,B), especially when A contains far fewer nonzero elements than the current PMatrix. More...
Int | NnzLocal () |
NnzLocal computes the number of nonzero elements (L and U) saved locally. | |
LongInt | Nnz () |
Nnz computes the total number of nonzero elements in the PMatrix. | |
void | GetNegativeInertia (Real &inertia) |
GetNegativeInertia computes the negative inertia of a PMatrix. This can be used to estimate e.g. the number of eigenvalues of a matrix below a certain threshold. | |
int | IdxToTag (Int lidx, Int tag) |
template<> | |
void | GetNegativeInertia (Real &inertia) |
template<> | |
void | GetNegativeInertia (Real &inertia) |
Static Public Member Functions
static PMatrix< T > * | Create (const GridType *pGridType, const SuperNodeType *pSuper, const SuperLUOptions *pLuOpt) |
Create is a factory method which returns a pointer to a new PMatrix. | |
static PMatrix< T > * | Create (const SuperLUOptions *pLuOpt) |
Protected Member Functions
void | SelInvIntra_P2p (Int lidx) |
SelInvIntra_P2p. | |
void | SelInv_lookup_indexes (SuperNodeBufferType &snode, std::vector< LBlock< T > > &LcolRecv, std::vector< UBlock< T > > &UrowRecv, NumMat< T > &AinvBuf, NumMat< T > &UBuf) |
SelInv_lookup_indexes. | |
void | GetWorkSet (std::vector< Int > &snodeEtree, std::vector< std::vector< Int > > &WSet) |
GetWorkSet. | |
void | UnpackData (SuperNodeBufferType &snode, std::vector< LBlock< T > > &LcolRecv, std::vector< UBlock< T > > &UrowRecv) |
UnpackData. | |
void | ComputeDiagUpdate (SuperNodeBufferType &snode) |
ComputeDiagUpdate. | |
void | SendRecvCD_UpdateU (std::vector< SuperNodeBufferType > &arrSuperNodes, Int stepSuper) |
SendRecvCD_UpdateU. | |
Protected Attributes
const GridType * | grid_ |
const SuperNodeType * | super_ |
const SuperLUOptions * | options_ |
std::vector< std::vector< Int > > | ColBlockIdx_ |
std::vector< std::vector< Int > > | RowBlockIdx_ |
std::vector< std::vector < LBlock< T > > > | L_ |
std::vector< std::vector < UBlock< T > > > | U_ |
std::vector< std::vector< Int > > | workingSet_ |
BolNumMat | isSendToBelow_ |
BolNumMat | isSendToRight_ |
BolNumVec | isSendToDiagonal_ |
BolNumMat | isSendToCrossDiagonal_ |
BolNumMat | isRecvFromBelow_ |
BolNumVec | isRecvFromAbove_ |
BolNumVec | isRecvFromLeft_ |
BolNumMat | isRecvFromCrossDiagonal_ |
std::vector< TreeBcast * > | fwdToBelowTree_ |
std::vector< TreeBcast * > | fwdToRightTree_ |
std::vector< TreeReduce< T > * > | redToLeftTree_ |
std::vector< TreeReduce< T > * > | redToAboveTree_ |
Detailed Description
PMatrix contains the main data structure and the computational routine for the parallel selected inversion.
NOTE: The following is somewhat obsolete.
After factorizing a SuperLUMatrix luMat (see SuperLUMatrix for information on how to perform the factorization), perform the following steps for parallel selected inversion; a code sketch of the whole sequence is given below.
Conversion from SuperLU_DIST:
Symbolic information: SuperNodeType super; PMatrix PMloc; luMat.SymbolicToSuperNode( super );
Numerical information, both L and U: luMat.LUstructToPMatrix( PMloc );
Preparation:
Construct the communication pattern for SelInv: PMloc.ConstructCommunicationPattern(); or PMloc.ConstructCommunicationPattern_P2p(); or PMloc.ConstructCommunicationPattern_Collectives();
Numerical preparation so that SelInv only involves Gemm: PMloc.PreSelInv();
Selected inversion: PMloc.SelInv(); or PMloc.SelInv_P2p(); or PMloc.SelInv_Collectives();
Postprocessing:
Get the information in DistSparseMatrix format: DistSparseMatrix<Scalar> Ainv; PMloc.PMatrixToDistSparseMatrix( Ainv );
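Putting the steps together, a minimal sketch of the driver code could look like the following. The scalar type Scalar, the variable names, and the surrounding setup (MPI, process grid, factorization of luMat) are illustrative assumptions rather than verbatim PEXSI code.

```cpp
// Sketch of the selected-inversion workflow described above.
// Assumes luMat is an already factorized SuperLUMatrix and that the
// process grid and SuperLUOptions have been set up elsewhere.

// Conversion from SuperLU_DIST: symbolic and numerical information.
SuperNodeType super;
luMat.SymbolicToSuperNode( super );

PMatrix<Scalar> PMloc;            // Scalar is illustrative (e.g. Complex)
luMat.LUstructToPMatrix( PMloc );

// Preparation: communication pattern, then numerical preparation
// so that SelInv only involves Gemm.
PMloc.ConstructCommunicationPattern();
PMloc.PreSelInv();

// Selected inversion.
PMloc.SelInv();

// Postprocessing: retrieve the selected inverse in DistSparseMatrix format.
DistSparseMatrix<Scalar> Ainv;
PMloc.PMatrixToDistSparseMatrix( Ainv );
```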
Member Function Documentation
void PEXSI::PMatrix< T >::GetDiagonal ( NumVec< T > & diag )
GetDiagonal extracts the diagonal elements of the PMatrix.
1) diag is permuted back to the natural order.
2) diag is shared by all processors in grid_->comm through an Allreduce procedure.
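For illustration, a minimal call sequence might look like this (PMloc and the scalar type are assumed from the workflow sketch above):

```cpp
// Extract the diagonal of the selected inverse after SelInv() has run.
// The result is in the natural order and is replicated on all
// processors in grid_->comm via an Allreduce, as described above.
NumVec<Scalar> diag;
PMloc.GetDiagonal( diag );
```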
void PEXSI::PMatrix< T >::PMatrixToDistSparseMatrix ( DistSparseMatrix< T > & A )
PMatrixToDistSparseMatrix converts the PMatrix into a distributed compressed sparse column matrix format. The DistSparseMatrix follows the natural order.
Parameters:
[out] A : Output sparse matrix.
void PEXSI::PMatrix< T >::PMatrixToDistSparseMatrix ( const DistSparseMatrix< T > & A, DistSparseMatrix< T > & B )
PMatrixToDistSparseMatrix converts the PMatrix into a distributed compressed sparse column matrix format B, which has the same sparsity pattern as the DistSparseMatrix A.
The DistSparseMatrix follows the natural order.
Parameters:
[in] A : Input sparse matrix to provide the sparsity pattern.
[out] B : Output sparse matrix.
void PEXSI::PMatrix< T >::PMatrixToDistSparseMatrix2 ( const DistSparseMatrix< T > & A, DistSparseMatrix< T > & B )
PMatrixToDistSparseMatrix2 is a more efficient version that performs the same job as PMatrixToDistSparseMatrix(A,B), especially when A contains far fewer nonzero elements than the current PMatrix.
The DistSparseMatrix follows the natural order.
Parameters:
[in] A : Input sparse matrix to provide the sparsity pattern.
[out] B : Output sparse matrix.
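A sketch of how the three conversion overloads above might be used; PMloc, Ainv, A, B, and the scalar type are illustrative names, with A assumed to be an existing DistSparseMatrix providing a sparsity pattern.

```cpp
// Convert the whole PMatrix into DistSparseMatrix format (natural order).
DistSparseMatrix<Scalar> Ainv;
PMloc.PMatrixToDistSparseMatrix( Ainv );

// Convert only the entries matching the sparsity pattern of A.
DistSparseMatrix<Scalar> B;
PMloc.PMatrixToDistSparseMatrix( A, B );

// Same result, but more efficient when A has far fewer nonzeros
// than the current PMatrix.
PMloc.PMatrixToDistSparseMatrix2( A, B );
```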
virtual void PEXSI::PMatrix< T >::PreSelInv ( )
PreSelInv prepares the structure in L_ and U_ so that SelInv only involves matrix-matrix multiplication.
PreSelInv performs the following updates:
Compute the inverse of the diagonal blocks: L_{kk} <- (L_{kk} U_{kk})^{-1}
Update the lower triangular L blocks: L_{ik} <- L_{ik} L_{kk}^{-1}
Update the upper triangular U blocks, which store redundant information as in L: U_{kj} <- L_{jk}^T
PreSelInv assumes that PEXSI::PMatrix::ConstructCommunicationPattern has been executed.
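Written compactly (a restatement of the three updates above for one supernode ksup; the index ranges isup > ksup and jsup > ksup are implied by the supernodal partition):

```latex
L_{kk} \leftarrow \left( L_{kk} U_{kk} \right)^{-1}, \qquad
L_{ik} \leftarrow L_{ik}\, L_{kk}^{-1}, \qquad
U_{kj} \leftarrow L_{jk}^{T}.
```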
virtual void PEXSI::PMatrix< T >::SelInv ( )
SelInv is the main function for the selected inversion.
PSelInv is a right-looking parallel selected inversion subroutine for sparse symmetric matrices. Static pivoting is used in this version.
Although the matrix is symmetric, the key idea of the current implementation of PSelInv is that the upper-triangular matrix is saved (in the form of UBlock). Such redundant information is effective in reducing the complexity of designing the communication pattern.
At each supernode ksup, the lower triangular blocks Ainv(isup, ksup) (isup > ksup) are updated first. The blocks in the processor row of ksup first send the nonzero blocks in U(ksup, jsup) (which is L(jsup, ksup)^T) to the Schur complements Ainv(isup, jsup). At the same time, the blocks in the processor column of ksup send the nonzero blocks (only the nonzero row indices) to the Schur complement Ainv(isup, jsup). Then
sum_{jsup} Ainv(isup, jsup) U^{T}(ksup, jsup)
is performed. In this procedure, only processors with isRecvFromAbove[ksup] == true && isRecvFromLeft[ksup] == true participate in the computation.
The result is reduced to the processor column ksup within the same processor row. The diagonal block Ainv(ksup, ksup) is simply updated by a reduce procedure within the column processor group of ksup.
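Making the assignment target and sign explicit, the lower-triangular update above corresponds to the standard selected-inversion recurrence (the sign may be folded differently inside the implementation):

```latex
A^{-1}(isup, ksup) \;\leftarrow\;
  - \sum_{jsup > ksup} A^{-1}(isup, jsup)\, U^{T}(ksup, jsup),
  \qquad isup > ksup.
```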
The Ainv(ksup, isup) blocks are then updated via the data received from the cross-diagonal processors.
NOTE: The cross-diagonal processor is only well defined here for square grids. For a P x P square grid, (ip, jp) is the cross-diagonal processor of (jp, ip) if ip != jp. The current version of SelInv only works for square processor grids.
The communication is controlled by 3 sending variables and 3 receiving variables. The first dimension of all the sending and receiving variables is numSuper. The information contains redundancy, since not all processors have access to all the supernodes; however, this significantly improves readability and only adds a small memory cost for indexing. This sending/receiving mechanism avoids double indexing of the supernodes and scales to matrices of large size.
isSendToBelow:
Dimension: numSuper x numProcRow
Role : At supernode ksup, if isSendToBelow(ksup, ip) == true, send all local blocks {U(ksup, jsup) | jsup > ksup} to the processor row ip.
isRecvFromAbove:
Dimension: numSuper
Role : At supernode ksup, if isRecvFromAbove(ksup) == true, receive the nonzero blocks {U(ksup, jsup) | jsup > ksup} from the processor row owning ksup.
isSendToRight:
Dimension: numSuper x numProcCol
Role : At supernode ksup, if isSendToRight(ksup, jp) == true, send all local blocks (mainly the nonzero row indices, without the values, to save communication cost) {L(isup, ksup) | isup > ksup} to the processor column jp.
isRecvFromLeft:
Dimension: numSuper
Role : At supernode ksup, if isRecvFromLeft(ksup) == true, receive the nonzero blocks {L(isup, ksup) | isup > ksup} (row indices) from the processor column owning ksup.
isSendToCrossDiagonal:
Dimension: numSuper
Role : At supernode ksup, if isSendToCrossDiagonal(ksup) == true, send all local blocks {L(isup, ksup) | isup > ksup} to the cross-diagonal processor. NOTE: This requires a square processor grid.
isRecvFromCrossDiagonal:
Dimension: numSuper
Role : At supernode ksup, if isRecvFromCrossDiagonal(ksup) == true, receive from the cross-diagonal processor. NOTE: This requires a square processor grid.
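As a purely hypothetical illustration of how these flags gate the per-supernode work (the function, types, and loop structure below are assumptions for exposition, not the actual PSelInv implementation):

```cpp
#include <vector>

using Int = int;

// Illustrative container for the per-supernode communication flags
// described above; in PMatrix these are stored as BolNumMat / BolNumVec members.
struct CommFlags {
  std::vector<std::vector<bool>> isSendToBelow;   // numSuper x numProcRow
  std::vector<std::vector<bool>> isSendToRight;   // numSuper x numProcCol
  std::vector<bool> isRecvFromAbove;              // numSuper
  std::vector<bool> isRecvFromLeft;               // numSuper
  std::vector<bool> isSendToCrossDiagonal;        // numSuper
  std::vector<bool> isRecvFromCrossDiagonal;      // numSuper
};

// Skeleton of the flag checks performed at each supernode ksup.
void SelInvSkeleton( const CommFlags& f, Int numSuper, Int numProcRow, Int numProcCol )
{
  for( Int ksup = numSuper - 1; ksup >= 0; ksup-- ){
    // Processor row of ksup sends the U(ksup, jsup) blocks to the rows below.
    for( Int ip = 0; ip < numProcRow; ip++ )
      if( f.isSendToBelow[ksup][ip] ){ /* send {U(ksup, jsup) | jsup > ksup} to row ip */ }

    // Processor column of ksup sends the row indices of L(isup, ksup) to the right.
    for( Int jp = 0; jp < numProcCol; jp++ )
      if( f.isSendToRight[ksup][jp] ){ /* send {L(isup, ksup) | isup > ksup} to column jp */ }

    // Only processors receiving both pieces perform the GEMM update
    //   sum_{jsup} Ainv(isup, jsup) U^T(ksup, jsup).
    if( f.isRecvFromAbove[ksup] && f.isRecvFromLeft[ksup] ){ /* accumulate the update */ }

    // Cross-diagonal exchange (square processor grids only).
    if( f.isSendToCrossDiagonal[ksup] ){ /* send {L(isup, ksup) | isup > ksup} */ }
    if( f.isRecvFromCrossDiagonal[ksup] ){ /* receive and update Ainv(ksup, isup) */ }
  }
}
```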