#ifndef _PEXSI_symPACK_INTERF_IMPL_HPP_
#define _PEXSI_symPACK_INTERF_IMPL_HPP_

#include "sympack.hpp"
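
// Interface between symPACK and the parallel selected inversion (PSelInv)
// data structures:
//  - symPACKMatrixToSuperNode converts the symPACK supernodal structure to
//    the PMatrix SuperNodeType structure;
//  - symPACKMatrixToPMatrix converts a factored symPACK matrix to a
//    distributed PMatrix;
//  - PMatrixLtoU fills the U part of a PMatrix from its L part.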
template<typename T>
void symPACKMatrixToSuperNode(
    symPACK::symPACKMatrix<T>& SMat,
    SuperNodeType& super ){
  Int n = SMat.Size();
  // Permutation vectors. symPACK is 1-based while PEXSI is 0-based, and
  // the roles of perm and invp are swapped between the two packages.
  symPACK::Ordering & Order = (symPACK::Ordering &)SMat.GetOrdering();
  const std::vector<Int>& SInvp = Order.invp;
  super.permInv.Resize( SInvp.size() );
  const std::vector<Int>& SPerm = Order.perm;
  super.perm.Resize( SPerm.size() );

  for( Int i = 0; i < SPerm.size(); i++ ){
    super.permInv[i] = SPerm[i] - 1;
  }

  for( Int i = 0; i < SInvp.size(); i++ ){
    super.perm[i] = SInvp[i] - 1;
  }
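
  // Example: if Order.perm[4] == 2 (1-based), the assignments above give
  // super.permInv[4] == 1 (0-based); perm and invp therefore swap roles
  // between the two packages, in addition to the change of index base.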
  // No row permutation is used: perm_r is the identity.
  super.perm_r.Resize( n );
  for( Int i = 0; i < n; i++ ){
    super.perm_r[i] = i;
  }
  super.permInv_r = super.perm_r;
  // Supernodal partition. XSuper is 1-based in symPACK; superPtr is its
  // 0-based counterpart.
  std::vector<Int>& XSuper = SMat.GetSupernodalPartition();
  Int numSuper = XSuper.size() - 1;
  IntNumVec& superPtr = super.superPtr;
  superPtr.Resize( numSuper + 1 );
  for( Int i = 0; i < numSuper + 1; i++ ){
    superPtr[i] = XSuper[i] - 1;
  }
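
  // Example: XSuper = [1, 3, 6, 7] describes three supernodes, so
  // superPtr = [0, 2, 5, 6] and supernode 1 covers columns 2..4 (0-based).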
  // Supernode membership of each column (0-based).
  IntNumVec& superIdx = super.superIdx;
  superIdx.Resize( n );
  const std::vector<Int>& superMember = SMat.GetSupMembership();
  for( Int i = 0; i < n; i++ ){
    superIdx(i) = superMember[i] - 1;
  }
  // Elimination tree. PostParent is 1-based, with 0 marking a root; after
  // the shift to 0-based indexing a root gets parent n.
  const symPACK::ETree& etree = SMat.GetETree();
  super.etree.Resize(n);
  for( Int i = 0; i < etree.Size(); i++ ){
    super.etree[i] = etree.PostParent(i);
    if( super.etree[i] == 0 ){
      super.etree[i] = n + 1;
    }
    super.etree[i] -= 1;
  }
}
template<typename T>
void symPACKMatrixToPMatrix(
    symPACK::symPACKMatrix<T>& SMat,
    PMatrix<T>& PMat ){
  TIMER_START(symPACKtoPMatrix);
  Int mpirank, mpisize;
  const GridType* g = PMat.Grid();

  MPI_Comm comm = g->comm;
  MPI_Comm colComm = g->colComm;
  MPI_Comm rowComm = g->rowComm;
  MPI_Comm_size(comm, &mpisize);
  MPI_Comm_rank(comm, &mpirank);

  Int nprow = g->numProcRow, npcol = g->numProcCol;

  Int mpirow = mpirank / npcol;
  Int mpicol = mpirank % npcol;
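
  // Ranks are laid out row-major on the nprow x npcol grid, and blocks are
  // distributed 2D block-cyclically: block (isup, ksup) lives on processor
  // row isup % nprow and processor column ksup % npcol, i.e. on rank
  // (isup % nprow)*npcol + (ksup % npcol).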
  PMat.ColBlockIdx().clear();
  PMat.RowBlockIdx().clear();
  PMat.ColBlockIdx().resize( PMat.NumLocalBlockCol() );
  PMat.RowBlockIdx().resize( PMat.NumLocalBlockRow() );

  // Supernodal partition of PMat, used to map global rows to block rows.
  const SuperNodeType* super = PMat.SuperNode();
  const IntNumVec& superIdx = super->superIdx;
#if ( _DEBUGlevel_ >= 1 )
  statusOFS << "superIdx = " << superIdx << std::endl;
#endif
  symPACK::Mapping * mapping = (symPACK::Mapping*)SMat.GetMapping();

  Int numSuper = PMat.NumSuper();
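
  // Every supernode is broadcast from its symPACK owner to all ranks:
  // first the size of the serialized supernode, then the payload. Each
  // rank rebuilds a SuperNode object from the payload and copies the
  // pieces it owns in the 2D cyclic distribution into its local LBlocks.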
  symPACK::Icomm snodeIcomm;
  std::vector<char> buffer;
  for( Int iSuper = 0; iSuper < numSuper; iSuper++ ){
    symPACK::SuperNode<T> * snode;

    Int iOwner = mapping->Map(iSuper,iSuper);

    if( mpirank == iOwner ){
      // Owner: serialize the local supernode and broadcast it.
      Int iSuperLocal = SMat.snodeLocalIndex(iSuper+1);
      symPACK::SuperNode<T>* snodetmp = SMat.snodeLocal(iSuper+1);
#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "iSuper = " << iSuper
        << ", iSuperLocal = " << iSuperLocal
        << ", id = " << snodetmp->Id()
        << ", size = " << snodetmp->Size()
        << ", #Block = " << snodetmp->NZBlockCnt() << std::endl;
#endif
      snodetmp->Serialize( snodeIcomm );
      Int msgSize = snodeIcomm.size();
#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "msgSize = " << msgSize << std::endl;
#endif
      MPI_Bcast( &msgSize, 1, MPI_INT, mpirank, comm );
      MPI_Bcast( snodeIcomm.front(), msgSize, MPI_CHAR, mpirank, comm );

      // Rebuild a stand-alone copy from the serialized buffer so that the
      // same code path can be used on every rank below.
      snode = SMat.CreateSuperNode(SMat.GetOptions().decomposition,
          snodeIcomm.front(), msgSize, -1);
      snode->InitIdxToBlk();
    }
    else{
      // Non-owner: receive the serialized supernode and rebuild it.
      Int rootRank = iOwner;
      Int msgSize = 0;
      MPI_Bcast( &msgSize, 1, MPI_INT, rootRank, comm );
      buffer.resize(msgSize);
      MPI_Bcast( &buffer[0], msgSize, MPI_CHAR, rootRank, comm );
      snode = SMat.CreateSuperNode(SMat.GetOptions().decomposition,
          &buffer[0], msgSize, -1);
      snode->InitIdxToBlk();
    }
#if ( _DEBUGlevel_ >= 1 )
    statusOFS << "All communication is finished." << std::endl;
#endif
    // Extract the pieces owned by this rank in the 2D cyclic distribution.
    if( mpicol == ( iSuper % npcol ) ){
      Int jb = iSuper / npcol;
      std::vector<LBlock<T> >& Lcol = PMat.L(jb);
      std::set<Int> blkSet;
      Int superSize = snode->Size();

      // First pass: collect the set of target block rows owned by this
      // processor row.
      for( Int blkidx = 0; blkidx < snode->NZBlockCnt(); blkidx++ ){
        symPACK::NZBlockDesc desc = snode->GetNZBlockDesc( blkidx );
        Int nrows = snode->NRows(blkidx);
        Int firstRow = desc.GIndex - 1;
        Int lastRow = firstRow + nrows - 1;
#if ( _DEBUGlevel_ >= 1 )
        statusOFS << "firstRow = " << firstRow
          << ", lastRow = " << lastRow << std::endl;
#endif
        // A symPACK block may span several PEXSI supernodal block rows.
        for( Int i = superIdx(firstRow); i <= superIdx(lastRow); i++ ){
          if( mpirow == ( i % nprow ) ){
            blkSet.insert( i );
          }
        }
      }
      Int numBlkLocal = blkSet.size();
      std::vector<Int> blkVec;
      blkVec.insert( blkVec.end(), blkSet.begin(), blkSet.end() );
      Lcol.resize( blkVec.size() );

#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "Lcol.size = " << Lcol.size() << std::endl;
      statusOFS << "blkSet.size = " << blkSet.size() << std::endl;
      statusOFS << "blkVec = " << blkVec << std::endl;
#endif
      std::vector<std::vector<Int> > rowsBlk( Lcol.size() );
      std::vector<std::vector<T> > nzvalBlk( Lcol.size() );

      // Second pass: scatter each row of the symPACK block into the
      // staging area of the PEXSI block it belongs to.
      for( Int blkidx = 0; blkidx < snode->NZBlockCnt(); blkidx++ ){
        symPACK::NZBlockDesc desc = snode->GetNZBlockDesc( blkidx );
        Int nrows = snode->NRows(blkidx);
        Int firstRow = desc.GIndex - 1;
        Int lastRow = firstRow + nrows - 1;
        T* nzval = snode->GetNZval( desc.Offset );
        std::vector<Int>::iterator vi;
        Int pos;
        for( Int i = firstRow; i <= lastRow; i++ ){
          vi = find( blkVec.begin(), blkVec.end(), superIdx(i) );
          if( vi != blkVec.end() ){
            pos = vi - blkVec.begin();
            rowsBlk[pos].push_back(i);
            nzvalBlk[pos].insert(
                nzvalBlk[pos].end(),
                nzval, nzval + superSize );
          }
          // symPACK stores a block as nrows x superSize in row-major
          // order, so advance one full row at a time.
          nzval += superSize;
        }
      }
      // Assemble the LBlocks from the staging area.
      for( Int iblk = 0; iblk < Lcol.size(); iblk++ ){
        std::vector<Int>& rows = rowsBlk[iblk];
        std::vector<T>& nzval = nzvalBlk[iblk];
        LBlock<T>& LB = Lcol[iblk];
        LB.blockIdx = blkVec[iblk];
        LB.numRow = rows.size();
        LB.numCol = superSize;
        LB.rows = IntNumVec( LB.numRow, true, &rows[0] );
        if( LB.numRow * LB.numCol != nzval.size() ){
          std::ostringstream msg;
          msg << "message size does not match for the blockIdx " << LB.blockIdx << std::endl
            << "LB.numRow * LB.numCol = " << LB.numRow * LB.numCol << std::endl
            << "nzval.size = " << nzval.size() << std::endl;
          ErrorHandling( msg.str().c_str() );
        }
        // Convert the row-major staging buffer into the column-major
        // storage of LB.nzval.
        Transpose( NumMat<T>( LB.numCol, LB.numRow, true,
              &nzval[0] ), LB.nzval );
      }
    }
    // Release the rebuilt copy of the supernode.
    delete snode;
  }
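
  // Next, count for every destination rank the bytes needed to ship each
  // locally owned supernode row by row (a header per destination plus one
  // row index and one row of values per row), to prepare a bulk exchange.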
  std::vector<symPACK::SuperNode<T> * > & localSnodes = SMat.GetLocalSupernodes();
  std::vector<int> sizes(mpisize,0);

  for(Int supidx = 0; supidx < localSnodes.size(); supidx++){
    symPACK::SuperNode<T> * curSnode = localSnodes[supidx];
    Int ksup = curSnode->Id();
    Int superSize = curSnode->Size();

    std::vector<bool> isSent(mpisize,false);

    Int destCol = ksup % npcol;
    for( Int blkidx = 0; blkidx < curSnode->NZBlockCnt(); blkidx++ ){
      symPACK::NZBlockDesc desc = curSnode->GetNZBlockDesc( blkidx );
      Int nrows = curSnode->NRows(blkidx);
      Int firstRow = desc.GIndex - 1;
      Int lastRow = firstRow + nrows - 1;

      for( Int i = firstRow; i <= lastRow; i++ ){
        Int isup = superIdx[i];
        Int destRow = isup % nprow;
        Int pnum = destRow*npcol + destCol;
        if( !isSent[pnum] ){
          // Header, sent once per (supernode, destination) pair.
          sizes[pnum] += sizeof(Int) + sizeof(Int);
          isSent[pnum] = true;
        }
        // One row index plus one row of numerical values.
        sizes[pnum] += sizeof(Int) + superSize*sizeof(T);
      }
    }
  }
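
  // Turn the per-rank byte counts into send displacements. For example,
  // sizes = [12, 0, 24] yields displs = [0, 12, 12, 36], and the total
  // send size is displs.back() == 36.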
  std::vector<int> displs(mpisize+1,0);
  std::partial_sum(sizes.begin(),sizes.end(),displs.begin()+1);
  int total_send_size = displs.back();
  symPACK::Icomm * IsendPtr = new symPACK::Icomm(total_send_size,MPI_REQUEST_NULL);
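
  // Register the global indices of the nonzero blocks in every local block
  // column and block row, so that ColBlockIdx / RowBlockIdx describe the
  // local sparsity pattern.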
  for( Int jb = 0; jb < PMat.NumLocalBlockCol(); jb++ ){
    Int bnum = GBj( jb, g );
    if( bnum >= numSuper ) continue;
    std::vector<LBlock<T> >& Lcol = PMat.L(jb);
    for( Int iblk = 0; iblk < Lcol.size(); iblk++ ){
      LBlock<T>& LB = Lcol[iblk];
      PMat.ColBlockIdx(jb).push_back( LB.blockIdx );
      PMat.RowBlockIdx( LBi( LB.blockIdx, g ) ).push_back( bnum );
    }
  }
  for( Int ib = 0; ib < PMat.NumLocalBlockRow(); ib++ ){
    Int bnum = GBi( ib, g );
    if( bnum >= numSuper ) continue;
    std::vector<UBlock<T> >& Urow = PMat.U(ib);
    for( Int jblk = 0; jblk < Urow.size(); jblk++ ){
      UBlock<T>& UB = Urow[jblk];
      PMat.RowBlockIdx(ib).push_back( UB.blockIdx );
      PMat.ColBlockIdx( LBj( UB.blockIdx, g ) ).push_back( bnum );
    }
  }
  // Keep the block index lists sorted.
  for( Int ib = 0; ib < PMat.NumLocalBlockRow(); ib++ ){
    std::sort(PMat.RowBlockIdx(ib).begin(),PMat.RowBlockIdx(ib).end(),std::less<Int>());
  }
  for( Int jb = 0; jb < PMat.NumLocalBlockCol(); jb++ ){
    std::sort(PMat.ColBlockIdx(jb).begin(),PMat.ColBlockIdx(jb).end(),std::less<Int>());
  }
  // symPACK stores only the lower triangle of each diagonal block;
  // complete the strict upper triangle by symmetry.
  for( Int ksup = 0; ksup < numSuper; ksup++ ){
    Int pcol = ( ksup % npcol );
    Int prow = ( ksup % nprow );
    if( mpirow == prow ){
      if( mpicol == pcol ){
        Int jb = ksup / npcol;
        std::vector<LBlock<T> >& Lcol = PMat.L(jb);
        // The diagonal block is the first block of the column.
        LBlock<T>& LB = Lcol[0];
        for(Int row = 0; row < LB.nzval.m(); row++){
          for(Int col = row+1; col < LB.nzval.n(); col++){
            LB.nzval(row, col) = LB.nzval(col, row);
          }
        }
      }
    }
  }
  TIMER_STOP(symPACKtoPMatrix);
}
template<typename T>
void PMatrixLtoU( PMatrix<T>& PMat ){
  TIMER_START(PMatrixLtoU);

  Int mpirank, mpisize;
  const GridType* g = PMat.Grid();
  // Supernodal partition, needed below for the U block sizes.
  const SuperNodeType* super = PMat.SuperNode();

  MPI_Comm comm = g->comm;
  MPI_Comm colComm = g->colComm;
  MPI_Comm rowComm = g->rowComm;
  MPI_Comm_size(comm, &mpisize);
  MPI_Comm_rank(comm, &mpirank);

  Int nprow = g->numProcRow, npcol = g->numProcCol;
  Int mpirow = mpirank / npcol;
  Int mpicol = mpirank % npcol;
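
  // For every supernode ksup, the column of L blocks below the diagonal is
  // transposed into the row of U blocks to the right of the diagonal, in
  // four steps:
  //  1. each rank of the owning processor column reports, to the root row,
  //     which processor columns must receive blocks and how many;
  //  2. the root row scatters, along the processor row of ksup, the list
  //     of sending rows and the number of U blocks to allocate;
  //  3. senders serialize their L blocks and ship them to the receivers;
  //  4. receivers (and the rank owning both sides) transpose the L blocks
  //     into their U blocks.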
  Int numSuper = PMat.NumSuper();
  for( Int ksup = 0; ksup < numSuper; ksup++ ){
#if ( _DEBUGlevel_ >= 1 )
    statusOFS << "----------------------- " << ksup << std::endl;
#endif
    // Buffers describing who sends what to whom for this supernode.
    std::vector<Int> all_proc_list;
    std::vector<Int> all_blocks_cnt;
    std::vector<Int> sizes;
    std::vector<Int> displs;

    std::vector<Int> sender_proc;
    std::vector<std::list<LBlock<T> * > > blocks_to_receiver;

    std::vector<Int> receiver_proc;
    // Step 1: the owning processor column determines the receivers.
    if( mpicol == ( ksup % npcol ) ){
      Int jb = ksup / npcol;
      std::vector<LBlock<T> >& Lcol = PMat.L(jb);
      Int root = (ksup % nprow);

      std::set<Int> proc_set;
      blocks_to_receiver.resize(npcol);
      // Skip the diagonal block, which is stored on the root row.
      Int startBlk = mpirow==root?1:0;
      for( Int iblk = startBlk; iblk < Lcol.size(); iblk++ ){
        LBlock<T>& LB = Lcol[iblk];
        // The transposed block U(ksup, snode_idx) lives in the processor
        // column of snode_idx.
        Int snode_idx = LB.blockIdx;
        Int tgt_pcol = snode_idx % npcol;
        blocks_to_receiver[tgt_pcol].push_back(&LB);
        proc_set.insert(tgt_pcol);
      }

      receiver_proc.insert(receiver_proc.begin(),proc_set.begin(),proc_set.end());
      // Gather the receiver lists from the whole processor column onto the
      // root row.
      mpi::Gatherv(receiver_proc,all_proc_list,sizes,displs,root, colComm);

      // Gather the number of blocks destined to each receiver.
      std::vector<Int> blocks_cnt(receiver_proc.size());
      for(Int j = 0; j < receiver_proc.size(); ++j){
        Int pcol = receiver_proc[j];
        std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];
        blocks_cnt[j] = blocks.size();
      }
      mpi::Gatherv(blocks_cnt,all_blocks_cnt,root, colComm);
      if( mpirow == root ){
        // Invert the gathered lists: for each receiving processor column,
        // record the sending rows and the total U row size.
        std::vector<std::set<Int> > sender_procs(npcol);
        std::vector<Int> urow_sizes(npcol,0);
        for(Int prow = 0; prow < nprow; ++prow){
          Int * recv_list = &all_proc_list[displs[prow]];
          Int * recv_blocks = &all_blocks_cnt[displs[prow]];
          Int size = sizes[prow];
          for(Int i = 0; i < size; ++i){
            Int pcol = recv_list[i];
            sender_procs[pcol].insert(prow);
            Int ucol_contrib = recv_blocks[i];
            urow_sizes[pcol] += ucol_contrib;
          }
        }
        // Repack the per-column sender lists for the scatter along the
        // processor row of ksup.
        all_blocks_cnt = urow_sizes;
        all_proc_list.clear();
        sizes.resize(npcol);
        displs.resize(npcol);
        Int totalsize = 0;
        for(Int pcol = 0; pcol < npcol; ++pcol){
          sizes[pcol] = sender_procs[pcol].size()*sizeof(Int);
          displs[pcol] = totalsize;
          totalsize += sizes[pcol];
        }
        all_proc_list.reserve(totalsize / sizeof(Int));
        for(Int pcol = 0; pcol < npcol; ++pcol){
          all_proc_list.insert(all_proc_list.end(),sender_procs[pcol].begin(),sender_procs[pcol].end());
        }
      }
    }
    // Step 2: scatter, along the processor row of ksup, the sender lists
    // and the number of U blocks each receiver must allocate.
    if( mpirow == ( ksup % nprow ) ){
      Int root = (ksup % npcol);
      Int localSize = 0;
      MPI_Scatter(mpicol==root?&sizes[0]:NULL, sizeof(Int), MPI_BYTE,
          &localSize, sizeof(Int), MPI_BYTE, root, rowComm);
      sender_proc.resize(localSize / sizeof(Int));
      MPI_Scatterv(mpicol==root?&all_proc_list[0]:NULL,
          mpicol==root?&sizes[0]:NULL,
          mpicol==root?&displs[0]:NULL, MPI_BYTE,
          &sender_proc[0], localSize, MPI_BYTE, root, rowComm);

      Int urowSize = 0;
      MPI_Scatter(mpicol==root?&all_blocks_cnt[0]:NULL, sizeof(Int), MPI_BYTE,
          &urowSize, sizeof(Int), MPI_BYTE, root, rowComm);
      // Allocate the U blocks of the local block row.
      Int ib = ksup / nprow;
      std::vector<UBlock<T> >& Urow = PMat.U(ib);
      Urow.resize(urowSize);
    }
    // Step 3: senders serialize and ship their L blocks.
    if( mpicol == ( ksup % npcol ) ){
      std::vector<Int> mask( LBlockMask::TOTAL_NUMBER, 1 );
      for(Int tpcol = 0; tpcol < npcol; tpcol++){
        // Rotate the starting column so that the receivers are not all
        // contacted in the same order.
        Int pcol = (mpicol+1+tpcol) % npcol;
        Int pnum = (ksup % nprow)*npcol + pcol;
        if( pnum != mpirank ){
          std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];
          if( blocks.size() > 0 ){
            std::stringstream sstm;
            Int numLBlocks = blocks.size();
#if ( _DEBUGlevel_ >= 1 )
            statusOFS << "Sending " << numLBlocks << " LBlocks" << std::endl;
#endif
            serialize( numLBlocks, sstm, NO_MASK );
            typename std::list<LBlock<T> *>::iterator it;
            for(it = blocks.begin(); it!=blocks.end(); ++it){
              LBlock<T>& LB = *(*it);
#if ( _DEBUGlevel_ >= 1 )
              statusOFS << "Sent LB: " << LB << std::endl;
#endif
              serialize(LB, sstm, mask);
            }
            // NOTE (assumption): ship the serialized stream with PEXSI's
            // mpi::Send; the tag values 2*ksup / 2*ksup+1 are placeholders,
            // as the original call is elided in this listing.
            mpi::Send( sstm, pnum, 2*ksup, 2*ksup+1, comm );
          }
        }
      }
    }
    // Step 4: receivers deserialize the incoming L blocks and transpose
    // them into U blocks.
    if( mpirow == ( ksup % nprow ) ){
      std::vector<Int> mask( LBlockMask::TOTAL_NUMBER, 1 );
      Int ib = ksup / nprow;
      std::vector<UBlock<T> >& Urow = PMat.U(ib);
      Int pos = 0;  // next slot of Urow to fill
      for(Int i = 0; i < sender_proc.size(); ++i){
        Int prow = sender_proc[i];
        Int pnum = (prow)*npcol + (ksup % npcol);
        if( pnum != mpirank ){
          std::stringstream sstm;
          // NOTE (assumption): matching receive for the send above, with
          // the same placeholder tags.
          mpi::Recv( sstm, pnum, 2*ksup, 2*ksup+1, comm );
          Int numLBlocks = 0;
          deserialize(numLBlocks, sstm, NO_MASK);
#if ( _DEBUGlevel_ >= 1 )
          statusOFS << "Receiving " << numLBlocks << " LBlocks" << std::endl;
#endif
          for(Int j = 0; j < numLBlocks; ++j){
            LBlock<T> LB;
            deserialize(LB, sstm, mask);
#if ( _DEBUGlevel_ >= 1 )
            statusOFS << "Received LB: " << LB << std::endl;
#endif
            // The U block is the transpose of the received L block.
            UBlock<T>& UB = Urow[pos++];
            UB.blockIdx = LB.blockIdx;
            UB.numRow = SuperSize( ksup, super );
            UB.numCol = LB.numRow;
            UB.cols = LB.rows;
            Transpose( LB.nzval, UB.nzval );
          }
        }
        else{
          // Local blocks: this rank owns both the L and the U side, so no
          // communication is needed.
          Int ib = ksup / nprow;
          std::vector<UBlock<T> >& Urow = PMat.U(ib);
          Int pcol = mpicol;
          std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];

          typename std::list<LBlock<T> *>::iterator it;
          for(it = blocks.begin(); it!=blocks.end(); ++it){
            LBlock<T>& LB = *(*it);
            UBlock<T>& UB = Urow[pos++];
            UB.blockIdx = LB.blockIdx;
            UB.numRow = SuperSize( ksup, super );
            UB.numCol = LB.numRow;
            UB.cols = LB.rows;
            Transpose( LB.nzval, UB.nzval );
          }
        }
      }
    }
  }

  TIMER_STOP(PMatrixLtoU);
}

#endif //_PEXSI_symPACK_INTERF_IMPL_HPP_