#ifndef _PEXSI_NGCHOL_INTERF_IMPL_HPP_
#define _PEXSI_NGCHOL_INTERF_IMPL_HPP_

#include "ngchol/sp_blas.hpp"
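// NGCHOLMatrixToSuperNode converts the NGCHOL supernodal structure
// (ordering, supernodal partition, supernode membership, and postordered
// elimination tree) into the SuperNodeType structure used by PSelInv.
// NGCHOL uses 1-based indices throughout, so every index is shifted to
// the 0-based convention of PMatrix.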
template<typename T>
void NGCHOLMatrixToSuperNode(
    LIBCHOLESKY::SupernodalMatrix<T>& SMat,
    SuperNodeType& super ){
  PushCallStack("NGCHOLMatrixToSuperNode");
  // Permutation and its inverse
  const LIBCHOLESKY::IntNumVec& SPerm = SMat.GetOrdering().perm;
  Int n = SPerm.m();
  super.perm.Resize( SPerm.m() );
  for( Int i = 0; i < SPerm.m(); i++ ){
    super.perm[i] = SPerm[i];
  }

  super.permInv.Resize( n );
  for( Int i = 0; i < n; i++ ){
    super.permInv[i] = i;
  }
  // Sorting the identity by the values of super.perm yields the inverse
  // permutation (comparator reconstructed; the original line is elided).
  std::sort( super.permInv.Data(), super.permInv.Data() + n,
      IndexComp<IntNumVec&>( super.perm ) );
  // Supernodal partition: superPtr[i] is the first column of supernode i
  LIBCHOLESKY::IntNumVec& XSuper = SMat.GetSupernodalPartition();
  Int numSuper = XSuper.m() - 1;
  IntNumVec& superPtr = super.superPtr;
  superPtr.Resize( numSuper + 1 );
  for( Int i = 0; i < numSuper + 1; i++ ){
    superPtr[i] = XSuper[i] - 1;
  }
  // Supernode membership: superIdx[i] is the supernode containing column i
  IntNumVec& superIdx = super.superIdx;
  superIdx.Resize( n );
  const LIBCHOLESKY::IntNumVec& superMember = SMat.GetSupMembership();
  for( Int i = 0; i < n; i++ ){
    superIdx(i) = superMember[i] - 1;
  }
  // Elimination tree in postorder
  LIBCHOLESKY::ETree& etree = SMat.GetETree();
  super.etree.Resize(n);
  for( Int i = 0; i < n; i++ ){
    super.etree[i] = etree.PostParent(i);
  }

  PopCallStack();
}  // -----  end of NGCHOLMatrixToSuperNode  -----
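// NGCHOLMatrixToPMatrix converts a distributed NGCHOL SupernodalMatrix
// into a PMatrix on the 2D process grid of PSelInv. NGCHOL distributes
// whole supernodes in a 1D round-robin fashion, so each supernode is
// serialized by its owner and broadcast; every process then keeps only
// the pieces it owns in the 2D block-cyclic layout.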
template<typename T>
void NGCHOLMatrixToPMatrix(
    LIBCHOLESKY::SupernodalMatrix<T>& SMat,
    PMatrix<T>& PMat ){
  PushCallStack("NGCHOLMatrixToPMatrix");
  Int mpirank, mpisize;
  const GridType *g = PMat.Grid();

  MPI_Comm comm = g->comm;
  MPI_Comm colComm = g->colComm;
  MPI_Comm rowComm = g->rowComm;
  MPI_Comm_size(comm, &mpisize);
  MPI_Comm_rank(comm, &mpirank);

  Int nprow = g->numProcRow, npcol = g->numProcCol;

  // Row-major rank ordering on the nprow x npcol grid
  Int mpirow = mpirank / npcol;
  Int mpicol = mpirank % npcol;
  PMat.ColBlockIdx().clear();
  PMat.RowBlockIdx().clear();

  // Supernode membership of each column; the original declaration is
  // elided, but it comes from the SuperNodeType attached to PMat.
  const IntNumVec& superIdx = PMat.SuperNode()->superIdx;
#if ( _DEBUGlevel_ >= 1 )
  statusOFS << "superIdx = " << superIdx << std::endl;
#endif
  Int numSuper = PMat.NumSuper();
  LIBCHOLESKY::Icomm snodeIcomm;
  std::vector<char> buffer;
  for( Int iSuper = 0; iSuper < numSuper; iSuper++ ){
    LIBCHOLESKY::SuperNode<T> snode;

    if( mpirank == ( iSuper % mpisize ) ){
      // Owner: serialize the local supernode and broadcast it.
      Int iSuperLocal = iSuper / mpisize;
      LIBCHOLESKY::SuperNode<T>& snodetmp =
        SMat.GetLocalSupernode(iSuperLocal);
#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "iSuper = " << iSuper
        << ", iSuperLocal = " << iSuperLocal
        << ", id = " << snodetmp.Id()
        << ", size = " << snodetmp.Size()
        << ", #Block = " << snodetmp.NZBlockCnt() << std::endl;
      statusOFS << "snode (before bcast) = " << snodetmp << std::endl;
#endif
      LIBCHOLESKY::Serialize( snodeIcomm, snodetmp );
      Int msgSize = snodeIcomm.size();
#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "msgSize = " << msgSize << std::endl;
#endif
      MPI_Bcast( &msgSize, 1, MPI_INT, mpirank, comm );
      MPI_Bcast( snodeIcomm.front(), msgSize, MPI_CHAR, mpirank, comm );
      LIBCHOLESKY::Deserialize( snodeIcomm.front(), snode );
    }
    else{
      // Non-owner: receive the size, then the serialized supernode.
      Int rootRank = ( iSuper % mpisize );
      Int msgSize = 0;
      MPI_Bcast( &msgSize, 1, MPI_INT, rootRank, comm );
      buffer.resize(msgSize);
      MPI_Bcast( &buffer[0], msgSize, MPI_CHAR, rootRank, comm );
      LIBCHOLESKY::Deserialize( &buffer[0], snode );
    }

#if ( _DEBUGlevel_ >= 1 )
    statusOFS << "All communication is finished." << std::endl;
#endif
    if( mpicol == ( iSuper % npcol ) ){
      Int jb = iSuper / npcol;
      std::vector<LBlock<T> >& Lcol = PMat.L(jb);
      std::set<Int> blkSet;
      Int superSize = snode.Size();

      // Collect the distinct block-row indices owned by this process row.
      for( Int blkidx = 0; blkidx < snode.NZBlockCnt(); blkidx++ ){
        LIBCHOLESKY::NZBlockDesc desc = snode.GetNZBlockDesc( blkidx );
        Int nrows = snode.NRows(blkidx);
        Int firstRow = desc.GIndex - 1;
        Int lastRow = firstRow + nrows - 1;
#if ( _DEBUGlevel_ >= 1 )
        statusOFS << "firstRow = " << firstRow
          << ", lastRow = " << lastRow << std::endl;
#endif
        for( Int i = superIdx(firstRow); i <= superIdx(lastRow); i++ ){
          if( mpirow == ( i % nprow ) ){
            blkSet.insert( i );
          }
        }
      }

      Int numBlkLocal = blkSet.size();
      std::vector<Int> blkVec;
      blkVec.insert( blkVec.end(), blkSet.begin(), blkSet.end() );
      Lcol.resize( blkVec.size() );
#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "Lcol.size = " << Lcol.size() << std::endl;
      statusOFS << "blkSet.size = " << blkSet.size() << std::endl;
      statusOFS << "blkVec = " << blkVec << std::endl;
#endif
      std::vector<std::vector<Int> > rowsBlk( Lcol.size() );
      std::vector<std::vector<T> > nzvalBlk( Lcol.size() );

      for( Int blkidx = 0; blkidx < snode.NZBlockCnt(); blkidx++ ){
        LIBCHOLESKY::NZBlockDesc desc = snode.GetNZBlockDesc( blkidx );
        Int nrows = snode.NRows(blkidx);
        Int firstRow = desc.GIndex - 1;
        Int lastRow = firstRow + nrows - 1;
        T* nzval = snode.GetNZval( desc.Offset );
        std::vector<Int>::iterator vi;
        Int pos;
        for( Int i = firstRow; i <= lastRow; i++ ){
          vi = std::find( blkVec.begin(), blkVec.end(), superIdx(i) );
          if( vi != blkVec.end() ){
            pos = vi - blkVec.begin();
            rowsBlk[pos].push_back(i);
            nzvalBlk[pos].insert( nzvalBlk[pos].end(),
                nzval, nzval + superSize );
          }
          // Advance to the next row: NGCHOL stores each row contiguously
          // with superSize entries (assumed; the original line is elided).
          nzval += superSize;
        }
      }
      // Assemble each LBlock; the buffered rows are row-major and are
      // transposed into the column-major nzval of the LBlock.
      // (The LB field assignments are reconstructed; those lines are
      // elided in this excerpt.)
      for( Int iblk = 0; iblk < Lcol.size(); iblk++ ){
        std::vector<Int>& rows = rowsBlk[iblk];
        std::vector<T>& nzval = nzvalBlk[iblk];
        LBlock<T>& LB = Lcol[iblk];
        LB.blockIdx = blkVec[iblk];
        LB.numRow = rows.size();
        LB.numCol = superSize;
        LB.rows = IntNumVec( LB.numRow, true, &rows[0] );
        if( LB.numRow * LB.numCol != nzval.size() ){
          std::ostringstream msg;
          msg << "message size does not match for the blockIdx "
            << LB.blockIdx << std::endl
            << "LB.numRow * LB.numCol = " << LB.numRow * LB.numCol << std::endl
            << "nzval.size = " << nzval.size() << std::endl;
          throw std::runtime_error( msg.str().c_str() );
        }
        LB.nzval.Resize( LB.numRow, LB.numCol );
        Transpose( NumMat<T>( LB.numCol, LB.numRow, true,
              &nzval[0] ), LB.nzval );
      }
    }
  }

  // ... (remainder of the routine elided in this excerpt)

  PopCallStack();
}  // -----  end of NGCHOLMatrixToPMatrix  -----
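// PMatrixLtoU fills the upper-triangular blocks of PMat by transposing
// the corresponding LBlocks: the UBlock in block row ksup and block
// column j is the transpose of the LBlock in block column ksup and block
// row j. Because the L and U blocks generally live on different
// processes of the grid, each block column first negotiates the
// communication pattern, then ships the blocks point-to-point.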
template<typename T>
void PMatrixLtoU( PMatrix<T>& PMat )
{
  PushCallStack("PMatrixLtoU");
  Int mpirank, mpisize;
  const GridType *g = PMat.Grid();

  MPI_Comm comm = g->comm;
  MPI_Comm colComm = g->colComm;
  MPI_Comm rowComm = g->rowComm;
  MPI_Comm_size(comm, &mpisize);
  MPI_Comm_rank(comm, &mpirank);

  Int nprow = g->numProcRow, npcol = g->numProcCol;

  Int mpirow = mpirank / npcol;
  Int mpicol = mpirank % npcol;
  Int numSuper = PMat.NumSuper();
  for( Int ksup = 0; ksup < numSuper; ksup++ ){
#if ( _DEBUGlevel_ >= 1 )
    statusOFS << "----------------------- " << ksup << std::endl;
#endif
    std::vector<Int> all_proc_list;
    std::vector<Int> all_blocks_cnt;
    std::vector<Int> sizes;
    std::vector<Int> displs;

    std::vector<Int> sender_proc;
    std::vector<std::list<LBlock<T> * > > blocks_to_receiver;

    std::vector<Int> receiver_proc;
    if( mpicol == ( ksup % npcol ) ){
      Int jb = ksup / npcol;
      std::vector<LBlock<T> >& Lcol = PMat.L(jb);
      Int root = (ksup % nprow);

      std::set<Int> proc_set;
      blocks_to_receiver.resize(npcol);
      Int startBlk = mpirow==root?1:0; // the root row skips its diagonal block
      for( Int iblk = startBlk; iblk < Lcol.size(); iblk++ ){
        LBlock<T>& LB = Lcol[iblk];
        // Destination: the process column owning block column LB.blockIdx
        Int snode_idx = LB.blockIdx;
        Int tgt_pcol = snode_idx % npcol;
        blocks_to_receiver[tgt_pcol].push_back(&LB);
        proc_set.insert(tgt_pcol);
      }

      receiver_proc.insert(receiver_proc.begin(), proc_set.begin(), proc_set.end());
      mpi::Gatherv(receiver_proc, all_proc_list, sizes, displs, root, colComm);

      std::vector<Int> blocks_cnt(receiver_proc.size());
      for(Int j = 0; j < receiver_proc.size(); ++j){
        Int pcol = receiver_proc[j];
        std::list<LBlock<T> *>& blocks = blocks_to_receiver[pcol];
        blocks_cnt[j] = blocks.size();
      }
      mpi::Gatherv(blocks_cnt, all_blocks_cnt, root, colComm);
      if( mpirow == root ){
        // Invert the gathered lists: for each destination process column,
        // record which process rows will send and the total block count.
        std::vector<std::set<Int> > sender_procs(npcol);
        std::vector<Int> urow_sizes(npcol,0);
        for(Int prow = 0; prow < nprow; ++prow){
          Int * recv_list = &all_proc_list[displs[prow]];
          Int * recv_blocks = &all_blocks_cnt[displs[prow]];
          Int size = sizes[prow];
          for(Int i = 0; i < size; ++i){
            Int pcol = recv_list[i];
            sender_procs[pcol].insert(prow);
            Int ucol_contrib = recv_blocks[i];
            urow_sizes[pcol] += ucol_contrib;
          }
        }

        // Flatten sender_procs into a single buffer for the Scatterv below.
        all_blocks_cnt = urow_sizes;
        all_proc_list.clear();
        sizes.resize(npcol);
        displs.resize(npcol);
        Int totalsize = 0;
        for(Int pcol = 0; pcol < npcol; ++pcol){
          sizes[pcol] = sender_procs[pcol].size()*sizeof(Int);
          displs[pcol] = totalsize;
          totalsize += sizes[pcol];
        }
        all_proc_list.reserve(totalsize / sizeof(Int));
        for(Int pcol = 0; pcol < npcol; ++pcol){
          all_proc_list.insert(all_proc_list.end(),sender_procs[pcol].begin(),sender_procs[pcol].end());
        }
      }
    }
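    // Step 2: the root scatters, to each process column of block row
    // ksup, its list of sender rows and the number of U blocks it will
    // own, so receivers can size Urow before the data arrives.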
    if( mpirow == ( ksup % nprow ) ){
      Int root = (ksup % npcol);
      Int localSize = 0;
      MPI_Scatter(mpicol==root?&sizes[0]:NULL, sizeof(Int), MPI_BYTE,
          &localSize, sizeof(Int), MPI_BYTE, root, rowComm);
      sender_proc.resize(localSize / sizeof(Int));
      if( mpicol == root ){
        MPI_Scatterv(&all_proc_list[0], &sizes[0], &displs[0], MPI_BYTE,
            &sender_proc[0], localSize, MPI_BYTE, root, rowComm);
      }
      else{
        MPI_Scatterv(NULL, NULL, NULL, MPI_BYTE,
            &sender_proc[0], localSize, MPI_BYTE, root, rowComm);
      }

      Int urowSize = 0;
      MPI_Scatter(mpicol==root?&all_blocks_cnt[0]:NULL, sizeof(Int), MPI_BYTE,
          &urowSize, sizeof(Int), MPI_BYTE, root, rowComm);
      // Pre-size the local block row of U.
      Int ib = ksup / nprow;
      std::vector<UBlock<T> >& Urow = PMat.U(ib);
      Urow.resize(urowSize);
    }
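    // Step 3: every owner of block column ksup serializes, per
    // destination process column, the LBlocks it holds and sends them to
    // the process in block row ksup and that column.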
    if( mpicol == ( ksup % npcol ) ){
      std::vector<Int> mask( LBlockMask::TOTAL_NUMBER, 1 );
      for(Int pcol = 0; pcol < npcol; pcol++){
        Int pnum = (ksup % nprow)*npcol + pcol;
        if( pnum != mpirank ){ // self-sends are handled locally below (assumed guard)
          std::list<LBlock<T> *>& blocks = blocks_to_receiver[pcol];
          if( blocks.size() > 0 ){ // skip empty destinations (assumed guard)
            std::stringstream sstm;
            Int numLBlocks = blocks.size();
#if ( _DEBUGlevel_ >= 1 )
            statusOFS << "Sending " << numLBlocks << " LBlocks" << std::endl;
#endif
            serialize( numLBlocks, sstm, NO_MASK );
            typename std::list<LBlock<T> *>::iterator it;
            for(it = blocks.begin(); it != blocks.end(); ++it){
              LBlock<T>& LB = *(*it);
#if ( _DEBUGlevel_ >= 1 )
              statusOFS << "Sent LB: " << LB << std::endl;
#endif
              serialize(LB, sstm, mask);
            }
            mpi::Send(sstm, pnum, PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_SIZE),
                PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_CONTENT), comm);
          }
        }
      }
    }
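    // Step 4: processes on block row ksup receive the serialized LBlocks
    // from each sender row and store their transposes as UBlocks.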
    if( mpirow == ( ksup % nprow ) ){
      std::vector<Int> mask( LBlockMask::TOTAL_NUMBER, 1 );
      Int ib = ksup / nprow;
      std::vector<UBlock<T> >& Urow = PMat.U(ib);
      Int idx = 0;
      for(Int i = 0; i < sender_proc.size(); ++i){
        Int prow = sender_proc[i];
        Int pnum = (prow)*npcol + (ksup % npcol);
        if( pnum != mpirank ){
          std::stringstream sstm;
          mpi::Recv(sstm, pnum, PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_SIZE),
              PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_CONTENT), comm);

          Int numLBlocks = 0;
          deserialize(numLBlocks, sstm, NO_MASK);
#if ( _DEBUGlevel_ >= 1 )
          statusOFS << "Receiving " << numLBlocks << " LBlocks" << std::endl;
#endif
          for(Int j = 0; j < numLBlocks; ++j){
            LBlock<T> LB;
            deserialize(LB, sstm, mask);
#if ( _DEBUGlevel_ >= 1 )
            statusOFS << "Received LB: " << LB << std::endl;
#endif
            // U stores the transpose of the received L block.
            UBlock<T>& UB = Urow[idx];
            UB.blockIdx = LB.blockIdx;
            UB.numCol = LB.numRow;
            UB.numRow = LB.numCol;
            Transpose(LB.nzval, UB.nzval);
            idx++;
          }
        }
        else{
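          // Sender and receiver coincide on the diagonal process: its own
          // LBlocks are transposed locally, without any communication.
          // (Branch placement reconstructed; the original control flow
          // around the local copy is elided in this excerpt.)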
          Int ib = ksup / nprow;
          std::vector<UBlock<T> >& Urow = PMat.U(ib);
          Int pcol = mpicol; // this process's own contribution (assumed)
          std::list<LBlock<T> *>& blocks = blocks_to_receiver[pcol];

          typename std::list<LBlock<T> *>::iterator it;
          for(it = blocks.begin(); it != blocks.end(); ++it){
            LBlock<T>& LB = *(*it);
            UBlock<T>& UB = Urow[idx];
            UB.blockIdx = LB.blockIdx;
            UB.numCol = LB.numRow;
            UB.numRow = LB.numCol;
            Transpose(LB.nzval, UB.nzval);
            idx++;
          }
        }
      }
    }
  }

  PopCallStack();
}  // -----  end of PMatrixLtoU  -----
#endif //_PEXSI_NGCHOL_INTERF_IMPL_HPP_