PEXSI
ngchol_interf_impl.hpp
/*
   Copyright (c) 2012 The Regents of the University of California,
   through Lawrence Berkeley National Laboratory.

   Author: Mathias Jacquelin and Lin Lin

   This file is part of PEXSI. All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   (1) Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
   (2) Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
   (3) Neither the name of the University of California, Lawrence Berkeley
   National Laboratory, U.S. Dept. of Energy nor the names of its contributors may
   be used to endorse or promote products derived from this software without
   specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
   ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
   ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You are under no obligation whatsoever to provide any bug fixes, patches, or
   upgrades to the features, functionality or performance of the source code
   ("Enhancements") to anyone; however, if you choose to make your Enhancements
   available either publicly, or directly to Lawrence Berkeley National
   Laboratory, without imposing a separate written license agreement for such
   Enhancements, then you hereby grant the following license: a non-exclusive,
   royalty-free perpetual license to install, use, modify, prepare derivative
   works, incorporate into other computer software, distribute, and sublicense
   such enhancements or derivative works thereof, in binary and source code form.
*/
#ifndef _PEXSI_NGCHOL_INTERF_IMPL_HPP_
#define _PEXSI_NGCHOL_INTERF_IMPL_HPP_

// Interface with NGCHOL
#include "ngchol.hpp"
#include "ngchol/sp_blas.hpp"

// Interface with PSelInv
#include "pexsi/pselinv.hpp"

// Interface with sparse matrix (CSC format)
#include "pexsi/sparse_matrix.hpp"
#include "pexsi/environment.hpp"
#include "pexsi/NumMat.hpp"
#include "pexsi/NumVec.hpp"

// Interface with LAPACK
#include "pexsi/lapack.hpp"


#include <list>

namespace PEXSI{

template<typename T> void NGCHOLMatrixToSuperNode(
    LIBCHOLESKY::SupernodalMatrix<T>& SMat,
    SuperNodeType& super ){
#ifndef _RELEASE_
  PushCallStack("NGCHOLMatrixToSuperNode");
#endif
  Int n = SMat.Size();

  // perm: copy the ordering used by NGCHOL
  const LIBCHOLESKY::IntNumVec& SPerm = SMat.GetOrdering().perm;
  super.perm.Resize( SPerm.m() );
  for( Int i = 0; i < SPerm.m(); i++ ){
    super.perm[i] = SPerm[i];
  }

  // permInv: sort the identity permutation with perm as the key, so that
  // permInv[k] is the position of the k-th smallest entry of perm
  super.permInv.Resize( n );
  for( Int i = 0; i < n; i++ ){
    super.permInv[i] = i;
  }
  std::sort( super.permInv.Data(), super.permInv.Data() + n,
      IndexComp<IntNumVec&>(super.perm) );

  LIBCHOLESKY::IntNumVec& XSuper = SMat.GetSupernodalPartition();
  Int numSuper = XSuper.m() - 1;

  // superPtr: convert the 1-based supernodal partition to 0-based column offsets
  IntNumVec& superPtr = super.superPtr;
  superPtr.Resize( numSuper + 1 );
  for( Int i = 0; i < numSuper + 1; i++ ){
    superPtr[i] = XSuper[i] - 1;
  }

  // superIdx: 0-based supernode index of each column
  IntNumVec& superIdx = super.superIdx;
  superIdx.Resize( n );
  const LIBCHOLESKY::IntNumVec& superMember = SMat.GetSupMembership();
  for( Int i = 0; i < n; i++ ){
    superIdx(i) = superMember[i] - 1;
  }

  // etree: parent of each column in the postordered elimination tree
  LIBCHOLESKY::ETree& etree = SMat.GetETree();
  super.etree.Resize(n);
  for( Int i = 0; i < n; i++ ){
    super.etree[i] = etree.PostParent(i);
  }

#ifndef _RELEASE_
  PopCallStack();
#endif
} // ----- end of NGCHOLMatrixToSuperNode -----
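
// Illustration (hypothetical values, not from the library): if the NGCHOL
// ordering is perm = [2, 0, 1], the sort above yields permInv = [1, 2, 0],
// since perm[1] <= perm[2] <= perm[0]. A caller only needs a factored
// LIBCHOLESKY::SupernodalMatrix and an empty SuperNodeType, e.g.
//
//   SuperNodeType super;
//   NGCHOLMatrixToSuperNode( cholMat, super );
//
// where cholMat denotes the distributed factor produced by NGCHOL.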



template<typename T> void NGCHOLMatrixToPMatrix(
    LIBCHOLESKY::SupernodalMatrix<T>& SMat,
    PMatrix<T>& PMat ){
#ifndef _RELEASE_
  PushCallStack("NGCHOLMatrixToPMatrix");
#endif
  // This routine assumes that the grid, the supernode partition and the
  // options of PMatrix have been set outside this routine.

  Int mpirank, mpisize;
  const GridType *g = PMat.Grid();

  // FIXME Check that PMatrix and SupernodalMatrix have the same communicator
  MPI_Comm comm = g->comm;
  MPI_Comm colComm = g->colComm;
  MPI_Comm rowComm = g->rowComm;
  MPI_Comm_size(comm, &mpisize);
  MPI_Comm_rank(comm, &mpirank);

  Int nprow = g->numProcRow, npcol = g->numProcCol;

  Int mpirow = mpirank / npcol;
  Int mpicol = mpirank % npcol;

  Int n = SMat.Size();
  PMat.ColBlockIdx().clear();
  PMat.RowBlockIdx().clear();
  PMat.ColBlockIdx().resize( PMat.NumLocalBlockCol() );
  PMat.RowBlockIdx().resize( PMat.NumLocalBlockRow() );

  const IntNumVec& superIdx = PMat.SuperNode()->superIdx;
#if ( _DEBUGlevel_ >= 1 )
  statusOFS << "superIdx = " << superIdx << std::endl;
#endif


  // For each supernode:
  //
  //   if the current processor owns the supernode (the ownership is block
  //   cyclic)
  //     serialize the information
  //     broadcast the information to all processors
  //   else
  //     receive the information from the processor owning the
  //     supernode, and save the information in a deserialized buffer
  //   endif
  //
  //   (Local operation from now on)
  //   if the current processor is in the correct processor column
  //     for each local block
  //       for each row
  //         if the current row number belongs to the current processor
  //           add the row index to a vector for LBuf
  //         endif
  //       endfor
  //     endfor
  //
  //     Allocate the storage of LBuf for saving the nonzero values
  //     Convert the format of rowind
  //
  //     for each local block
  //       for each row
  //         if the current row number belongs to the current processor
  //           append the nonzero values to nzval
  //         endif
  //       endfor
  //     endfor
  //   endif
  //
  //   Discard the temporary information in the buffer on all processors
  //
  // endfor
  //
  // Perform the L-to-U communication (PMatrixLtoU below) to fill the U part.
  //
  // Remark:
  // 1. The communication is performed in a supernode-by-supernode
  //    way. This may not be very fast. A first improvement is to
  //    broadcast all local supernodes at once and then do local
  //    post-processing.

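  // Illustration of the block-cyclic maps used here (hypothetical numbers):
  // with mpisize = 6 processors on a 2 x 3 grid (nprow = 2, npcol = 3),
  // processor mpirank = 4 has mpirow = 4 / 3 = 1 and mpicol = 4 % 3 = 1;
  // supernode iSuper = 7 is owned (1D cyclic over all processors) by
  // mpirank 7 % 6 = 1, and its block column is handled by processor
  // column 7 % 3 = 1 of the 2D grid.
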
  Int numSuper = PMat.NumSuper();
  LIBCHOLESKY::Icomm snodeIcomm;
  std::vector<char> buffer;
  for( Int iSuper = 0; iSuper < numSuper; iSuper++ ){
    LIBCHOLESKY::SuperNode<T> snode;

    if( mpirank == ( iSuper % mpisize ) ){
      // Get the local supernode
      Int iSuperLocal = iSuper / mpisize;
      LIBCHOLESKY::SuperNode<T>& snodetmp =
        SMat.GetLocalSupernode(iSuperLocal);
#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "iSuper = " << iSuper << ", iSuperLocal = " <<
        iSuperLocal << ", id = " << snodetmp.Id() << ", size = " <<
        snodetmp.Size() << ", #Block = " << snodetmp.NZBlockCnt() <<
        std::endl;
      statusOFS << "snode (before bcast) = " << snodetmp << std::endl;
#endif
      // Serialize the information in the current supernode
//      std::stringstream sstm;
//      serialize( snodetmp.Id(), sstm, NO_MASK );
//      serialize( snodetmp.Size(), sstm, NO_MASK );
//      serialize( snodetmp.NZBlockCnt(), sstm, NO_MASK );
//      for( Int blkidx = 0; blkidx < snodetmp.NZBlockCnt(); blkidx++){
//        LIBCHOLESKY::NZBlockDesc & nzblk_desc =
//          snodetmp.GetNZBlockDesc( blkidx );
//      } // for (blkidx)

      LIBCHOLESKY::Serialize( snodeIcomm, snodetmp );
      Int msgSize = snodeIcomm.size();
#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "msgSize = " << msgSize << std::endl;
#endif

      // Communicate the supernode
      MPI_Bcast( &msgSize, 1, MPI_INT, mpirank, comm );
      MPI_Bcast( snodeIcomm.front(), msgSize, MPI_CHAR, mpirank, comm );
      // Copy the data from the buffer to snode
      LIBCHOLESKY::Deserialize( snodeIcomm.front(), snode );
    } // if owning the supernode
    else{
      // Receive the supernode broadcast by its owner
      Int rootRank = ( iSuper % mpisize );
      Int msgSize;
      MPI_Bcast( &msgSize, 1, MPI_INT, rootRank, comm );
      buffer.resize(msgSize);
      MPI_Bcast( &buffer[0], msgSize, MPI_CHAR, rootRank, comm );
      LIBCHOLESKY::Deserialize( &buffer[0], snode );
    } // if not owning the supernode

#if ( _DEBUGlevel_ >= 1 )
    statusOFS << "All communication is finished." << std::endl;
#endif

    // Local operation from now on
    if( mpicol == ( iSuper % npcol ) ){
      Int jb = iSuper / npcol;
      std::vector<LBlock<T> >& Lcol = PMat.L(jb);
      std::set<Int> blkSet;
      Int superSize = snode.Size();

      // Count the number of blocks in the supernode belonging to this
      // processor
      for( Int blkidx = 0; blkidx < snode.NZBlockCnt(); blkidx++ ){
        LIBCHOLESKY::NZBlockDesc desc = snode.GetNZBlockDesc( blkidx );
        Int nrows = snode.NRows(blkidx);
        Int firstRow = desc.GIndex - 1;
        Int lastRow = firstRow + nrows - 1;
#if ( _DEBUGlevel_ >= 1 )
        statusOFS << "firstRow = " << firstRow << ", lastRow = " << lastRow << std::endl;
#endif
        for( Int i = superIdx(firstRow); i <= superIdx(lastRow); i++ ){
          if( mpirow == ( i % nprow ) ){
            blkSet.insert( i );
          } // if the current processor is in the right processor row
        }
      } // for (blkidx)

      Int numBlkLocal = blkSet.size();
      std::vector<Int> blkVec;
      blkVec.insert( blkVec.end(), blkSet.begin(), blkSet.end() );
      Lcol.resize( blkVec.size() );

      // Allocate the nonzero rows and nzvals
#if ( _DEBUGlevel_ >= 1 )
      statusOFS << "Lcol.size = " << Lcol.size() << std::endl;
      statusOFS << "blkSet.size = " << blkSet.size() << std::endl;
      statusOFS << "blkVec = " << blkVec << std::endl;
#endif

      std::vector<std::vector<Int> > rowsBlk( Lcol.size() );
      std::vector<std::vector<T> > nzvalBlk( Lcol.size() );

      for( Int blkidx = 0; blkidx < snode.NZBlockCnt(); blkidx++ ){
        LIBCHOLESKY::NZBlockDesc desc = snode.GetNZBlockDesc( blkidx );
        Int nrows = snode.NRows(blkidx);
        Int firstRow = desc.GIndex - 1;
        Int lastRow = firstRow + nrows - 1;
        T* nzval = snode.GetNZval( desc.Offset );
        std::vector<Int>::iterator vi;
        Int pos;
        for( Int i = firstRow; i <= lastRow; i++ ){
          vi = find( blkVec.begin(), blkVec.end(), superIdx(i) );
          if( vi != blkVec.end() ){
            pos = vi - blkVec.begin();
            rowsBlk[pos].push_back(i);
            nzvalBlk[pos].insert(
                nzvalBlk[pos].end(),
                nzval, nzval + superSize );
          }
          nzval += superSize;
        }
      } // for (blkidx)

      // Save the information to Lcol
      for ( Int iblk = 0; iblk < Lcol.size(); iblk++ ){
        std::vector<Int>& rows = rowsBlk[iblk];
        std::vector<T>& nzval = nzvalBlk[iblk];
        LBlock<T>& LB = Lcol[iblk];
        LB.blockIdx = blkVec[iblk];
        LB.numRow = rows.size();
        LB.numCol = superSize;
        LB.rows = IntNumVec( LB.numRow, true, &rows[0] );
        if( LB.numRow * LB.numCol != nzval.size() ){
          std::ostringstream msg;
          msg << "message size does not match for the blockIdx " << LB.blockIdx << std::endl
            << "LB.numRow * LB.numCol = " << LB.numRow * LB.numCol << std::endl
            << "nzval.size = " << nzval.size() << std::endl;
          throw std::runtime_error( msg.str().c_str() );
        }
        // Convert the row major format to column major format
        Transpose( NumMat<T>( LB.numCol, LB.numRow, true,
              &nzval[0] ), LB.nzval );
      }
    } // if the current processor is in the right processor column

    // Set the MPI Barrier
    MPI_Barrier( comm );
  }


  PMatrixLtoU( PMat );

#ifndef _RELEASE_
  PopCallStack();
#endif
} // ----- end of NGCHOLMatrixToPMatrix -----
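
// Typical use (sketch; assumes PMat has already been constructed with its
// grid, supernodal partition and options, which is done outside this file):
//
//   SuperNodeType super;
//   NGCHOLMatrixToSuperNode( cholMat, super );
//   // ... construct PMat from the grid, &super and the selected-inversion
//   //     options ...
//   NGCHOLMatrixToPMatrix( cholMat, PMat );  // fills L, then U via PMatrixLtoU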

template<typename T> void PMatrixLtoU( PMatrix<T>& PMat )
{

#ifndef _RELEASE_
  PushCallStack("PMatrixLtoU");
#endif
  // Send L to U
  Int mpirank, mpisize;
  const GridType *g = PMat.Grid();

  // FIXME Check that PMatrix and SupernodalMatrix have the same communicator
  MPI_Comm comm = g->comm;
  MPI_Comm colComm = g->colComm;
  MPI_Comm rowComm = g->rowComm;
  MPI_Comm_size(comm, &mpisize);
  MPI_Comm_rank(comm, &mpirank);

  Int nprow = g->numProcRow, npcol = g->numProcCol;


  Int mpirow = mpirank / npcol;
  Int mpicol = mpirank % npcol;

  Int numSuper = PMat.NumSuper();
  for( Int ksup = 0; ksup < numSuper; ksup++ ){
#if ( _DEBUGlevel_ >= 1 )
    statusOFS << "----------------------- " << ksup << std::endl;
#endif
    std::vector<Int> all_proc_list;
    std::vector<Int> all_blocks_cnt;
    std::vector<Int> sizes;
    std::vector<Int> displs;

    std::vector<Int> sender_proc;
    std::vector<std::list<LBlock<T> * > > blocks_to_receiver;

    std::vector<Int> receiver_proc;

    // If I am in the supernodal column
    if( mpicol == ( ksup % npcol ) ){
      Int jb = ksup / npcol;
      std::vector<LBlock<T> >& Lcol = PMat.L(jb);
      Int root = (ksup % nprow);
      // Compute the list of receiving processors based on my local structure
      std::set<Int> proc_set;
      blocks_to_receiver.resize(npcol);
      Int startBlk = mpirow==root?1:0;
      for ( Int iblk = startBlk; iblk < Lcol.size(); iblk++ ){
        LBlock<T>& LB = Lcol[iblk];
        // Column of the target processor
        Int snode_idx = LB.blockIdx;
        Int tgt_pcol = snode_idx % npcol;
        blocks_to_receiver[tgt_pcol].push_back(&LB);
        proc_set.insert(tgt_pcol);
      }
      // Insert the set into the vector to be able to send it
      receiver_proc.insert(receiver_proc.begin(),proc_set.begin(),proc_set.end());

      // Now do a gatherv on the root
      mpi::Gatherv(receiver_proc,all_proc_list,sizes,displs,root, colComm);

      // Do a gatherv of the local block counts destined to each processor
      std::vector<Int> blocks_cnt(receiver_proc.size());
      for(Int j = 0; j< receiver_proc.size();++j){
        Int pcol = receiver_proc[j];
        std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];
        blocks_cnt[j] = blocks.size();
      }
      mpi::Gatherv(blocks_cnt,all_blocks_cnt,root, colComm);


      // On the root, convert from a sender array to a receiver array
      if(mpirow == root){
        // sender_procs[pcol] contains the set of sender rows for processor column pcol
        std::vector<std::set<Int> > sender_procs(npcol);
        std::vector<Int> urow_sizes(npcol,0);
        for(Int prow = 0; prow < nprow; ++prow){
          Int * recv_list = &all_proc_list[displs[prow]];
          Int * recv_blocks = &all_blocks_cnt[displs[prow]];
          Int size = sizes[prow];
          for(Int i = 0; i<size;++i){
            Int pcol = recv_list[i];
            sender_procs[pcol].insert(prow);
            Int ucol_contrib = recv_blocks[i];
            urow_sizes[pcol]+=ucol_contrib;
          }
        }
        // Now prepare the data structures for a scatterv along the rows
        all_blocks_cnt = urow_sizes;
        all_proc_list.clear();
        sizes.resize(npcol);
        displs.resize(npcol);
        Int totalsize = 0;
        for(Int pcol = 0; pcol < npcol; ++pcol){
          sizes[pcol] = sender_procs[pcol].size()*sizeof(Int);
          displs[pcol] = totalsize;
          totalsize += sizes[pcol];
        }
        // Put the senders in the all_proc_list array
        all_proc_list.reserve(totalsize / sizeof(Int) );
        for(Int pcol = 0; pcol < npcol; ++pcol){
          all_proc_list.insert(all_proc_list.end(),sender_procs[pcol].begin(),sender_procs[pcol].end());
        }
      }
    }

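    // Illustration (hypothetical): if processor rows 0 and 2 each report one
    // block destined for processor column 1, the root ends up with
    // sender_procs[1] = {0, 2} and urow_sizes[1] = 2, and the processor in
    // column 1 of the supernodal row later stores exactly 2 UBlocks.
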
    // If I am in the supernodal row
    if( mpirow == ( ksup % nprow ) ){
      Int root = (ksup % npcol);
      // Scatter the sizes
      Int localSize = 0;
      MPI_Scatter(mpicol==root?&sizes[0]:NULL,sizeof(Int),MPI_BYTE,
          &localSize,sizeof(Int),MPI_BYTE, root, rowComm);
      sender_proc.resize(localSize / sizeof(Int));
      // Now do the scatterv
      if(mpicol==root){
        MPI_Scatterv(&all_proc_list[0],&sizes[0],&displs[0],MPI_BYTE,
            &sender_proc[0],localSize,MPI_BYTE, root, rowComm);
      }
      else{
        MPI_Scatterv(NULL,NULL,NULL,MPI_BYTE,
            &sender_proc[0],localSize,MPI_BYTE, root, rowComm);
      }

      Int urowSize = 0;
      MPI_Scatter(mpicol==root?&all_blocks_cnt[0]:NULL,sizeof(Int),MPI_BYTE,
          &urowSize,sizeof(Int),MPI_BYTE, root, rowComm);
      // Resize Urow
      Int ib = ksup / nprow;
      std::vector<UBlock<T> >& Urow = PMat.U(ib);
      Urow.resize(urowSize);


      // At this point we have both a sender AND a receiver list,
      // and Urow is resized
    }

    // Communicate this supernode
    // If I am a sender
    if( mpicol == ( ksup % npcol ) ){
      std::vector<Int> mask( LBlockMask::TOTAL_NUMBER, 1 );
      // For each target column
      for(Int pcol = 0; pcol < npcol; pcol++){
        Int pnum = (ksup % nprow)*npcol + pcol;
        if(pnum!=mpirank){
          // Serialize everything in the blocks_to_receiver list
          std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];
          if(blocks.size()>0){
            std::stringstream sstm;
            Int numLBlocks = blocks.size();
#if ( _DEBUGlevel_ >= 1 )
            statusOFS << "Sending " << numLBlocks << " LBlocks" << std::endl;
#endif
            serialize( numLBlocks , sstm, NO_MASK);
            typename std::list<LBlock<T> *>::iterator it;
            for(it = blocks.begin();
                it!=blocks.end();++it){
              LBlock<T> & LB = *(*it);
#if ( _DEBUGlevel_ >= 1 )
              statusOFS << "Sent LB: " << LB << std::endl;
#endif
              serialize(LB, sstm,mask);
            }
            mpi::Send(sstm, pnum, PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_SIZE),
                PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_CONTENT), comm);
          }
        }
      }
    }
    // If I am a receiver
    if( mpirow == ( ksup % nprow ) ){
      std::vector<Int> mask( LBlockMask::TOTAL_NUMBER, 1 );
      Int ib = ksup / nprow;
      std::vector<UBlock<T> >& Urow = PMat.U(ib);

      // For each sender row
      Int idx = 0;
      for(Int i = 0; i < sender_proc.size(); ++i){

        Int prow = sender_proc[i];
        Int pnum = (prow)*npcol + (ksup % npcol);
        if(pnum != mpirank){
          std::stringstream sstm;
          mpi::Recv(sstm,pnum,PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_SIZE),
              PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_CONTENT), comm);

          // Now deserialize and put everything in U
          Int numLBlocks = 0;
          deserialize(numLBlocks, sstm, NO_MASK);
#if ( _DEBUGlevel_ >= 1 )
          statusOFS << "Receiving " << numLBlocks << " LBlocks" << std::endl;
#endif
          for(Int i = 0; i<numLBlocks;++i){
            LBlock<T> LB;
            deserialize(LB, sstm, mask);

#if ( _DEBUGlevel_ >= 1 )
            statusOFS << "Received LB: " << LB << std::endl;
#endif
            // Put this LBlock in the appropriate UBlock
            UBlock<T> & UB = Urow[idx];

            UB.blockIdx = LB.blockIdx;
            UB.numCol = LB.numRow;
            UB.numRow = LB.numCol;
            UB.cols = LB.rows;
            Transpose(LB.nzval,UB.nzval);
            ++idx;
          }
        }
        else{
          Int pcol = mpicol;
          // Do the copy locally
          Int ib = ksup / nprow;
          std::vector<UBlock<T> >& Urow = PMat.U(ib);
          // Transpose everything in the blocks_to_receiver list
          std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];
          if(blocks.size()>0){
            typename std::list<LBlock<T> *>::iterator it;
            for(it = blocks.begin();
                it!=blocks.end();++it){
              LBlock<T> & LB = *(*it);
              UBlock<T> & UB = Urow[idx];

              UB.blockIdx = LB.blockIdx;
              UB.numCol = LB.numRow;
              UB.numRow = LB.numCol;
              UB.cols = LB.rows;
              Transpose(LB.nzval,UB.nzval);
              ++idx;
            }
          }
        }


      }
    }
  }
#ifndef _RELEASE_
  PopCallStack();
#endif

} // ----- end of PMatrixLtoU -----
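
// For example, an LBlock with numRow = 5 and numCol = 3 (a 5 x 3 panel below
// the diagonal) is mapped above to a UBlock with numRow = 3 and numCol = 5:
// the UBlock keeps the same blockIdx, its cols are the LBlock's rows, and its
// nzval is the 3 x 5 transpose of the LBlock's nzval.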


} // namespace PEXSI


#endif //_PEXSI_NGCHOL_INTERF_IMPL_HPP_