PEXSI
sympack_interf_impl.hpp
1 /*
2  Copyright (c) 2012 The Regents of the University of California,
3  through Lawrence Berkeley National Laboratory.
4 
5 Author: Mathias Jacquelin and Lin Lin
6 
7 This file is part of PEXSI. All rights reserved.
8 
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are met:
11 
12 (1) Redistributions of source code must retain the above copyright notice, this
13 list of conditions and the following disclaimer.
14 (2) Redistributions in binary form must reproduce the above copyright notice,
15 this list of conditions and the following disclaimer in the documentation
16 and/or other materials provided with the distribution.
17 (3) Neither the name of the University of California, Lawrence Berkeley
18 National Laboratory, U.S. Dept. of Energy nor the names of its contributors may
19 be used to endorse or promote products derived from this software without
20 specific prior written permission.
21 
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
23 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
26 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
29 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 You are under no obligation whatsoever to provide any bug fixes, patches, or
34 upgrades to the features, functionality or performance of the source code
35 ("Enhancements") to anyone; however, if you choose to make your Enhancements
36 available either publicly, or directly to Lawrence Berkeley National
37 Laboratory, without imposing a separate written license agreement for such
38 Enhancements, then you hereby grant the following license: a non-exclusive,
39 royalty-free perpetual license to install, use, modify, prepare derivative
40 works, incorporate into other computer software, distribute, and sublicense
41 such enhancements or derivative works thereof, in binary and source code form.
42  */
46 #ifndef _PEXSI_symPACK_INTERF_IMPL_HPP_
47 #define _PEXSI_symPACK_INTERF_IMPL_HPP_
48 
49 // Interface with symPACK
50 #include "sympack.hpp"
51 //#include "sympack/sp_blas.hpp"
52 
53 // Interface with PSelInv
54 #include "pexsi/pselinv.hpp"
55 
56 // Interface with sparse matrix (CSC format)
57 #include "pexsi/sparse_matrix.hpp"
58 #include "pexsi/environment.hpp"
60 #include "pexsi/NumMat.hpp"
61 #include "pexsi/NumVec.hpp"
62 
63 // Interface with LAPACK
64 #include "pexsi/lapack.hpp"
65 
66 
67 #include <list>
68 
69 namespace PEXSI{
70 
71 template<typename T> void symPACKMatrixToSuperNode(
72  symPACK::symPACKMatrix<T>& SMat,
73  SuperNodeType& super ){
74  Int n = SMat.Size();
75 
76  symPACK::Ordering & Order = (symPACK::Ordering &)SMat.GetOrdering();
77  const std::vector<Int>& SInvp = Order.invp;
78  super.permInv.Resize( SInvp.size() );
79  const std::vector<Int>& SPerm = Order.perm;
80  super.perm.Resize( SPerm.size() );
81 
82  // permInv (filled from symPACK's 1-based perm)
83  for( Int i = 0; i < SPerm.size(); i++ ){
84  super.permInv[i] = SPerm[i]-1;
85  }
86 
87  // perm (filled from symPACK's 1-based invp)
88  for( Int i = 0; i < SInvp.size(); i++ ){
89  super.perm[i] = SInvp[i]-1;
90  }
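 // Illustrative example: for a 1-based symPACK ordering with
 //   perm = {3,1,2}  and its inverse  invp = {2,3,1},
 // the two loops above yield the 0-based PEXSI arrays
 //   super.permInv = {2,0,1}   (from perm)
 //   super.perm    = {1,2,0}   (from invp)
 // so that super.perm[ super.permInv[i] ] == i for every i.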
91 
92  super.perm_r.Resize( n);
93  for( Int i = 0; i < n; i++ ){
94  super.perm_r[i] = i;
95  }
96  super.permInv_r = super.perm_r;
97 
98 
99  std::vector<Int>& XSuper = SMat.GetSupernodalPartition();
100  Int numSuper = XSuper.size() - 1;
101 
102  // superPtr
103  IntNumVec& superPtr = super.superPtr;
104  superPtr.Resize( numSuper + 1 );
105  for( Int i = 0; i < numSuper + 1; i++ ){
106  superPtr[i] = XSuper[i] - 1;
107  }
108 
109  // superIdx
110  IntNumVec& superIdx = super.superIdx;
111  superIdx.Resize( n );
112  const std::vector<Int>& superMember = SMat.GetSupMembership();
113  for( Int i = 0; i < n; i++ ){
114  superIdx(i) = superMember[i] - 1;
115  }
116 
117  // etree
118  const symPACK::ETree& etree = SMat.GetETree();
119  super.etree.Resize(n);
120  for( Int i = 0; i < etree.Size(); i++ ){
121  super.etree[i] = etree.PostParent(i);
122  if(super.etree[i]==0){
123  super.etree[i]=n+1;
124  }
125  super.etree[i]-=1;
126  }
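 // Note: the loop above treats PostParent(i) as a 1-based parent index with
 // 0 marking a root; a root is remapped to n+1 before the uniform shift by
 // -1, so roots end up with parent index n in the 0-based etree used by PEXSI.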
127 
128 } // ----- end of symPACKMatrixToSuperNode -----
129 
130 
131 
132 template<typename T> void symPACKMatrixToPMatrix(
133  symPACK::symPACKMatrix<T>& SMat,
134  PMatrix<T>& PMat ){
135  TIMER_START(symPACKtoPMatrix);
136  // This routine assumes that the grid (g), supernode partition, and options of PMatrix
137  // have been set outside this routine.
138 
139  Int mpirank, mpisize;
140  const GridType *g = PMat.Grid();
141 
142  // FIXME Check that PMatrix and symPACKMatrix have the same communicator
143  MPI_Comm comm = g->comm;
144  MPI_Comm colComm = g->colComm;
145  MPI_Comm rowComm = g->rowComm;
146  MPI_Comm_size(comm, &mpisize);
147  MPI_Comm_rank(comm, &mpirank);
148 
149  Int nprow = g->numProcRow, npcol = g->numProcCol;
150 
151  Int mpirow = mpirank / npcol;
152  Int mpicol = mpirank % npcol;
153 
154  Int n = SMat.Size();
155  PMat.ColBlockIdx().clear();
156  PMat.RowBlockIdx().clear();
157  PMat.ColBlockIdx().resize( PMat.NumLocalBlockCol() );
158  PMat.RowBlockIdx().resize( PMat.NumLocalBlockRow() );
159 
160  const IntNumVec& superIdx = PMat.SuperNode()->superIdx;
161 #if ( _DEBUGlevel_ >= 1 )
162  statusOFS << "superIdx = " << superIdx << std::endl;
163 #endif
164 
165 
166  // for loop over all supernodes
167  //
168  // if the current processor owns the supernode (the ownership is block
169  // cyclic)
170  // serialize the information
171  // broadcast the information to all processors
172  // elseif
173  // receive the information from the processor owning the
174  // supernode, and save the information in a deserialized buffer
175  // endif
176  //
177  // (Local operation from now)
178  // if the current processor owns the correct processor column
179  // for loop over the local blocks
180  // for loop over each row
181  // if the current row number belongs to the current processor
182  // add the row index to a vector for LBuf
183  // endif
184  // endfor
185  // endfor
186  //
187  // Allocate LBuf (of the right size) for saving the nonzero values
188  // Convert the format of rowind
189  //
190  // for loop over the local blocks
191  // for loop over each row
192  // if the current row number belongs to the current processor
193  // Append the nonzero values to nzval
194  // endif
195  // endfor
196  // endfor
197  // endif
198  //
199  // discard the temporary information in the buffer for all
200  // processors
201  //
202  // endfor
203  //
204  // Perform SendToCrossDiagonal to fill the U part.
205  //
206  // Remark:
207  // 1. The communication is performed in a supernode-by-supernode
208  // way. This may not be very fast. A first improvement is to
209  // broadcast all local supernodes at once and then do local
210  // post-processing.
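 // Illustrative example of the 2-D block-cyclic layout used below: with
 // nprow = 2 and npcol = 3, rank mpirank = 4 has mpirow = 4 / 3 = 1 and
 // mpicol = 4 % 3 = 1. Supernode iSuper = 7 belongs to processor column
 // 7 % 3 = 1 with local block column jb = 7 / 3 = 2, and a block whose rows
 // lie in supernode i = 5 belongs to processor row 5 % 2 = 1; rank 4
 // therefore stores that block in PMat.L(2).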
211  symPACK::Mapping * mapping = (symPACK::Mapping*)SMat.GetMapping();
212 
213  Int numSuper = PMat.NumSuper();
214 
215 
216 #if 1
217  symPACK::Icomm snodeIcomm;
218  std::vector<char> buffer;
219  for( Int iSuper = 0; iSuper < numSuper; iSuper++ ){
220  symPACK::SuperNode<T> * snode;
221 
222  Int iOwner = mapping->Map(iSuper,iSuper);
223 
224  if( mpirank == iOwner ){
225  // Get the local supernode
226  Int iSuperLocal = SMat.snodeLocalIndex(iSuper+1);
227  //symPACK::SuperNode<T>& snodetmp = SMat.GetLocalSupernode(iSuperLocal-1);
228  symPACK::SuperNode<T>* snodetmp = SMat.snodeLocal(iSuper+1);
229 #if ( _DEBUGlevel_ >= 1 )
230  statusOFS << "iSuper = " << iSuper << ", iSuperLocal = " <<
231  iSuperLocal << ", id = " << snodetmp->Id() << ", size = " <<
232  snodetmp->Size() << ", #Block = " << snodetmp->NZBlockCnt() <<
233  std::endl;
234  // statusOFS << "snode (before bcast) = " << *snodetmp << std::endl;
235 #endif
236  // Serialize the information in the current supernode
237  snodetmp->Serialize( snodeIcomm);
238  Int msgSize = snodeIcomm.size();
239 #if ( _DEBUGlevel_ >= 1 )
240  statusOFS << "msgSize = " << msgSize << std::endl;
241 #endif
242 
243  // Communicate the supernode
244  MPI_Bcast( &msgSize, 1, MPI_INT, mpirank, comm );
245  MPI_Bcast( snodeIcomm.front(), msgSize, MPI_CHAR, mpirank, comm );
246  // Copy the data from buffer to snode
247  //symPACK::Deserialize( snodeIcomm.front(), snode );
248 
249  snode = SMat.CreateSuperNode(SMat.GetOptions().decomposition,snodeIcomm.front(),msgSize,-1);
250  snode->InitIdxToBlk();
251  } // if owning the supernode
252  else{
253  // Receive the supernode
254  Int rootRank = iOwner;
255  Int msgSize;
256  MPI_Bcast( &msgSize, 1, MPI_INT, rootRank, comm );
257  buffer.resize(msgSize);
258  MPI_Bcast( &buffer[0], msgSize, MPI_CHAR, rootRank, comm );
259  //symPACK::Deserialize( &buffer[0], snode );
260 
261  snode = SMat.CreateSuperNode(SMat.GetOptions().decomposition,&buffer[0],msgSize,-1);
262  snode->InitIdxToBlk();
263  } // if not owning the supernode (every other processor receives the broadcast)
264 
265 #if ( _DEBUGlevel_ >= 1 )
266  statusOFS << "All communication is finished." << std::endl;
267 #endif
268 
269  // Local operation from now
270  if( mpicol == ( iSuper % npcol ) ){
271  Int jb = iSuper / npcol;
272  std::vector<LBlock<T> >& Lcol = PMat.L(jb);
273  std::set<Int> blkSet;
274  Int superSize = snode->Size();
275 
276  // Count the number of blocks in the supernode belonging to this
277  // processor
278  for( Int blkidx = 0; blkidx < snode->NZBlockCnt(); blkidx++ ){
279  symPACK::NZBlockDesc desc = snode->GetNZBlockDesc( blkidx );
280  Int nrows = snode->NRows(blkidx);
281  Int firstRow = desc.GIndex - 1;
282  Int lastRow = firstRow + nrows - 1;
283 #if ( _DEBUGlevel_ >= 1 )
284  statusOFS << "firstRow = " << firstRow << ", lastRow = " << lastRow << std::endl;
285 #endif
286  for( Int i = superIdx(firstRow); i <= superIdx(lastRow); i++ ){
287  if( mpirow == ( i % nprow ) ){
288  blkSet.insert( i );
289  } // if the current processor is in the right processor row
290  }
291  } // for ( blkidx )
292 
293  Int numBlkLocal = blkSet.size();
294  std::vector<Int> blkVec;
295  blkVec.insert( blkVec.end(), blkSet.begin(), blkSet.end() );
296  Lcol.resize( blkVec.size() );
297 
298  // Allocate the nonzero rows and nzvals
299 #if ( _DEBUGlevel_ >= 1 )
300  statusOFS << "Lcol.size = " << Lcol.size() << std::endl;
301  statusOFS << "blkSet.size = " << blkSet.size() << std::endl;
302  statusOFS << "blkVec = " << blkVec << std::endl;
303 #endif
304 
305  std::vector<std::vector<Int> > rowsBlk( Lcol.size() );
306  std::vector<std::vector<T> > nzvalBlk( Lcol.size() );
307 
308  for( Int blkidx = 0; blkidx < snode->NZBlockCnt(); blkidx++ ){
309  symPACK::NZBlockDesc desc = snode->GetNZBlockDesc( blkidx );
310  Int nrows = snode->NRows(blkidx);
311  Int firstRow = desc.GIndex - 1;
312  Int lastRow = firstRow + nrows - 1;
313  T* nzval = snode->GetNZval( desc.Offset );
314  std::vector<Int>::iterator vi;
315  Int pos;
316  for( Int i = firstRow; i <= lastRow; i++ ){
317  vi = find( blkVec.begin(), blkVec.end(), superIdx(i) );
318  if( vi != blkVec.end() ){
319  pos = vi - blkVec.begin();
320  rowsBlk[pos].push_back(i);
321  nzvalBlk[pos].insert(
322  nzvalBlk[pos].end(),
323  nzval, nzval + superSize );
324  }
325  nzval += superSize;
326  }
327  } // for ( blkidx )
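 // The loop above assumes that the nonzeros of a symPACK supernode are laid
 // out row by row, each row holding superSize contiguous entries, which is
 // why nzval advances by superSize for every global row, including rows that
 // belong to other processor rows. The Transpose below then converts each
 // extracted row-major block into the column-major NumMat layout of LBlock.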
328 
329  // Save the information to Lcol
330  for ( Int iblk = 0; iblk < Lcol.size(); iblk++ ){
331  std::vector<Int>& rows = rowsBlk[iblk];
332  std::vector<T>& nzval = nzvalBlk[iblk];
333  LBlock<T>& LB = Lcol[iblk];
334  LB.blockIdx = blkVec[iblk];
335  LB.numRow = rows.size();
336  LB.numCol = superSize;
337  LB.rows = IntNumVec( LB.numRow, true, &rows[0] );
338  if( LB.numRow * LB.numCol != nzval.size() ){
339  std::ostringstream msg;
340  msg << "message size does not match for the blockIdx " << LB.blockIdx << std::endl
341  << "LB.numRow * LB.numCol = " << LB.numRow * LB.numCol << std::endl
342  << "nzval.size = " << nzval.size() << std::endl;
343  ErrorHandling( msg.str().c_str() );
344  }
345  // Convert the row major format to column major format
346  Transpose( NumMat<T>( LB.numCol, LB.numRow, true,
347  &nzval[0] ), LB.nzval );
348  }
349  } // if the current processor is in the right processor column
350 
351  // Free the temporary supernode and synchronize before the next supernode
352  delete snode;
353  MPI_Barrier( comm );
354  }
355 #else
356  std::vector<symPACK::SuperNode<T> * > & localSnodes = SMat.GetLocalSupernodes();
357  std::vector<int> sizes(mpisize,0);
358 
359  //first do the counting
360  for(Int supidx = 0; supidx < localSnodes.size(); supidx++){
361  symPACK::SuperNode<T> * curSnode = localSnodes[supidx];
362  Int ksup = curSnode->Id();
363  Int superSize = curSnode->Size();
364 
365  std::vector<bool> isSent(mpisize,false);
366 
367  //find where this supernode is supposed to go
368  Int destCol = ksup % npcol;
369  for( Int blkidx = 0; blkidx < curSnode->NZBlockCnt(); blkidx++ ){
370  symPACK::NZBlockDesc desc = curSnode->GetNZBlockDesc( blkidx );
371  Int nrows = curSnode->NRows(blkidx);
372  Int firstRow = desc.GIndex - 1;
373  Int lastRow = firstRow + nrows - 1;
374 
375  for( Int i = firstRow; i <= lastRow; i++ ){
376  Int isup = superIdx[i];
377  Int destRow = isup % nprow;
378  Int pnum = destRow*npcol + destCol;
379 
380  if(!isSent[pnum]){
381  //Supernode index and number of rows
382  sizes[pnum]+=sizeof(Int)+sizeof(Int);
383  isSent[pnum] = true;
384  }
385 
386  //add one row + the index
387  sizes[pnum]+=sizeof(Int)+superSize*sizeof(T);
388  }
389  } // for ( blkidx )
390  } // for ( supidx )
391 
392  //compute the displacement and total buffersize required
393  std::vector<int> displs(mpisize+1,0);
394  displs[0]=0;
395  std::partial_sum(sizes.begin(),sizes.end(),displs.begin()+1);
396 
397  int total_send_size = displs.back();
398  symPACK::Icomm * IsendPtr = new symPACK::Icomm(total_send_size,MPI_REQUEST_NULL);
399 
400 
401 #endif
402 
403  PMatrixLtoU( PMat );
404 
405 
406  //Fill ColBlockIdx and RowBlockIdx
407  for( Int jb = 0; jb < PMat.NumLocalBlockCol(); jb++ ){
408  Int bnum = GBj( jb, g );
409  if( bnum >= numSuper ) continue;
410  std::vector<LBlock<T> >& Lcol = PMat.L(jb);
411  for( Int iblk = 0; iblk < Lcol.size(); iblk++ ){
412  LBlock<T>& LB = Lcol[iblk];
413  PMat.ColBlockIdx(jb).push_back(LB.blockIdx);
414  Int LBi = LB.blockIdx / g->numProcRow;
415  PMat.RowBlockIdx( LBi ).push_back( bnum );
416  }
417  }
418 
419  for( Int ib = 0; ib < PMat.NumLocalBlockRow(); ib++ ){
420  Int bnum = GBi( ib, g );
421  if( bnum >= numSuper ) continue;
422  std::vector<UBlock<T> >& Urow = PMat.U(ib);
423  for(Int jblk = 0; jblk < Urow.size(); jblk++ ){
424  UBlock<T> & UB = Urow[jblk];
425  PMat.RowBlockIdx(ib).push_back(UB.blockIdx);
426  Int LBj = UB.blockIdx / g->numProcCol;
427  PMat.ColBlockIdx( LBj ).push_back( bnum );
428  }
429  }
430 
431  for( Int ib = 0; ib < PMat.NumLocalBlockRow(); ib++ ){
432  std::sort(PMat.RowBlockIdx(ib).begin(),PMat.RowBlockIdx(ib).end(),std::less<Int>());
433  }
434  for( Int jb = 0; jb < PMat.NumLocalBlockCol(); jb++ ){
435  std::sort(PMat.ColBlockIdx(jb).begin(),PMat.ColBlockIdx(jb).end(),std::less<Int>());
436  }
437 
438  const SuperNodeType* super = PMat.SuperNode();
439  for( Int ksup = 0; ksup < numSuper; ksup++ ){
440  Int pcol = ( ksup % npcol );
441  Int prow = ( ksup % nprow );
442  if( mpirow == prow ){
443  NumMat<T> DiagBuf;
444  DiagBuf.Resize(SuperSize( ksup, super ), SuperSize( ksup, super ));
445  if( mpicol == pcol ){
446  Int jb = ksup / npcol;
447  std::vector<LBlock<T> >& Lcol = PMat.L(jb);
448  LBlock<T>& LB = Lcol[0];
449  for(Int row = 0; row<LB.nzval.m(); row++){
450  for(Int col = row+1; col<LB.nzval.n(); col++){
451  //LB.nzval(row,col) = T(std::conj(LB.nzval(col,row))) * LB.nzval(row,row);
452  LB.nzval(row,col) = (LB.nzval(col,row)) * LB.nzval(row,row);
453  }
454  }
455  DiagBuf = LB.nzval;
456  }
457  }
458  }
459  TIMER_STOP(symPACKtoPMatrix);
460 } // ----- end of symPACKMatrixToPMatrix -----
461 
462 template<typename T> void PMatrixLtoU( PMatrix<T>& PMat )
463 {
464  TIMER_START(PMatrixLtoU);
465 
466  //Send L to U
467  Int mpirank, mpisize;
468  const GridType *g = PMat.Grid();
469 
470  // FIXME Check that PMatrix and symPACKMatrix have the same communicator
471  MPI_Comm comm = g->comm;
472  MPI_Comm colComm = g->colComm;
473  MPI_Comm rowComm = g->rowComm;
474  MPI_Comm_size(comm, &mpisize);
475  MPI_Comm_rank(comm, &mpirank);
476 
477  Int nprow = g->numProcRow, npcol = g->numProcCol;
478  Int mpirow = mpirank / npcol;
479  Int mpicol = mpirank % npcol;
480 
481  Int numSuper = PMat.NumSuper();
482  for( Int ksup = 0; ksup < numSuper; ksup++ ){
483 #if ( _DEBUGlevel_ >= 1 )
484  statusOFS<<"----------------------- "<< ksup<<std::endl;
485 #endif
486  // Buffers describing the communication pattern for this supernode
487  std::vector<Int> all_proc_list;
488  std::vector<Int> all_blocks_cnt;
489  std::vector<Int> sizes;
490  std::vector<Int> displs;
491 
492  std::vector<Int> sender_proc;
493  std::vector<std::list<LBlock<T> * > > blocks_to_receiver;
494 
495  std::vector<Int> receiver_proc;
496  //If I'm in the supernodal column
497  if( mpicol == ( ksup % npcol ) ){
498  Int jb = ksup / npcol;
499  std::vector<LBlock<T> >& Lcol = PMat.L(jb);
500  Int root = (ksup % nprow);
501  //compute the list of receiving processors based on my local structure
502  std::set<Int> proc_set;
503  blocks_to_receiver.resize(npcol);
504  Int startBlk = mpirow==root?1:0;
505  for ( Int iblk = startBlk; iblk < Lcol.size(); iblk++ ){
506  LBlock<T>& LB = Lcol[iblk];
507  //column of the target processor
508  Int snode_idx = LB.blockIdx;
509  Int tgt_pcol = snode_idx % npcol;
510  blocks_to_receiver[tgt_pcol].push_back(&LB);
511  proc_set.insert(tgt_pcol);
512  }
513  //Insert the set into the vector to be able to send it
514  receiver_proc.insert(receiver_proc.begin(),proc_set.begin(),proc_set.end());
515 
516 
517  //Now do a gatherv on the root
518  mpi::Gatherv(receiver_proc,all_proc_list,sizes,displs,root, colComm);
519 
520  //Do a gatherv of the local blocks to each processors
521  std::vector<Int> blocks_cnt(receiver_proc.size());
522  for(Int j = 0; j< receiver_proc.size();++j){
523  Int pcol = receiver_proc[j];
524  std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];
525  blocks_cnt[j] = blocks.size();
526  }
527  mpi::Gatherv(blocks_cnt,all_blocks_cnt,root, colComm);
528 
529  //On the root, convert from a sender array to a receiver array
530  if(mpirow == root){
531 
532  // sender_procs[pcol] contains the set of processor rows sending to processor column pcol
533  std::vector<std::set<Int> > sender_procs(npcol);
534  std::vector<Int> urow_sizes(npcol,0);
535  for(Int prow = 0; prow < nprow; ++prow){
536  Int * recv_list = &all_proc_list[displs[prow]];
537  Int * recv_blocks = &all_blocks_cnt[displs[prow]];
538  Int size = sizes[prow];
539  for(Int i = 0; i<size;++i){
540  Int pcol = recv_list[i];
541  sender_procs[pcol].insert(prow);
542  Int ucol_contrib = recv_blocks[i];
543  urow_sizes[pcol]+=ucol_contrib;
544  }
545  }
546  //now prepare the data structures for a scatterv along the rows
547  all_blocks_cnt = urow_sizes;
548  all_proc_list.clear();
549  sizes.resize(npcol);
550  displs.resize(npcol);
551  Int totalsize = 0;
552  for(Int pcol = 0; pcol < npcol; ++pcol){
553  sizes[pcol] = sender_procs[pcol].size()*sizeof(Int);
554  displs[pcol] = totalsize;
555  totalsize += sizes[pcol];
556  }
557  //put the senders in the all_proc_list_array
558  all_proc_list.reserve(totalsize / sizeof(Int) );
559  for(Int pcol = 0; pcol < npcol; ++pcol){
560  all_proc_list.insert(all_proc_list.end(),sender_procs[pcol].begin(),sender_procs[pcol].end());
561  }
562 
563  }
564  }
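 // Illustrative example of the sender-to-receiver conversion on the root:
 // if processor rows 0 and 2 report that they will send blocks to processor
 // column 1, the root builds sender_procs[1] = {0,2} and accumulates their
 // block counts into urow_sizes[1]; the scatters below then tell every
 // processor in column 1 which rows will send to it and how large its Urow
 // must be.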
565 
566  //If I'm in the supernodal row
567  if( mpirow == ( ksup % nprow ) ){
568  Int root = (ksup % npcol);
569  //scatter the sizes
570  Int localSize = 0;
571  MPI_Scatter(mpicol==root?&sizes[0]:NULL,sizeof(Int),MPI_BYTE,
572  &localSize,sizeof(Int),MPI_BYTE, root, rowComm);
573  sender_proc.resize(localSize / sizeof(Int));
574  //Now do the scatterv;
575  MPI_Scatterv(mpicol==root?&all_proc_list[0]:NULL,
576  mpicol==root?&sizes[0]:NULL,
577  mpicol==root?&displs[0]:NULL,MPI_BYTE,
578  &sender_proc[0],localSize,MPI_BYTE, root, rowComm);
579 
580  Int urowSize = 0;
581  MPI_Scatter(mpicol==root?&all_blocks_cnt[0]:NULL,sizeof(Int),MPI_BYTE,
582  &urowSize,sizeof(Int),MPI_BYTE, root, rowComm);
583  //Resize Urow
584  Int ib = ksup / nprow;
585  std::vector<UBlock<T> >& Urow = PMat.U(ib);
586  Urow.resize(urowSize);
587 
588 
589  //At this point we have both a sender AND a receiver list
590  //and Urows are resized
591  }
592 
593 
594  //Communicate this supernode
595  //If I'm a sender
596  if( mpicol == ( ksup % npcol ) ){
597  std::vector<Int> mask( LBlockMask::TOTAL_NUMBER, 1 );
598  //for each target col
599  for(Int tpcol = 0; tpcol < npcol; tpcol++){
600  // don't send to the root first because it can be a sender too!
601  Int pcol = (mpicol+1+tpcol) % npcol;
602  Int pnum = (ksup % nprow)*npcol + pcol;
603  if(pnum!=mpirank){
604  //Serialize everything in the blocks_to_receiver list
605  std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];
606  if(blocks.size()>0){
607  std::stringstream sstm;
608  Int numLBlocks = blocks.size();
609 #if ( _DEBUGlevel_ >= 1 )
610  statusOFS<<"Sending "<<numLBlocks<<" LBlocks"<<std::endl;
611 #endif
612  serialize( numLBlocks , sstm, NO_MASK);
613  typename std::list<LBlock<T> *>::iterator it;
614  for(it = blocks.begin();
615  it!=blocks.end();++it){
616  LBlock<T> & LB = *(*it);
617 #if ( _DEBUGlevel_ >= 1 )
618  statusOFS<<"Sent LB: "<<LB<<std::endl;
619 #endif
620  serialize(LB, sstm,mask);
621  }
622 
623 
624  mpi::Send(sstm, pnum, PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_SIZE),
625  PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_CONTENT), comm);
626  }
627  }
628  }
629  }
630 
631  //If I'm a receiver
632  if( mpirow == ( ksup % nprow ) ){
633  std::vector<Int> mask( LBlockMask::TOTAL_NUMBER, 1 );
634  Int ib = ksup / nprow;
635  std::vector<UBlock<T> >& Urow = PMat.U(ib);
636 
637  //for each target row
638  Int idx = 0;
639  for(Int i = 0; i < sender_proc.size(); ++i){
640  Int prow = sender_proc[i];
641  Int pnum = (prow)*npcol + (ksup % npcol);
642  if(pnum != mpirank){
643  std::stringstream sstm;
644  mpi::Recv(sstm,MPI_ANY_SOURCE,PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_SIZE),
645  PMat.IdxToTag(ksup,PMatrix<T>::SELINV_TAG_L_CONTENT), comm);
646 
647  //now deserialize and put everything in U
648  Int numLBlocks = 0;
649  deserialize(numLBlocks, sstm, NO_MASK);
650 #if ( _DEBUGlevel_ >= 1 )
651  statusOFS<<"Receiving "<<numLBlocks<<" LBlocks"<<std::endl;
652 #endif
653  for(Int lblk = 0; lblk<numLBlocks;++lblk){
654  LBlock<T> LB;
655  deserialize(LB, sstm, mask);
656 
657 #if ( _DEBUGlevel_ >= 1 )
658  statusOFS<<"Received LB: "<<LB<<std::endl;
659 #endif
660  //put this LBlock in the appropriate UBlock
661  UBlock<T> & UB = Urow[idx];
662 
663  UB.blockIdx = LB.blockIdx;
664  UB.numCol = LB.numRow;
665  UB.numRow = LB.numCol;
666  UB.cols = LB.rows;
667  Transpose(LB.nzval,UB.nzval);
668  ++idx;
669  }
670  }
671  else{
672  Int pcol = mpicol;
673  //do the copy locally
674  Int ib = ksup / nprow;
675  std::vector<UBlock<T> >& Urow = PMat.U(ib);
676  //Serialize everything in the blocks_to_receiver list
677  std::list<LBlock<T> *> & blocks = blocks_to_receiver[pcol];
678  if(blocks.size()>0){
679  typename std::list<LBlock<T> *>::iterator it;
680  for(it = blocks.begin();
681  it!=blocks.end();++it){
682  LBlock<T> & LB = *(*it);
683  UBlock<T> & UB = Urow[idx];
684 
685  UB.blockIdx = LB.blockIdx;
686  UB.numCol = LB.numRow;
687  UB.numRow = LB.numCol;
688  UB.cols = LB.rows;
689  Transpose(LB.nzval,UB.nzval);
690  ++idx;
691  }
692  }
693  }
694 
695 
696  }
697  }
698  }
699 
700  TIMER_STOP(PMatrixLtoU);
701 } // ----- end of PMatrixLtoU -----
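 // Usage sketch (schematic; the PMatrix construction/setup step is only an
 // assumption here and is typically handled by the PEXSI driver code):
 //
 //   symPACK::symPACKMatrix<Complex> SMat;     // factorized elsewhere
 //   SuperNodeType super;
 //   symPACKMatrixToSuperNode( SMat, super );  // supernodal partition + permutations
 //   PMatrix<Complex> PMat;
 //   // ... set the grid, supernodal partition (super) and options of PMat ...
 //   symPACKMatrixToPMatrix( SMat, PMat );     // redistributes L and fills U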
702 
703 
704 } // namespace PEXSI
705 
706 
707 #endif //_PEXSI_symPACK_INTERF_IMPL_HPP_
708 