PEXSI
 All Classes Namespaces Files Functions Variables Typedefs Pages
TreeBcast.hpp
1 #ifndef _PEXSI_TREE_HPP_
2 #define _PEXSI_TREE_HPP_
3 
4 #include "pexsi/environment.hpp"
5 #include "pexsi/timer.h"
6 
7 #include <vector>
8 #include <map>
9 #include <algorithm>
10 #include <string>
11 //#include <random>
12 
13 // options to switch from a flat bcast/reduce tree to a binary tree
14 
15 #ifndef FTREE_LIMIT
16 #define FTREE_LIMIT 16
17 #endif
18 
19 
20 
21 namespace PEXSI{
22 
23 
24 extern std::map< MPI_Comm , std::vector<int> > commGlobRanks;
25 
26 #ifdef NEW_BCAST
27 
28 template< typename T>
29  class TreeBcast2{
30  protected:
31 
32  T * myData_;
33  MPI_Request recvRequest_;
34  NumVec<char> myRecvBuffer_;
35 
36  NumVec<MPI_Request> myRequests_;
37  NumVec<MPI_Status> myStatuses_;
38 
39  bool done_;
40  bool fwded_;
41  // bool isAllocated_;
42 
43  Int myRoot_;
44  MPI_Comm comm_;
45  vector<Int> myDests_;
46  Int myRank_;
47  Int msgSize_;
48  bool isReady_;
49  Int mainRoot_;
50  Int tag_;
51  Int numRecv_;
52 
53 #ifdef COMM_PROFILE_BCAST
54  protected:
55  Int myGRoot_;
56  Int myGRank_;
57  //vector<int> Granks_;
58  public:
59  void SetGlobalComm(const MPI_Comm & pGComm){
60  if(commGlobRanks.count(comm_)==0){
61  MPI_Group group2 = MPI_GROUP_NULL;
62  MPI_Comm_group(pGComm, &group2);
63  MPI_Group group1 = MPI_GROUP_NULL;
64  MPI_Comm_group(comm_, &group1);
65 
66  Int size;
67  MPI_Comm_size(comm_,&size);
68  vector<int> globRanks(size);
69  vector<int> Lranks(size);
70  for(int i = 0; i<size;++i){Lranks[i]=i;}
71  MPI_Group_translate_ranks(group1, size, &Lranks[0],group2, &globRanks[0]);
72  commGlobRanks[comm_] = globRanks;
73  }
74  myGRoot_ = commGlobRanks[comm_][myRoot_];
75  myGRank_ = commGlobRanks[comm_][myRank_];
76  //Granks_.resize(myDests_.size());
77  //for(int i = 0; i<myDests_.size();++i){
78  // Granks_[i] = globRanks[myDests_[i]];
79  //}
80 
81  //statusOFS<<myDests_<<std::endl;
82  //statusOFS<<Granks_<<std::endl;
83  }
84 #endif
85 
86 
87 
88  protected:
89  virtual void buildTree(Int * ranks, Int rank_cnt)=0;
90 
91 
92 
93 
94 
95  public:
96 
97  TreeBcast2(){
98  comm_ = MPI_COMM_WORLD;
99  myRank_=-1;
100  myRoot_ = -1;
101  msgSize_ = -1;
102  numRecv_ = -1;
103  tag_=-1;
104  mainRoot_=-1;
105  isReady_ = false;
106  myData_ = NULL;
107  recvRequest_ = MPI_REQUEST_NULL;
108  fwded_=false;
109  // isAllocated_=false;
110  done_ = false;
111  }
112 
113 
114  TreeBcast2(const MPI_Comm & pComm, Int * ranks, Int rank_cnt,Int msgSize):TreeBcast2(){
115  comm_ = pComm;
116  MPI_Comm_rank(comm_,&myRank_);
117  myRoot_ = -1;
118  msgSize_ = msgSize;
119  numRecv_ = 0;
120  tag_=-1;
121  mainRoot_=ranks[0];
122  isReady_ = false;
123  }
124 
125 
126  virtual TreeBcast2 * clone() const = 0;
127 
128  TreeBcast2(const TreeBcast2 & Tree){
129  this->Copy(Tree);
130  }
131 
132  virtual void Copy(const TreeBcast2 & Tree){
133  this->comm_ = Tree.comm_;
134  this->myRank_ = Tree.myRank_;
135  this->myRoot_ = Tree.myRoot_;
136  this->msgSize_ = Tree.msgSize_;
137 
138  this->numRecv_ = Tree.numRecv_;
139  this->tag_= Tree.tag_;
140  this->mainRoot_= Tree.mainRoot_;
141  this->isReady_ = Tree.isReady_;
142  this->myDests_ = Tree.myDests_;
143 
144 
145  this->recvRequest_ = Tree.recvRequest_;
146  this->myRecvBuffer_ = Tree.myRecvBuffer_;
147  this->myRequests_ = Tree.myRequests_;
148  this->myStatuses_ = Tree.myStatuses_;
149  this->myData_ = Tree.myData_;
150  if(Tree.myData_==(T*)&Tree.myRecvBuffer_[0]){
151  this->myData_=(T*)&this->myRecvBuffer_[0];
152  }
153 
154 
155 
156 
157  this->fwded_= Tree.fwded_;
158  // this->isAllocated_= Tree.isAllocated_;
159  this->done_= Tree.done_;
160  }
161 
162  void Reset(){
163  assert(done_);
164  CleanupBuffers();
165  done_=false;
166  myData_ = NULL;
167  recvRequest_ = MPI_REQUEST_NULL;
168  fwded_=false;
169  // isAllocated_=false;
170  isReady_=false;
171  numRecv_ = 0;
172  }
173 
174  //bool IsAllocated(){return isAllocated_;}
175 
176  virtual ~TreeBcast2(){
177  CleanupBuffers();
178  }
179 
180 
181  static TreeBcast2<T> * Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,double rseed);
182 
183  virtual inline Int GetNumRecvMsg(){return numRecv_;}
184  virtual inline Int GetNumMsgToSend(){return GetDestCount();}
185  inline void SetDataReady(bool rdy){
186  isReady_=rdy;
187  //numRecv_ = rdy?1:0;
188  }
189  inline void SetTag(Int tag){ tag_ = tag;}
190  inline int GetTag(){ return tag_;}
191 
192  bool IsDone(){return done_;}
193  bool IsDataReady(){return isReady_;}
194  bool IsDataReceived(){return numRecv_==1;}
195 
196  Int * GetDests(){ return &myDests_[0];}
197  Int GetDest(Int i){ return myDests_[i];}
198  Int GetDestCount(){ return myDests_.size();}
199  Int GetRoot(){ return myRoot_;}
200 
201  bool IsRoot(){ return myRoot_==myRank_;}
202  Int GetMsgSize(){ return msgSize_;}
203 
204  void ForwardMessage( ){
205  if(myRequests_.m()!=GetDestCount()){
206  myRequests_.Resize(GetDestCount());
207  SetValue(myRequests_,MPI_REQUEST_NULL);
208  }
209  for( Int idxRecv = 0; idxRecv < myDests_.size(); ++idxRecv ){
210  Int iProc = myDests_[idxRecv];
211  // Use Isend to send to multiple targets
212  MPI_Isend( myData_, msgSize_, MPI_BYTE,
213  iProc, tag_,comm_, &myRequests_[idxRecv] );
214 
215 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
216  statusOFS<<myRank_<<" FWD to "<<iProc<<" on tag "<<tag_<<std::endl;
217 #endif
218 #ifdef COMM_PROFILE_BCAST
219  // statusOFS<<idxRecv<<std::endl;
220  // statusOFS<<myDests_<<std::endl;
221  // statusOFS<<Granks_<<std::endl;
222  //PROFILE_COMM(myGRank_,Granks_[idxRecv],tag_,msgSize_);
223  PROFILE_COMM(myGRank_,commGlobRanks[comm_][iProc],tag_,msgSize_);
224 #endif
225  } // for (iProc)
226  fwded_ = true;
227  }
228 
229  void CleanupBuffers(){
230  myRequests_.Clear();
231  myStatuses_.Clear();
232  myRecvBuffer_.Clear();
233  }
234 
235 
236  void SetLocalBuffer(T * locBuffer){
237  if(myData_!=NULL && myData_!=locBuffer){
238  if(numRecv_>0){
239  CopyLocalBuffer(locBuffer);
240  }
241  if(!fwded_){
242  myRecvBuffer_.Clear();
243  }
244  }
245 
246  myData_ = locBuffer;
247  }
248 
249  //async wait and forward
250  virtual bool Progress(){
251  if(done_){
252  return true;
253  }
254 
255  bool done = false;
256 
257  if (myRank_==myRoot_){
258  if(isReady_){
259  if(!fwded_){
260 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
261  statusOFS<<myRank_<<" FORWARDING on tag "<<tag_<<std::endl;
262 #endif
263  ForwardMessage();
264  }
265  else{
266 
267  if(myStatuses_.m()!=GetDestCount()){
268  myStatuses_.Resize(GetDestCount());
269  recvRequest_ = MPI_REQUEST_NULL;
270  }
271  //test the send requests
272  int flag = 0;
273  int reqCnt = GetDestCount();
274  if(reqCnt>0){
275  assert(reqCnt == myRequests_.m());
276  MPI_Testall(reqCnt,&myRequests_[0],&flag,&myStatuses_[0]);
277  done = flag==1;
278  }
279  else{
280  done=true;
281  }
282  }
283  }
284  }
285  else{
286  bool received = (numRecv_==1);
287 
288  if(!received){
289  if(recvRequest_ == MPI_REQUEST_NULL ){
290 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
291  statusOFS<<myRank_<<" POSTING RECV on tag "<<tag_<<std::endl;
292 #endif
293  //post the recv
294  PostRecv();
295  }
296  else{
297 
298  if(myStatuses_.m()!=GetDestCount()){
299  myStatuses_.Resize(GetDestCount());
300  recvRequest_ = MPI_REQUEST_NULL;
301  }
302 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
303  statusOFS<<myRank_<<" TESTING RECV on tag "<<tag_<<std::endl;
304 #endif
305  //test
306  int flag = 0;
307  MPI_Status stat;
308  int test = MPI_Test(&recvRequest_,&flag,&stat);
309  assert(test==MPI_SUCCESS);
310 
311  if(flag==1){
312  numRecv_=1;
313  received = true;
314 
315  if(!fwded_){
316 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
317  statusOFS<<myRank_<<" FORWARDING on tag "<<tag_<<std::endl;
318 #endif
319  ForwardMessage();
320  }
321  }
322  }
323  }
324  else {
325  assert(fwded_);
326  //test the send requests
327  int flag = 0;
328  int reqCnt = GetDestCount();
329  if(reqCnt>0){
330  assert(reqCnt == myRequests_.m());
331  MPI_Testall(reqCnt,&myRequests_[0],&flag,&myStatuses_[0]);
332  done = flag==1;
333  }
334  else{
335  done=true;
336  }
337  }
338  }
339 
340  if(done){
341  //free the unnecessary arrays
342  myRequests_.Clear();
343  myStatuses_.Clear();
344 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
345  statusOFS<<myRank_<<" EVERYTHING COMPLETED on tag "<<tag_<<std::endl;
346 #endif
347  }
348 
349  done_ = done;
350 
351  return done;
352  }
353 
354  //blocking wait
355  void Wait(){
356  if(!done_){
357  while(!Progress());
358  }
359  }
360 
361  T * GetLocalBuffer(){
362  return myData_;
363  }
364 
365  virtual void PostRecv()
366  {
367  if(this->numRecv_<1 && this->recvRequest_==MPI_REQUEST_NULL && myRank_!=myRoot_){
368 
369  if(myData_==NULL){
370  myRecvBuffer_.Resize(msgSize_);
371  myData_ = (T*)&myRecvBuffer_[0];
372  }
373  MPI_Irecv( (char*)this->myData_, this->msgSize_, MPI_BYTE,
374  this->myRoot_, this->tag_,this->comm_, &this->recvRequest_ );
375  }
376  }
377 
378 
379 
380  void CopyLocalBuffer(T* destBuffer){
381  std::copy((char*)myData_,(char*)myData_+GetMsgSize(),(char*)destBuffer);
382  }
383 
384 
385 
386 
387  };
388 
389 
390 template< typename T>
391  class FTreeBcast2: public TreeBcast2<T>{
392  protected:
393  virtual void buildTree(Int * ranks, Int rank_cnt){
394 
395  Int idxStart = 0;
396  Int idxEnd = rank_cnt;
397 
398 
399 
400  this->myRoot_ = ranks[0];
401 
402  if(this->myRank_==this->myRoot_){
403  this->myDests_.insert(this->myDests_.begin(),&ranks[1],&ranks[0]+rank_cnt);
404  }
405 
406 #if (defined(BCAST_VERBOSE))
407  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
408  statusOFS<<"My dests are ";
409  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
410  statusOFS<<std::endl;
411 #endif
412  }
413 
414 
415 
416  public:
417  FTreeBcast2(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast2<T>(pComm,ranks,rank_cnt,msgSize){
418  //build the binary tree;
419  buildTree(ranks,rank_cnt);
420  }
421 
422 
423  virtual FTreeBcast2 * clone() const{
424  FTreeBcast2 * out = new FTreeBcast2(*this);
425  return out;
426  }
427  };
428 
429 template< typename T>
430  class BTreeBcast2: public TreeBcast2<T>{
431  protected:
455  virtual void buildTree(Int * ranks, Int rank_cnt){
456 
457  Int idxStart = 0;
458  Int idxEnd = rank_cnt;
459 
460 
461 
462  Int prevRoot = ranks[0];
463  while(idxStart<idxEnd){
464  Int curRoot = ranks[idxStart];
465  Int listSize = idxEnd - idxStart;
466 
467  if(listSize == 1){
468  if(curRoot == this->myRank_){
469  this->myRoot_ = prevRoot;
470  break;
471  }
472  }
473  else{
474  Int halfList = floor(ceil(double(listSize) / 2.0));
475  Int idxStartL = idxStart+1;
476  Int idxStartH = idxStart+halfList;
477 
478  if(curRoot == this->myRank_){
479  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
480  Int childL = ranks[idxStartL];
481  Int childR = ranks[idxStartH];
482 
483  this->myDests_.push_back(childL);
484  this->myDests_.push_back(childR);
485  }
486  else if ((idxEnd - idxStartH) > 0){
487  Int childR = ranks[idxStartH];
488  this->myDests_.push_back(childR);
489  }
490  else{
491  Int childL = ranks[idxStartL];
492  this->myDests_.push_back(childL);
493  }
494  this->myRoot_ = prevRoot;
495  break;
496  }
497 
498  if( this->myRank_ < ranks[idxStartH]){
499  idxStart = idxStartL;
500  idxEnd = idxStartH;
501  }
502  else{
503  idxStart = idxStartH;
504  }
505  prevRoot = curRoot;
506  }
507 
508  }
509 
510 #if (defined(BCAST_VERBOSE))
511  statusOFS<<"My root is "<<myRoot_<<std::endl;
512  statusOFS<<"My dests are ";
513  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
514  statusOFS<<std::endl;
515 #endif
516  }
517 
518 
519 
520  public:
521  BTreeBcast2(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast2<T>(pComm,ranks,rank_cnt,msgSize){
522  //build the binary tree;
523  buildTree(ranks,rank_cnt);
524  }
525 
526  virtual BTreeBcast2<T> * clone() const{
527  BTreeBcast2<T> * out = new BTreeBcast2<T>(*this);
528  return out;
529  }
530 
531  };
532 
533 
534 
535 template< typename T>
536  class ModBTreeBcast2: public TreeBcast2<T>{
537  protected:
538  double rseed_;
539 
540  virtual void buildTree(Int * ranks, Int rank_cnt){
541 
542  Int idxStart = 0;
543  Int idxEnd = rank_cnt;
544 
545  //sort the ranks with the modulo like operation
546  if(rank_cnt>1){
547  //Int new_idx = (int)((rand()+1.0) * (double)rank_cnt / ((double)RAND_MAX+1.0));
548 
549  // srand(ranks[0]+rank_cnt);
550  Int new_idx = (Int)rseed_ % (rank_cnt - 1) + 1;
551  //Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
552  //statusOFS<<"NEW IDX: "<<new_idx<<endl;
553 
554 
555 
556  Int * new_start = &ranks[new_idx];
557 
558  //for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
559 
560  // Int * new_start = std::lower_bound(&ranks[1],&ranks[0]+rank_cnt,ranks[0]);
561  //just swap the two chunks r[0] | r[1] --- r[new_start-1] | r[new_start] --- r[end]
562  // becomes r[0] | r[new_start] --- r[end] | r[1] --- r[new_start-1]
563  std::rotate(&ranks[1], new_start, &ranks[0]+rank_cnt);
564 
565  //for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
566  }
567 
568  Int prevRoot = ranks[0];
569  while(idxStart<idxEnd){
570  Int curRoot = ranks[idxStart];
571  Int listSize = idxEnd - idxStart;
572 
573  if(listSize == 1){
574  if(curRoot == this->myRank_){
575  this->myRoot_ = prevRoot;
576  break;
577  }
578  }
579  else{
580  Int halfList = floor(ceil(double(listSize) / 2.0));
581  Int idxStartL = idxStart+1;
582  Int idxStartH = idxStart+halfList;
583 
584  if(curRoot == this->myRank_){
585  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
586  Int childL = ranks[idxStartL];
587  Int childR = ranks[idxStartH];
588 
589  this->myDests_.push_back(childL);
590  this->myDests_.push_back(childR);
591  }
592  else if ((idxEnd - idxStartH) > 0){
593  Int childR = ranks[idxStartH];
594  this->myDests_.push_back(childR);
595  }
596  else{
597  Int childL = ranks[idxStartL];
598  this->myDests_.push_back(childL);
599  }
600  this->myRoot_ = prevRoot;
601  break;
602  }
603 
604  //not true anymore ?
605  //first half to
606  TIMER_START(FIND_RANK);
607  Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], this->myRank_);
608  TIMER_STOP(FIND_RANK);
609  if( pos != &ranks[idxStartH]){
610  idxStart = idxStartL;
611  idxEnd = idxStartH;
612  }
613  else{
614  idxStart = idxStartH;
615  }
616  prevRoot = curRoot;
617  }
618 
619  }
620 
621 #if (defined(REDUCE_VERBOSE))
622  statusOFS<<"My root is "<<myRoot_<<std::endl;
623  statusOFS<<"My dests are ";
624  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
625  statusOFS<<std::endl;
626 #endif
627  }
628 
629 
630 
631  public:
632  ModBTreeBcast2(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed):TreeBcast2<T>(pComm,ranks,rank_cnt,msgSize){
633  //build the binary tree;
634  rseed_ = rseed;
635  buildTree(ranks,rank_cnt);
636  }
637 
638  //virtual void Copy(const ModBTreeBcast & Tree){
639  // comm_ = Tree.comm_;
640  // myRank_ = Tree.myRank_;
641  // myRoot_ = Tree.myRoot_;
642  // msgSize_ = Tree.msgSize_;
643 
644  // numRecv_ = Tree.numRecv_;
645  // tag_= Tree.tag_;
646  // mainRoot_= Tree.mainRoot_;
647  // isReady_ = Tree.isReady_;
648  // myDests_ = Tree.myDests_;
649 
650  // rseed_ = Tree.rseed_;
651  // myRank_ = Tree.myRank_;
652  // myRoot_ = Tree.myRoot_;
653  // msgSize_ = Tree.msgSize_;
654 
655  // numRecv_ = Tree.numRecv_;
656  // tag_= Tree.tag_;
657  // mainRoot_= Tree.mainRoot_;
658  // isReady_ = Tree.isReady_;
659  // myDests_ = Tree.myDests_;
660  //}
661 
662  virtual ModBTreeBcast2 * clone() const{
663  ModBTreeBcast2 * out = new ModBTreeBcast2(*this);
664  return out;
665  }
666 
667  };
668 
669 
670 
671 #endif
672 
673 
674 
675 
676 
677 
678 
679 
680 
681 
682 
683 
684 
685 
686 
687 
688 
689 
690 
691 
692 
693 
694 
695 
696 
697 
698 
699 
700 
701 
702 
703 
704 
705 class TreeBcast{
706 protected:
707  Int myRoot_;
708  MPI_Comm comm_;
709  vector<Int> myDests_;
710  Int myRank_;
711  Int msgSize_;
712  bool isReady_;
713  Int mainRoot_;
714  Int tag_;
715  Int numRecv_;
716 
717 
718 #if defined(COMM_PROFILE_BCAST) || defined(COMM_PROFILE)
719 protected:
720  Int myGRank_;
721  Int myGRoot_;
722  //vector<int> Granks_;
723 public:
724  void SetGlobalComm(const MPI_Comm & pGComm){
725  if(commGlobRanks.count(comm_)==0){
726  MPI_Group group2 = MPI_GROUP_NULL;
727  MPI_Comm_group(pGComm, &group2);
728  MPI_Group group1 = MPI_GROUP_NULL;
729  MPI_Comm_group(comm_, &group1);
730 
731  Int size;
732  MPI_Comm_size(comm_,&size);
733  vector<int> globRanks(size);
734  vector<int> Lranks(size);
735  for(int i = 0; i<size;++i){Lranks[i]=i;}
736  MPI_Group_translate_ranks(group1, size, &Lranks[0],group2, &globRanks[0]);
737  commGlobRanks[comm_] = globRanks;
738  }
739  myGRoot_ = commGlobRanks[comm_][myRoot_];
740  myGRank_ = commGlobRanks[comm_][myRank_];
741  // Granks_.resize(myDests_.size());
742  // for(int i = 0; i<myDests_.size();++i){
743  // Granks_[i] = globRanks[myDests_[i]];
744  // }
745  //statusOFS<<myDests_<<std::endl;
746  //statusOFS<<Granks_<<std::endl;
747  }
748 #endif
749 
750 
751 
752  virtual void buildTree(Int * ranks, Int rank_cnt)=0;
753 public:
754  TreeBcast(){
755  comm_ = MPI_COMM_WORLD;
756  myRank_=-1;
757  myRoot_ = -1;
758  msgSize_ = -1;
759  numRecv_ = -1;
760  tag_=-1;
761  mainRoot_=-1;
762  isReady_ = false;
763  }
764 
765 
766  TreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt,Int msgSize){
767  comm_ = pComm;
768  MPI_Comm_rank(comm_,&myRank_);
769  myRoot_ = -1;
770  msgSize_ = msgSize;
771 
772  numRecv_ = 0;
773  tag_=-1;
774  mainRoot_=ranks[0];
775  isReady_ = false;
776  }
777 
778  TreeBcast(const TreeBcast & Tree){
779  Copy(Tree);
780  }
781 
782  virtual void Copy(const TreeBcast & Tree){
783  comm_ = Tree.comm_;
784  myRank_ = Tree.myRank_;
785  myRoot_ = Tree.myRoot_;
786  msgSize_ = Tree.msgSize_;
787 
788  tag_= Tree.tag_;
789  mainRoot_= Tree.mainRoot_;
790  myDests_ = Tree.myDests_;
791 
792  //numRecv_ = Tree.numRecv_;
793  //isReady_ = Tree.isReady_;
794  isReady_ = false;
795  numRecv_ = 0;
796  }
797 
798  virtual TreeBcast * clone() const = 0;
799 
800  void Reset(){
801  //statusOFS<<"RESET CALLED"<<std::endl;
802  this->numRecv_ = 0;
803  this->isReady_ = false;
804  }
805 
806 
807 
808  static TreeBcast * Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,double rseed);
809 
810  virtual inline Int GetNumRecvMsg(){return numRecv_;}
811  virtual inline Int GetNumMsgToRecv(){return 1;}
812  inline void SetDataReady(bool rdy){ isReady_=rdy; }
813  inline void SetTag(Int tag){ tag_ = tag;}
814  inline int GetTag(){ return tag_;}
815 
816 
817  Int * GetDests(){ return &myDests_[0];}
818  Int GetDest(Int i){ return myDests_[i];}
819  Int GetDestCount(){ return myDests_.size();}
820  Int GetRoot(){ return myRoot_;}
821  Int GetMsgSize(){ return msgSize_;}
822 
823  void ForwardMessage( char * data, size_t size, int tag, MPI_Request * requests ){
824  tag_ = tag;
825  for( Int idxRecv = 0; idxRecv < myDests_.size(); ++idxRecv ){
826  Int iProc = myDests_[idxRecv];
827  // Use Isend to send to multiple targets
828  MPI_Isend( data, size, MPI_BYTE,
829  iProc, tag,comm_, &requests[2*iProc+1] );
830 
831 #if defined(COMM_PROFILE_BCAST) || defined(COMM_PROFILE)
832  // statusOFS<<idxRecv<<std::endl;
833  // statusOFS<<Granks_<<std::endl;
834  //PROFILE_COMM(myGRank_,Granks_[idxRecv],tag,msgSize_);
835  PROFILE_COMM(myGRank_,commGlobRanks[comm_][iProc],tag_,msgSize_);
836 #endif
837  } // for (iProc)
838  }
839 
840 
841 };
842 
843 class FTreeBcast: public TreeBcast{
844 protected:
845  virtual void buildTree(Int * ranks, Int rank_cnt){
846 
847  Int idxStart = 0;
848  Int idxEnd = rank_cnt;
849 
850 
851 
852  myRoot_ = ranks[0];
853 
854  if(myRank_==myRoot_){
855  myDests_.insert(myDests_.begin(),&ranks[1],&ranks[0]+rank_cnt);
856  }
857 
858 #if (defined(BCAST_VERBOSE))
859  statusOFS<<"My root is "<<myRoot_<<std::endl;
860  statusOFS<<"My dests are ";
861  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
862  statusOFS<<std::endl;
863 #endif
864  }
865 
866 
867 
868 public:
869  FTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
870  //build the binary tree;
871  buildTree(ranks,rank_cnt);
872  }
873 
874 
875  virtual FTreeBcast * clone() const{
876  FTreeBcast * out = new FTreeBcast(*this);
877  return out;
878  }
879 };
880 
881 
882 
883 class BTreeBcast: public TreeBcast{
884 protected:
908  virtual void buildTree(Int * ranks, Int rank_cnt){
909 
910  Int idxStart = 0;
911  Int idxEnd = rank_cnt;
912 
913 
914 
915  Int prevRoot = ranks[0];
916  while(idxStart<idxEnd){
917  Int curRoot = ranks[idxStart];
918  Int listSize = idxEnd - idxStart;
919 
920  if(listSize == 1){
921  if(curRoot == myRank_){
922  myRoot_ = prevRoot;
923  break;
924  }
925  }
926  else{
927  Int halfList = floor(ceil(double(listSize) / 2.0));
928  Int idxStartL = idxStart+1;
929  Int idxStartH = idxStart+halfList;
930 
931  if(curRoot == myRank_){
932  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
933  Int childL = ranks[idxStartL];
934  Int childR = ranks[idxStartH];
935 
936  myDests_.push_back(childL);
937  myDests_.push_back(childR);
938  }
939  else if ((idxEnd - idxStartH) > 0){
940  Int childR = ranks[idxStartH];
941  myDests_.push_back(childR);
942  }
943  else{
944  Int childL = ranks[idxStartL];
945  myDests_.push_back(childL);
946  }
947  myRoot_ = prevRoot;
948  break;
949  }
950 
951  if( myRank_ < ranks[idxStartH]){
952  idxStart = idxStartL;
953  idxEnd = idxStartH;
954  }
955  else{
956  idxStart = idxStartH;
957  }
958  prevRoot = curRoot;
959  }
960 
961  }
962 
963 #if (defined(BCAST_VERBOSE))
964  statusOFS<<"My root is "<<myRoot_<<std::endl;
965  statusOFS<<"My dests are ";
966  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
967  statusOFS<<std::endl;
968 #endif
969  }
970 
971 
972 
973 public:
974  BTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
975  //build the binary tree;
976  buildTree(ranks,rank_cnt);
977  }
978 
979  virtual BTreeBcast * clone() const{
980  BTreeBcast * out = new BTreeBcast(*this);
981  return out;
982  }
983 
984 };
985 
986 
987 
988 class ModBTreeBcast: public TreeBcast{
989 protected:
990  double rseed_;
991 
992  virtual void buildTree(Int * ranks, Int rank_cnt){
993 
994  Int idxStart = 0;
995  Int idxEnd = rank_cnt;
996 
997  //sort the ranks with the modulo like operation
998  if(rank_cnt>1){
999  //Int new_idx = (int)((rand()+1.0) * (double)rank_cnt / ((double)RAND_MAX+1.0));
1000 
1001  // srand(ranks[0]+rank_cnt);
1002  //Int new_idx = (Int)rseed_ % (rank_cnt - 1) + 1;
1003 // Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
1004  Int new_idx = (int)(this->rseed_)%(rank_cnt-1)+1;
1005  //statusOFS<<"NEW IDX: "<<new_idx<<endl;
1006 
1007  assert(new_idx<rank_cnt && new_idx>=1);
1008 
1009  Int * new_start = &ranks[new_idx];
1010 
1011  //for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1012 
1013  // Int * new_start = std::lower_bound(&ranks[1],&ranks[0]+rank_cnt,ranks[0]);
1014  //just swap the two chunks r[0] | r[1] --- r[new_start-1] | r[new_start] --- r[end]
1015  // becomes r[0] | r[new_start] --- r[end] | r[1] --- r[new_start-1]
1016  std::rotate(&ranks[1], new_start, &ranks[0]+rank_cnt);
1017 
1018  //for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1019  }
1020 
1021  Int prevRoot = ranks[0];
1022  while(idxStart<idxEnd){
1023  assert(idxStart<rank_cnt && idxStart>=0);
1024  Int curRoot = ranks[idxStart];
1025  Int listSize = idxEnd - idxStart;
1026 
1027  if(listSize == 1){
1028  if(curRoot == myRank_){
1029  myRoot_ = prevRoot;
1030  break;
1031  }
1032  }
1033  else{
1034  Int halfList = floor(ceil(double(listSize) / 2.0));
1035  Int idxStartL = idxStart+1;
1036  Int idxStartH = idxStart+halfList;
1037  assert(idxStartL<rank_cnt && idxStartL>=0);
1038  assert(idxStartH<rank_cnt && idxStartH>=0);
1039 
1040  if(curRoot == myRank_){
1041  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1042  Int childL = ranks[idxStartL];
1043  Int childR = ranks[idxStartH];
1044 
1045  myDests_.push_back(childL);
1046  myDests_.push_back(childR);
1047  }
1048  else if ((idxEnd - idxStartH) > 0){
1049  Int childR = ranks[idxStartH];
1050  myDests_.push_back(childR);
1051  }
1052  else{
1053  Int childL = ranks[idxStartL];
1054  myDests_.push_back(childL);
1055  }
1056  myRoot_ = prevRoot;
1057  break;
1058  }
1059 
1060  //not true anymore ?
1061  //first half to
1062  TIMER_START(FIND_RANK);
1063  Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], myRank_);
1064  TIMER_STOP(FIND_RANK);
1065  if( pos != &ranks[idxStartH]){
1066  idxStart = idxStartL;
1067  idxEnd = idxStartH;
1068  }
1069  else{
1070  idxStart = idxStartH;
1071  }
1072  prevRoot = curRoot;
1073  }
1074 
1075  }
1076 
1077 #if (defined(REDUCE_VERBOSE))
1078  statusOFS<<"My root is "<<myRoot_<<std::endl;
1079  statusOFS<<"My dests are ";
1080  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
1081  statusOFS<<std::endl;
1082 #endif
1083  }
1084 
1085 
1086 
1087 public:
1088  ModBTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed):TreeBcast(pComm,ranks,rank_cnt,msgSize){
1089  //build the binary tree;
1090  rseed_ = rseed;
1091  buildTree(ranks,rank_cnt);
1092  }
1093 
1094  virtual void Copy(const ModBTreeBcast & Tree){
1095  ((TreeBcast*)this)->Copy(*((const TreeBcast*)&Tree));
1116 
1117 
1118  rseed_ = Tree.rseed_;
1119 
1120  }
1121 
1122  virtual ModBTreeBcast * clone() const{
1123  ModBTreeBcast * out = new ModBTreeBcast(*this);
1124  return out;
1125  }
1126 
1127 };
1128 
1129 
1131 protected:
1132  virtual void buildTree(Int * ranks, Int rank_cnt){
1133 
1134  Int idxStart = 0;
1135  Int idxEnd = rank_cnt;
1136 
1137  //random permute ranks
1138  if(rank_cnt>1){
1139  for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1140  srand(ranks[0]);
1141  std::random_shuffle(&ranks[1],&ranks[0]+rank_cnt);
1142  for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1143 
1144  }
1145 
1146  Int prevRoot = ranks[0];
1147  while(idxStart<idxEnd){
1148  Int curRoot = ranks[idxStart];
1149  Int listSize = idxEnd - idxStart;
1150 
1151  if(listSize == 1){
1152  if(curRoot == myRank_){
1153  myRoot_ = prevRoot;
1154  break;
1155  }
1156  }
1157  else{
1158  Int halfList = floor(ceil(double(listSize) / 2.0));
1159  Int idxStartL = idxStart+1;
1160  Int idxStartH = idxStart+halfList;
1161 
1162  if(curRoot == myRank_){
1163  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1164  Int childL = ranks[idxStartL];
1165  Int childR = ranks[idxStartH];
1166 
1167  myDests_.push_back(childL);
1168  myDests_.push_back(childR);
1169  }
1170  else if ((idxEnd - idxStartH) > 0){
1171  Int childR = ranks[idxStartH];
1172  myDests_.push_back(childR);
1173  }
1174  else{
1175  Int childL = ranks[idxStartL];
1176  myDests_.push_back(childL);
1177  }
1178  myRoot_ = prevRoot;
1179  break;
1180  }
1181 
1182  //not true anymore ?
1183  //first half to
1184  Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], myRank_);
1185  if( pos != &ranks[idxStartH]){
1186  idxStart = idxStartL;
1187  idxEnd = idxStartH;
1188  }
1189  else{
1190  idxStart = idxStartH;
1191  }
1192  prevRoot = curRoot;
1193  }
1194 
1195  }
1196 
1197 #if (defined(REDUCE_VERBOSE))
1198  statusOFS<<"My root is "<<myRoot_<<std::endl;
1199  statusOFS<<"My dests are ";
1200  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
1201  statusOFS<<std::endl;
1202 #endif
1203  }
1204 
1205 
1206 
1207 public:
1208  RandBTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
1209  //build the binary tree;
1210  buildTree(ranks,rank_cnt);
1211  }
1212 
1213  virtual RandBTreeBcast * clone() const{
1214  RandBTreeBcast * out = new RandBTreeBcast(*this);
1215  return out;
1216  }
1217 
1218 };
1219 
1220 
1221 
1222 
1223 class PalmTreeBcast: public TreeBcast{
1224 protected:
1225  virtual void buildTree(Int * ranks, Int rank_cnt){
1226  Int numLevel = floor(log2(rank_cnt));
1227  Int numRoots = 0;
1228  for(Int level=0;level<numLevel;++level){
1229  numRoots = std::min( rank_cnt, numRoots + (Int)pow(2.0,level));
1230  Int numNextRoots = std::min(rank_cnt,numRoots + (Int)pow(2.0,(level+1)));
1231  Int numReceivers = numNextRoots - numRoots;
1232  for(Int ip = 0; ip<numRoots;++ip){
1233  Int p = ranks[ip];
1234  for(Int ir = ip; ir<numReceivers;ir+=numRoots){
1235  Int r = ranks[numRoots+ir];
1236  if(r==myRank_){
1237  myRoot_ = p;
1238  }
1239 
1240  if(p==myRank_){
1241  myDests_.push_back(r);
1242  }
1243  }
1244  }
1245  }
1246 
1247 #if (defined(BCAST_VERBOSE))
1248  statusOFS<<"My root is "<<myRoot_<<std::endl;
1249  statusOFS<<"My dests are ";
1250  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
1251  statusOFS<<std::endl;
1252 #endif
1253  }
1254 
1255 
1256 
1257 public:
1258  PalmTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
1259  //build the binary tree;
1260  buildTree(ranks,rank_cnt);
1261  }
1262 
1263  virtual PalmTreeBcast * clone() const{
1264  PalmTreeBcast * out = new PalmTreeBcast(*this);
1265  return out;
1266  }
1267 
1268 };
1269 
1270 
1271 template< typename T>
1272 class TreeReduce: public TreeBcast{
1273 protected:
1274  T * myData_;
1275  MPI_Request sendRequest_;
1276  NumVec<char> myLocalBuffer_;
1277  NumVec<char> myRecvBuffers_;
1278  NumVec<T *> remoteData_;
1279  NumVec<MPI_Request> myRequests_;
1280  NumVec<MPI_Status> myStatuses_;
1281  NumVec<int> recvIdx_;
1282 
1283  bool fwded_;
1284  bool done_;
1285  bool isAllocated_;
1286  Int numRecvPosted_;
1287 
1288 public:
1289  TreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
1290  myData_ = NULL;
1291  sendRequest_ = MPI_REQUEST_NULL;
1292  fwded_=false;
1293  done_=false;
1294  isAllocated_=false;
1295  numRecvPosted_= 0;
1296  }
1297 
1298 
1299  virtual TreeReduce * clone() const = 0;
1300 
1301  TreeReduce(const TreeReduce & Tree){
1302  this->Copy(Tree);
1303  }
1304 
1305  virtual void Copy(const TreeReduce & Tree){
1306  ((TreeBcast*)this)->Copy(*(const TreeBcast*)&Tree);
1307 
1308  // this->comm_ = Tree.comm_;
1309  // this->myRank_ = Tree.myRank_;
1310  // this->myRoot_ = Tree.myRoot_;
1311  // this->msgSize_ = Tree.msgSize_;
1312  // this->numRecv_ = Tree.numRecv_;
1313  // this->tag_= Tree.tag_;
1314  // this->mainRoot_= Tree.mainRoot_;
1315  // this->isReady_ = Tree.isReady_;
1316  // this->myDests_ = Tree.myDests_;
1317 
1318 
1319  this->myData_ = NULL;
1320  this->sendRequest_ = MPI_REQUEST_NULL;
1321  this->fwded_= false;
1322  this->done_= false;
1323  this->isAllocated_= Tree.isAllocated_;
1324  this->numRecvPosted_= 0;
1325 
1326  //this->myLocalBuffer_.resize(Tree.myLocalBuffer_.size());
1327  //this->remoteData_ = Tree.remoteData_;
1328  //this->recvIdx_ = Tree.recvIdx_;
1329 
1330  CleanupBuffers();
1331  }
1332 
1333 
1334 
1335  bool IsAllocated(){return isAllocated_;}
1336 
1337  virtual ~TreeReduce(){
1338  CleanupBuffers();
1339  }
1340 
1341 
1342  static TreeReduce<T> * Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,double rseed);
1343 
1344  virtual inline Int GetNumMsgToRecv(){return GetDestCount();}
1345 
1346  virtual void AllocRecvBuffers(){
1347  remoteData_.Resize(GetDestCount());
1348  //SetValue(remoteData_,(T*)NULL);
1349 
1350  //assert(myRecvBuffers_==NULL);
1351  //myRecvBuffers_ = new char[GetDestCount()*msgSize_];
1352 
1353 
1354  myRecvBuffers_.Resize(GetDestCount()*msgSize_);
1355  //SetValue(myRecvBuffers_,(char)0);
1356 
1357  for( Int idxRecv = 0; idxRecv < GetDestCount(); ++idxRecv ){
1358  remoteData_[idxRecv] = (T*)&(myRecvBuffers_[idxRecv*msgSize_]);
1359  //Int nelem = msgSize_ / sizeof(T);
1360  //std::fill(remoteData_[idxRecv],remoteData_[idxRecv]+nelem,ZERO<T>());
1361  }
1362 
1363  myRequests_.Resize(GetDestCount());
1364  SetValue(myRequests_,MPI_REQUEST_NULL);
1365  myStatuses_.Resize(GetDestCount());
1366  recvIdx_.Resize(GetDestCount());
1367 
1368  sendRequest_ = MPI_REQUEST_NULL;
1369 
1370  isAllocated_ = true;
1371  }
1372 
1373  void CleanupBuffers(){
1374  myLocalBuffer_.Clear();
1375  // if(myLocalBuffer_!=NULL){
1376  // delete []myLocalBuffer_;
1377  // myLocalBuffer_=NULL;
1378  // }
1379 
1380 
1381  remoteData_.Clear();
1382  // myRecvBuffers_.Clear();
1383  // if(myRecvBuffers_!=NULL){
1384  // delete []myRecvBuffers_;
1385  // myRecvBuffers_=NULL;
1386  // }
1387 
1388 
1389  myRequests_.Clear();
1390  myStatuses_.Clear();
1391  recvIdx_.Clear();
1392 
1393 
1394  // if(myLocalBuffer_!=NULL){
1395  // delete [] myLocalBuffer_;
1396  // }
1397  // myLocalBuffer_=NULL;
1398 
1399 
1400  }
1401 
1402  void Reset(){
1403  // assert(done_ || myDests_.size()==0);
1404  CleanupBuffers();
1405  done_=false;
1406 
1407  myData_ = NULL;
1408  sendRequest_ = MPI_REQUEST_NULL;
1409  fwded_=false;
1410  isAllocated_=false;
1411  isReady_=false;
1412  numRecv_ = 0;
1413  numRecvPosted_= 0;
1414  }
1415 
1416 
1417 
1418  void SetLocalBuffer(T * locBuffer){
1419  if(myData_!=NULL && myData_!=locBuffer){
1420 
1421  //statusOFS<<"DOING SUM"<<std::endl;
1422  //gdb_lock();
1423  blas::Axpy(msgSize_/sizeof(T), ONE<T>(), myData_, 1, locBuffer, 1 );
1424  myLocalBuffer_.Clear();
1425  }
1426 
1427  myData_ = locBuffer;
1428  }
1429 
1430  inline bool AccumulationDone(){
1431  if(myRank_==myRoot_ && isAllocated_){
1432  isReady_=true;
1433  }
1434  return isReady_ && (numRecv_ == GetDestCount());
1435  }
1436 
1437 
1438  inline bool IsDone(){
1439  if(myRank_==myRoot_ && isAllocated_){
1440  isReady_=true;
1441  }
1442 
1443  bool retVal = AccumulationDone();
1444  if(myRoot_ != myRank_ && !fwded_){
1445  retVal = false;
1446  }
1447 
1448  if (retVal && myRoot_ != myRank_ && fwded_){
1449  //test the send request
1450  int flag = 0;
1451  MPI_Test(&sendRequest_,&flag,MPI_STATUS_IGNORE);
1452  retVal = flag==1;
1453  }
1454 
1455  return retVal;
1456  }
1457 
1458  //async wait and forward
1459  virtual bool Progress(){
1460 
1461  if(done_){
1462  return true;
1463  }
1464  if(!isAllocated_){
1465  return false;
1466  }
1467 
1468  if(myRank_==myRoot_ && isAllocated_){
1469  isReady_=true;
1470  }
1471 
1472  // if(this->numRecvPosted_==0){
1473  // this->PostFirstRecv();
1474  // }
1475 
1476  bool retVal = AccumulationDone();
1477  if(isReady_ && !retVal){
1478 
1479  //assert(isAllocated_);
1480 
1481  //mpi_test_some on my requests
1482  int recvCount = -1;
1483  int reqCnt = GetDestCount();
1484  assert(reqCnt == myRequests_.m());
1485  MPI_Testsome(reqCnt,&myRequests_[0],&recvCount,&recvIdx_[0],&myStatuses_[0]);
1486  //if something has been received, accumulate and potentially forward it
1487  for(Int i = 0;i<recvCount;++i ){
1488  Int idx = recvIdx_[i];
1489 
1490  if(idx!=MPI_UNDEFINED){
1491 
1492  Int size = 0;
1493  MPI_Get_count(&myStatuses_[i], MPI_BYTE, &size);
1494 
1495 
1496 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1497  statusOFS<<myRank_<<" RECVD from "<<myStatuses_[i].MPI_SOURCE<<" on tag "<<tag_<<std::endl;
1498 #endif
1499  if(size>0){
1500  //If myData is 0, allocate to the size of what has been received
1501  if(myData_==NULL){
1502  //assert(size==msgSize_);
1503  myLocalBuffer_.Resize(msgSize_);
1504 
1505  myData_ = (T*)&myLocalBuffer_[0];
1506  Int nelem = +msgSize_/sizeof(T);
1507  std::fill(myData_,myData_+nelem,ZERO<T>());
1508  }
1509 
1510  Reduce(idx,i);
1511 
1512  }
1513 
1514  numRecv_++;
1515  //MPI_Request_free(&myRequests_[idx]);
1516  }
1517  }
1518 
1519  }
1520  else if (isReady_ && sendRequest_ == MPI_REQUEST_NULL && myRoot_ != myRank_ && !fwded_){
1521  //free the unnecessary arrays
1522  myRecvBuffers_.Clear();
1523  myRequests_.Clear();
1524  myStatuses_.Clear();
1525  recvIdx_.Clear();
1526 
1527  //assert(isAllocated_);
1528 
1529  //Forward
1530  Forward();
1531  retVal = false;
1532  }
1533  else{
1534  retVal = IsDone();
1535  if(retVal){
1536  //free the unnecessary arrays
1537  myRecvBuffers_.Clear();
1538  myRequests_.Clear();
1539  myStatuses_.Clear();
1540  recvIdx_.Clear();
1541  }
1542  }
1543 
1544 
1545  done_ = retVal;
1546  return retVal;
1547  }
1548 
1549  //blocking wait
1550  void Wait(){
1551  if(!done_){
1552  while(!Progress());
1553  }
1554  }
1555 
1556  T * GetLocalBuffer(){
1557  return myData_;
1558  }
1559 
1560 
1561 
1562  void CopyLocalBuffer(T* destBuffer){
1563  std::copy((char*)myData_,(char*)myData_+GetMsgSize(),(char*)destBuffer);
1564  }
1565 
1566 
1567  virtual void PostFirstRecv()
1568  {
1569  if(this->GetDestCount()>this->numRecvPosted_){
1570  for( Int idxRecv = 0; idxRecv < myDests_.size(); ++idxRecv ){
1571  Int iProc = myDests_[idxRecv];
1572  //assert(msgSize_>=0);
1573  MPI_Irecv( (char*)remoteData_[idxRecv], msgSize_, MPI_BYTE,
1574  iProc, tag_,comm_, &myRequests_[idxRecv] );
1575  this->numRecvPosted_++;
1576  } // for (iProc)
1577  }
1578  }
1579 
1580 
1581 
1582 
1583 protected:
1584  virtual void Reduce( Int idxRecv, Int idReq){
1585  //add thing to my data
1586  blas::Axpy(msgSize_/sizeof(T), ONE<T>(), remoteData_[idxRecv], 1, myData_, 1 );
1587  }
1588 
1589  void Forward(){
1590  //forward to my root if I have reseived everything
1591  Int iProc = myRoot_;
1592  // Use Isend to send to multiple targets
1593  if(myData_==NULL){
1594  MPI_Isend( NULL, 0, MPI_BYTE,
1595  iProc, tag_,comm_, &sendRequest_ );
1596 #ifdef COMM_PROFILE
1597  PROFILE_COMM(myGRank_,myGRoot_,tag_,0);
1598 #endif
1599  }
1600  else{
1601  MPI_Isend( (char*)myData_, msgSize_, MPI_BYTE,
1602  iProc, tag_,comm_, &sendRequest_ );
1603 #ifdef COMM_PROFILE
1604  PROFILE_COMM(myGRank_,myGRoot_,tag_,msgSize_);
1605 #endif
1606  }
1607 
1608 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1609  statusOFS<<myRank_<<" FWD to "<<iProc<<" on tag "<<tag_<<std::endl;
1610 #endif
1611 
1612  fwded_ = true;
1613 
1614  }
1615 
1616 };
1617 
1618 
1619 template< typename T>
1620 class FTreeReduce: public TreeReduce<T>{
1621 protected:
1622  virtual void buildTree(Int * ranks, Int rank_cnt){
1623 
1624  Int idxStart = 0;
1625  Int idxEnd = rank_cnt;
1626 
1627 
1628 
1629  this->myRoot_ = ranks[0];
1630 
1631  if(this->myRank_==this->myRoot_){
1632  this->myDests_.insert(this->myDests_.begin(),&ranks[1],&ranks[0]+rank_cnt);
1633  }
1634 
1635 #if (defined(REDUCE_VERBOSE))
1636  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
1637  statusOFS<<"My dests are ";
1638  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
1639  statusOFS<<std::endl;
1640 #endif
1641  }
1642 
1643  virtual void Reduce( ){
1644  //add thing to my data
1645  blas::Axpy(this->msgSize_/sizeof(T), ONE<T>(), this->remoteData_[0], 1, this->myData_, 1 );
1646 
1647 
1648 #if (defined(REDUCE_DEBUG))
1649  statusOFS << std::endl << /*"["<<snode.Index<<"]*/" Recv contrib"<< std::endl;
1650  for(int i = 0; i < this->msgSize_/sizeof(T); ++i){
1651  statusOFS<< this->remoteData_[0][i]<< " ";
1652  if(i%3==0){statusOFS<<std::endl;}
1653  }
1654  statusOFS<<std::endl;
1655 
1656  statusOFS << std::endl << /*"["<<snode.Index<<"]*/" Reduce buffer now is"<< std::endl;
1657  for(int i = 0; i < this->msgSize_/sizeof(T); ++i){
1658  statusOFS<< this->myData_[i]<< " ";
1659  if(i%3==0){statusOFS<<std::endl;}
1660  }
1661  statusOFS<<std::endl;
1662 #endif
1663 
1664  }
1665 
1666 
1667 
1668 public:
1669  FTreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
1670  buildTree(ranks,rank_cnt);
1671  }
1672 
1673  virtual void PostFirstRecv()
1674  {
1675  // if(!this->isAllocated_){
1676  // this->AllocRecvBuffers();
1677  // }
1678  if(this->isAllocated_ && this->GetDestCount()>this->numRecvPosted_){
1679  MPI_Irecv( (char*)this->remoteData_[0], this->msgSize_, MPI_BYTE,
1680  MPI_ANY_SOURCE, this->tag_,this->comm_, &this->myRequests_[0] );
1681  this->numRecvPosted_++;
1682  }
1683  }
1684 
1685  virtual void AllocRecvBuffers(){
1686  if(this->GetDestCount()>0){
1687  this->remoteData_.Resize(1);
1688 
1689  this->myRecvBuffers_.Resize(this->msgSize_);
1690 
1691  this->remoteData_[0] = (T*)&(this->myRecvBuffers_[0]);
1692 
1693  this->myRequests_.Resize(1);
1694  SetValue(this->myRequests_,MPI_REQUEST_NULL);
1695  this->myStatuses_.Resize(1);
1696  this->recvIdx_.Resize(1);
1697  }
1698 
1699  this->sendRequest_ = MPI_REQUEST_NULL;
1700 
1701  this->isAllocated_ = true;
1702  }
1703 
1704  virtual bool Progress(){
1705 
1706  if(!this->isAllocated_){
1707  return true;
1708  }
1709 
1710 
1711  if(this->myRank_==this->myRoot_ && this->isAllocated_){
1712  this->isReady_=true;
1713  }
1714 
1715  // if(this->numRecvPosted_==0){
1716  // this->PostFirstRecv();
1717  // }
1718 
1719  bool retVal = this->AccumulationDone();
1720  if(this->isReady_ && !retVal){
1721 
1722  //assert(this->isAllocated_);
1723 
1724  //mpi_test_some on my requests
1725  int recvCount = -1;
1726  int reqCnt = 1;
1727 
1728  MPI_Testsome(reqCnt,&this->myRequests_[0],&recvCount,&this->recvIdx_[0],&this->myStatuses_[0]);
1729  //MPI_Waitsome(reqCnt,&myRequests_[0],&recvCount,&recvIdx_[0],&myStatuses_[0]);
1730  //if something has been received, accumulate and potentially forward it
1731  for(Int i = 0;i<recvCount;++i ){
1732  Int idx = this->recvIdx_[i];
1733 
1734  if(idx!=MPI_UNDEFINED){
1735 
1736  Int size = 0;
1737  MPI_Get_count(&this->myStatuses_[i], MPI_BYTE, &size);
1738 
1739 
1740 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1741 
1742  statusOFS<<this->myRank_<<" RECVD from "<<this->myStatuses_[i].MPI_SOURCE<<" on tag "<<this->tag_<<std::endl;
1743 #endif
1744  if(size>0){
1745  //If myData is 0, allocate to the size of what has been received
1746  if(this->myData_==NULL){
1747  //assert(size==this->msgSize_);
1748  this->myLocalBuffer_.Resize(this->msgSize_);
1749 
1750  this->myData_ = (T*)&this->myLocalBuffer_[0];
1751  Int nelem = this->msgSize_/sizeof(T);
1752  std::fill(this->myData_,this->myData_+nelem,ZERO<T>());
1753  }
1754 
1755  this->Reduce();
1756  }
1757 
1758  this->numRecv_++;
1759  }
1760  }
1761 
1762  if(recvCount>0){
1763  this->PostFirstRecv();
1764  }
1765  }
1766  else if (this->isReady_ && this->sendRequest_ == MPI_REQUEST_NULL && this->myRoot_ != this->myRank_ && !this->fwded_){
1767  //free the unnecessary arrays
1768  this->myRecvBuffers_.Clear();
1769  this->myRequests_.Clear();
1770  this->myStatuses_.Clear();
1771  this->recvIdx_.Clear();
1772 
1773  //Forward
1774  this->Forward();
1775  retVal = false;
1776  }
1777  else{
1778  retVal = this->IsDone();
1779  if(retVal){
1780  //free the unnecessary arrays
1781  this->myRecvBuffers_.Clear();
1782  this->myRequests_.Clear();
1783  this->myStatuses_.Clear();
1784  this->recvIdx_.Clear();
1785  }
1786  }
1787 
1788  return retVal;
1789  }
1790 
1791 
1792  virtual FTreeReduce * clone() const{
1793  FTreeReduce * out = new FTreeReduce(*this);
1794  return out;
1795  }
1796 
1797 
1798 
1799 };
1800 
1801 
1802 
1803 template< typename T>
1804 class BTreeReduce: public TreeReduce<T>{
1805 protected:
1806  virtual void buildTree(Int * ranks, Int rank_cnt){
1807  Int idxStart = 0;
1808  Int idxEnd = rank_cnt;
1809 
1810 
1811 
1812  Int prevRoot = ranks[0];
1813  while(idxStart<idxEnd){
1814  Int curRoot = ranks[idxStart];
1815  Int listSize = idxEnd - idxStart;
1816 
1817  if(listSize == 1){
1818  if(curRoot == this->myRank_){
1819  this->myRoot_ = prevRoot;
1820  break;
1821  }
1822  }
1823  else{
1824  Int halfList = floor(ceil(double(listSize) / 2.0));
1825  Int idxStartL = idxStart+1;
1826  Int idxStartH = idxStart+halfList;
1827 
1828  if(curRoot == this->myRank_){
1829  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1830  Int childL = ranks[idxStartL];
1831  Int childR = ranks[idxStartH];
1832 
1833  this->myDests_.push_back(childL);
1834  this->myDests_.push_back(childR);
1835  }
1836  else if ((idxEnd - idxStartH) > 0){
1837  Int childR = ranks[idxStartH];
1838  this->myDests_.push_back(childR);
1839  }
1840  else{
1841  Int childL = ranks[idxStartL];
1842  this->myDests_.push_back(childL);
1843  }
1844  this->myRoot_ = prevRoot;
1845  break;
1846  }
1847 
1848  if( this->myRank_ < ranks[idxStartH]){
1849  idxStart = idxStartL;
1850  idxEnd = idxStartH;
1851  }
1852  else{
1853  idxStart = idxStartH;
1854  }
1855  prevRoot = curRoot;
1856  }
1857 
1858  }
1859 
1860 #if (defined(REDUCE_VERBOSE))
1861  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
1862  statusOFS<<"My dests are ";
1863  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
1864  statusOFS<<std::endl;
1865 #endif
1866  }
1867 public:
1868  BTreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
1869  buildTree(ranks,rank_cnt);
1870  }
1871 
1872  virtual BTreeReduce * clone() const{
1873  BTreeReduce * out = new BTreeReduce(*this);
1874  return out;
1875  }
1876 };
1877 
1878 
1879 template< typename T>
1880 class ModBTreeReduce: public TreeReduce<T>{
1881 protected:
1882  double rseed_;
1883  virtual void buildTree(Int * ranks, Int rank_cnt){
1884 
1885  Int idxStart = 0;
1886  Int idxEnd = rank_cnt;
1887 
1888  //sort the ranks with the modulo like operation
1889  if(rank_cnt>1){
1890  //generate a random position in [1 .. rand_cnt]
1891  //Int new_idx = (int)((rand()+1.0) * (double)rank_cnt / ((double)RAND_MAX+1.0));
1892  //srand(ranks[0]+rank_cnt);
1893  //Int new_idx = rseed_%(rank_cnt-1)+1;
1894 
1895  //Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
1896  //Int new_idx = (Int)rseed_ % (rank_cnt - 1) + 1;
1897 // Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
1898  Int new_idx = (int)(this->rseed_)%(rank_cnt-1)+1;
1899 
1900  Int * new_start = &ranks[new_idx];
1901  // for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1902 
1903  // Int * new_start = std::lower_bound(&ranks[1],&ranks[0]+rank_cnt,ranks[0]);
1904  //just swap the two chunks r[0] | r[1] --- r[new_start-1] | r[new_start] --- r[end]
1905  // becomes r[0] | r[new_start] --- r[end] | r[1] --- r[new_start-1]
1906  std::rotate(&ranks[1], new_start, &ranks[0]+rank_cnt);
1907  // for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1908  }
1909 
1910  Int prevRoot = ranks[0];
1911  while(idxStart<idxEnd){
1912  Int curRoot = ranks[idxStart];
1913  Int listSize = idxEnd - idxStart;
1914 
1915  if(listSize == 1){
1916  if(curRoot == this->myRank_){
1917  this->myRoot_ = prevRoot;
1918  break;
1919  }
1920  }
1921  else{
1922  Int halfList = floor(ceil(double(listSize) / 2.0));
1923  Int idxStartL = idxStart+1;
1924  Int idxStartH = idxStart+halfList;
1925 
1926  if(curRoot == this->myRank_){
1927  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1928  Int childL = ranks[idxStartL];
1929  Int childR = ranks[idxStartH];
1930 
1931  this->myDests_.push_back(childL);
1932  this->myDests_.push_back(childR);
1933  }
1934  else if ((idxEnd - idxStartH) > 0){
1935  Int childR = ranks[idxStartH];
1936  this->myDests_.push_back(childR);
1937  }
1938  else{
1939  Int childL = ranks[idxStartL];
1940  this->myDests_.push_back(childL);
1941  }
1942  this->myRoot_ = prevRoot;
1943  break;
1944  }
1945 
1946  //not true anymore ?
1947  //first half to
1948  TIMER_START(FIND_RANK);
1949  Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], this->myRank_);
1950  TIMER_STOP(FIND_RANK);
1951  if( pos != &ranks[idxStartH]){
1952  idxStart = idxStartL;
1953  idxEnd = idxStartH;
1954  }
1955  else{
1956  idxStart = idxStartH;
1957  }
1958  prevRoot = curRoot;
1959  }
1960 
1961  }
1962 
1963 #if (defined(REDUCE_VERBOSE))
1964  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
1965  statusOFS<<"My dests are ";
1966  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
1967  statusOFS<<std::endl;
1968 #endif
1969  }
1970 public:
1971  ModBTreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed):TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
1972  this->rseed_ = rseed;
1973  buildTree(ranks,rank_cnt);
1974  }
1975 
1976  virtual void Copy(const ModBTreeReduce & Tree){
1977  ((TreeReduce<T>*)this)->Copy(*((const TreeReduce<T>*)&Tree));
1978  //this->comm_ = Tree.comm_;
1979  //this->myRank_ = Tree.myRank_;
1980  //this->myRoot_ = Tree.myRoot_;
1981  //this->msgSize_ = Tree.msgSize_;
1982 
1983  //this->numRecv_ = Tree.numRecv_;
1984  //this->tag_= Tree.tag_;
1985  //this->mainRoot_= Tree.mainRoot_;
1986  //this->isReady_ = Tree.isReady_;
1987  //this->myDests_ = Tree.myDests_;
1988 
1989 
1990  //this->myData_ = Tree.myData_;
1991  //this->sendRequest_ = Tree.sendRequest_;
1992  //this->fwded_= Tree.fwded_;
1993  //this->isAllocated_= Tree.isAllocated_;
1994  //this->numRecvPosted_= Tree.numRecvPosted_;
1995 
1996  //this->myLocalBuffer_ = Tree.myLocalBuffer_;
1997  //this->myRecvBuffers_ = Tree.myRecvBuffers_;
1998  //this->remoteData_ = Tree.remoteData_;
1999  //this->myRequests_ = Tree.myRequests_;
2000  //this->myStatuses_ = Tree.myStatuses_;
2001  //this->recvIdx_ = Tree.recvIdx_;
2002  this->rseed_ = Tree.rseed_;
2003  }
2004 
2005 
2006 
2007 
2008  virtual ModBTreeReduce * clone() const{
2009  ModBTreeReduce * out = new ModBTreeReduce(*this);
2010  return out;
2011  }
2012 
2013 };
2014 
2015 
2016 template< typename T>
2017 class PalmTreeReduce: public TreeReduce<T>{
2018 protected:
2019 
2020  virtual void buildTree(Int * ranks, Int rank_cnt){
2021  Int numLevel = floor(log2(rank_cnt));
2022  Int numRoots = 0;
2023  for(Int level=0;level<numLevel;++level){
2024  numRoots = std::min( rank_cnt, numRoots + (Int)pow(2,level));
2025  Int numNextRoots = std::min(rank_cnt,numRoots + (Int)pow(2,(level+1)));
2026  Int numReceivers = numNextRoots - numRoots;
2027  for(Int ip = 0; ip<numRoots;++ip){
2028  Int p = ranks[ip];
2029  for(Int ir = ip; ir<numReceivers;ir+=numRoots){
2030  Int r = ranks[numRoots+ir];
2031  if(r==this->myRank_){
2032  this->myRoot_ = p;
2033  }
2034 
2035  if(p==this->myRank_){
2036  this->myDests_.push_back(r);
2037  }
2038  }
2039  }
2040  }
2041 
2042 #if (defined(BCAST_VERBOSE))
2043  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
2044  statusOFS<<"My dests are ";
2045  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
2046  statusOFS<<std::endl;
2047 #endif
2048  }
2049 
2050 
2051 
2052 public:
2053  PalmTreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeReduce<T>(pComm,ranks,rank_cnt,msgSize){
2054  //build the binary tree;
2055  buildTree(ranks,rank_cnt);
2056  }
2057 
2058 
2059 
2060  virtual void Copy(const PalmTreeReduce & Tree){
2061  ((TreeReduce<T>*)this)->Copy(*((const TreeReduce<T>*)&Tree));
2062  //this->comm_ = Tree.comm_;
2063  //this->myRank_ = Tree.myRank_;
2064  //this->myRoot_ = Tree.myRoot_;
2065  //this->msgSize_ = Tree.msgSize_;
2066 
2067  //this->numRecv_ = Tree.numRecv_;
2068  //this->tag_= Tree.tag_;
2069  //this->mainRoot_= Tree.mainRoot_;
2070  //this->isReady_ = Tree.isReady_;
2071  //this->myDests_ = Tree.myDests_;
2072 
2073 
2074  //this->myData_ = Tree.myData_;
2075  //this->sendRequest_ = Tree.sendRequest_;
2076  //this->fwded_= Tree.fwded_;
2077  //this->isAllocated_= Tree.isAllocated_;
2078  //this->numRecvPosted_= Tree.numRecvPosted_;
2079 
2080  //this->myLocalBuffer_ = Tree.myLocalBuffer_;
2081  //this->myRecvBuffers_ = Tree.myRecvBuffers_;
2082  //this->remoteData_ = Tree.remoteData_;
2083  //this->myRequests_ = Tree.myRequests_;
2084  //this->myStatuses_ = Tree.myStatuses_;
2085  //this->recvIdx_ = Tree.recvIdx_;
2086  //this->rseed_ = Tree.rseed_;
2087  }
2088 
2089 
2090 
2091 
2092  virtual PalmTreeReduce * clone() const{
2093  PalmTreeReduce * out = new PalmTreeReduce(*this);
2094  return out;
2095  }
2096 
2097 };
2098 
2099 
2100 
2101 inline TreeBcast * TreeBcast::Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed){
2102  //get communicator size
2103  Int nprocs = 0;
2104  MPI_Comm_size(pComm, &nprocs);
2105 
2106 
2107 #if defined(FTREE)
2108  return new FTreeBcast(pComm,ranks,rank_cnt,msgSize);
2109 #elif defined(MODBTREE)
2110  return new ModBTreeBcast(pComm,ranks,rank_cnt,msgSize, rseed);
2111 #elif defined(BTREE)
2112  return new BTreeBcast(pComm,ranks,rank_cnt,msgSize);
2113 #elif defined(PALMTREE)
2114  return new PalmTreeBcast(pComm,ranks,rank_cnt,msgSize);
2115 #endif
2116 
2117 
2118  // return new PalmTreeBcast(pComm,ranks,rank_cnt,msgSize);
2119  // return new ModBTreeBcast(pComm,ranks,rank_cnt,msgSize, rseed);
2120  // return new RandBTreeBcast(pComm,ranks,rank_cnt,msgSize);
2121 
2122  if(nprocs<=FTREE_LIMIT){
2123  return new FTreeBcast(pComm,ranks,rank_cnt,msgSize);
2124  }
2125  else{
2126  return new ModBTreeBcast(pComm,ranks,rank_cnt,msgSize, rseed);
2127  }
2128 
2129 
2130 
2131 
2132 }
2133 
2134 
2135 
2136 
2137 template< typename T>
2138 inline TreeReduce<T> * TreeReduce<T>::Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed){
2139  //get communicator size
2140  Int nprocs = 0;
2141  MPI_Comm_size(pComm, &nprocs);
2142 
2143 #if defined(FTREE)
2144  return new FTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2145 #elif defined(MODBTREE)
2146  return new ModBTreeReduce<T>(pComm,ranks,rank_cnt,msgSize, rseed);
2147 #elif defined(BTREE)
2148  return new BTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2149 #elif defined(PALMTREE)
2150  return new PalmTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2151 #endif
2152 
2153 
2154  if(nprocs<=FTREE_LIMIT){
2155 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
2156  statusOFS<<"FLAT TREE USED"<<endl;
2157 #endif
2158  return new FTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2159  }
2160  else{
2161 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
2162  statusOFS<<"BINARY TREE USED"<<endl;
2163 #endif
2164  return new ModBTreeReduce<T>(pComm,ranks,rank_cnt,msgSize, rseed);
2165  //return new BTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2166  }
2167 }
2168 
2169 
2170 #ifdef NEW_BCAST
2171 template< typename T>
2172 inline TreeBcast2<T> * TreeBcast2<T>::Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed){
2173  //get communicator size
2174  Int nprocs = 0;
2175  MPI_Comm_size(pComm, &nprocs);
2176 
2177 
2178 
2179 
2180 #if defined(FTREE)
2181  return new FTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize);
2182 #elif defined(MODBTREE)
2183  return new ModBTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize,rseed);
2184 #elif defined(BTREE)
2185  return new BTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize);
2186 #endif
2187 
2188 
2189  if(nprocs<=FTREE_LIMIT){
2190 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
2191  statusOFS<<"FLAT TREE USED"<<endl;
2192 #endif
2193 
2194  return new FTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize);
2195 
2196  }
2197  else{
2198 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
2199  statusOFS<<"BINARY TREE USED"<<endl;
2200 #endif
2201  return new ModBTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize, rseed);
2202  // //return new BTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2203  }
2204 }
2205 #endif
2206 
2207 
2208 
2209 
2210 
2211 
2212 }
2213 
2214 #endif
Environmental variables.
Definition: TreeBcast.hpp:705
Definition: TreeBcast.hpp:1620
Definition: TreeBcast.hpp:1130
virtual void Copy(const ModBTreeBcast &Tree)
Definition: TreeBcast.hpp:1094
Definition: TreeBcast.hpp:2017
Profiling and timing using TAU.
void SetValue(NumMat< F > &M, F val)
SetValue sets a numerical matrix to a constant val.
Definition: NumMat_impl.hpp:153
Definition: TreeBcast.hpp:843
Definition: TreeBcast.hpp:988
Definition: TreeBcast.hpp:1880
Definition: TreeBcast.hpp:883
Definition: TreeBcast.hpp:1223
Definition: TreeBcast.hpp:1272
Definition: TreeBcast.hpp:1804