PEXSI
 All Classes Namespaces Files Functions Variables Typedefs Pages
TreeBcast.hpp
1 #ifndef _PEXSI_TREE_HPP_
2 #define _PEXSI_TREE_HPP_
3 
4 #include "pexsi/environment.hpp"
5 #include "pexsi/timer.h"
6 
7 #include <vector>
8 #include <map>
9 #include <algorithm>
10 #include <string>
11 //#include <random>
12 
13 // options to switch from a flat bcast/reduce tree to a binary tree
14 
15 #ifndef FTREE_LIMIT
16 #define FTREE_LIMIT 16
17 #endif
18 
19 
20 
21 namespace PEXSI{
22 
23 
24 extern std::map< MPI_Comm , std::vector<int> > commGlobRanks;
25 
26 #ifdef NEW_BCAST
27 
28 template< typename T>
29  class TreeBcast2{
30  protected:
31 
32  T * myData_;
33  MPI_Request recvRequest_;
34  NumVec<char> myRecvBuffer_;
35 
36  NumVec<MPI_Request> myRequests_;
37  NumVec<MPI_Status> myStatuses_;
38 
39  bool done_;
40  bool fwded_;
41  // bool isAllocated_;
42 
43  Int myRoot_;
44  MPI_Comm comm_;
45  vector<Int> myDests_;
46  Int myRank_;
47  Int msgSize_;
48  bool isReady_;
49  Int mainRoot_;
50  Int tag_;
51  Int numRecv_;
52 
53 #ifdef COMM_PROFILE_BCAST
54  protected:
55  Int myGRoot_;
56  Int myGRank_;
57  //vector<int> Granks_;
58  public:
59  void SetGlobalComm(const MPI_Comm & pGComm){
60  if(commGlobRanks.count(comm_)==0){
61  MPI_Group group2 = MPI_GROUP_NULL;
62  MPI_Comm_group(pGComm, &group2);
63  MPI_Group group1 = MPI_GROUP_NULL;
64  MPI_Comm_group(comm_, &group1);
65 
66  Int size;
67  MPI_Comm_size(comm_,&size);
68  vector<int> globRanks(size);
69  vector<int> Lranks(size);
70  for(int i = 0; i<size;++i){Lranks[i]=i;}
71  MPI_Group_translate_ranks(group1, size, &Lranks[0],group2, &globRanks[0]);
72  commGlobRanks[comm_] = globRanks;
73  }
74  myGRoot_ = commGlobRanks[comm_][myRoot_];
75  myGRank_ = commGlobRanks[comm_][myRank_];
76  //Granks_.resize(myDests_.size());
77  //for(int i = 0; i<myDests_.size();++i){
78  // Granks_[i] = globRanks[myDests_[i]];
79  //}
80 
81  //statusOFS<<myDests_<<std::endl;
82  //statusOFS<<Granks_<<std::endl;
83  }
84 #endif
85 
86 
87 
88  protected:
89  virtual void buildTree(Int * ranks, Int rank_cnt)=0;
90 
91 
92 
93 
94 
95  public:
96 
97  TreeBcast2(){
98  comm_ = MPI_COMM_WORLD;
99  myRank_=-1;
100  myRoot_ = -1;
101  msgSize_ = -1;
102  numRecv_ = -1;
103  tag_=-1;
104  mainRoot_=-1;
105  isReady_ = false;
106  myData_ = NULL;
107  recvRequest_ = MPI_REQUEST_NULL;
108  fwded_=false;
109  // isAllocated_=false;
110  done_ = false;
111  }
112 
113 
114  TreeBcast2(const MPI_Comm & pComm, Int * ranks, Int rank_cnt,Int msgSize):TreeBcast2(){
115  comm_ = pComm;
116  MPI_Comm_rank(comm_,&myRank_);
117  myRoot_ = -1;
118  msgSize_ = msgSize;
119  numRecv_ = 0;
120  tag_=-1;
121  mainRoot_=ranks[0];
122  isReady_ = false;
123  }
124 
125 
126  virtual TreeBcast2 * clone() const = 0;
127 
128  TreeBcast2(const TreeBcast2 & Tree){
129  this->Copy(Tree);
130  }
131 
132  virtual void Copy(const TreeBcast2 & Tree){
133  this->comm_ = Tree.comm_;
134  this->myRank_ = Tree.myRank_;
135  this->myRoot_ = Tree.myRoot_;
136  this->msgSize_ = Tree.msgSize_;
137 
138  this->numRecv_ = Tree.numRecv_;
139  this->tag_= Tree.tag_;
140  this->mainRoot_= Tree.mainRoot_;
141  this->isReady_ = Tree.isReady_;
142  this->myDests_ = Tree.myDests_;
143 
144 
145  this->recvRequest_ = Tree.recvRequest_;
146  this->myRecvBuffer_ = Tree.myRecvBuffer_;
147  this->myRequests_ = Tree.myRequests_;
148  this->myStatuses_ = Tree.myStatuses_;
149  this->myData_ = Tree.myData_;
150  if(Tree.myData_==(T*)&Tree.myRecvBuffer_[0]){
151  this->myData_=(T*)&this->myRecvBuffer_[0];
152  }
153 
154 
155 
156 
157  this->fwded_= Tree.fwded_;
158  // this->isAllocated_= Tree.isAllocated_;
159  this->done_= Tree.done_;
160  }
161 
162  void Reset(){
163  assert(done_);
164  CleanupBuffers();
165  done_=false;
166  myData_ = NULL;
167  recvRequest_ = MPI_REQUEST_NULL;
168  fwded_=false;
169  // isAllocated_=false;
170  isReady_=false;
171  numRecv_ = 0;
172  }
173 
174  //bool IsAllocated(){return isAllocated_;}
175 
176  virtual ~TreeBcast2(){
177  CleanupBuffers();
178  }
179 
180 
181  static TreeBcast2<T> * Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,double rseed);
182 
183  virtual inline Int GetNumRecvMsg(){return numRecv_;}
184  virtual inline Int GetNumMsgToSend(){return GetDestCount();}
185  inline void SetDataReady(bool rdy){
186  isReady_=rdy;
187  //numRecv_ = rdy?1:0;
188  }
189  inline void SetTag(Int tag){ tag_ = tag;}
190  inline int GetTag(){ return tag_;}
191 
192  bool IsDone(){return done_;}
193  bool IsDataReady(){return isReady_;}
194  bool IsDataReceived(){return numRecv_==1;}
195 
196  Int * GetDests(){ return &myDests_[0];}
197  Int GetDest(Int i){ return myDests_[i];}
198  Int GetDestCount(){ return myDests_.size();}
199  Int GetRoot(){ return myRoot_;}
200 
201  bool IsRoot(){ return myRoot_==myRank_;}
202  Int GetMsgSize(){ return msgSize_;}
203 
204  void ForwardMessage( ){
205  if(myRequests_.m()!=GetDestCount()){
206  myRequests_.Resize(GetDestCount());
207  SetValue(myRequests_,MPI_REQUEST_NULL);
208  }
209  for( Int idxRecv = 0; idxRecv < myDests_.size(); ++idxRecv ){
210  Int iProc = myDests_[idxRecv];
211  // Use Isend to send to multiple targets
212  MPI_Isend( myData_, msgSize_, MPI_BYTE,
213  iProc, tag_,comm_, &myRequests_[idxRecv] );
214 
215 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
216  statusOFS<<myRank_<<" FWD to "<<iProc<<" on tag "<<tag_<<std::endl;
217 #endif
218 #ifdef COMM_PROFILE_BCAST
219 // statusOFS<<idxRecv<<std::endl;
220 // statusOFS<<myDests_<<std::endl;
221 // statusOFS<<Granks_<<std::endl;
222  //PROFILE_COMM(myGRank_,Granks_[idxRecv],tag_,msgSize_);
223  PROFILE_COMM(myGRank_,commGlobRanks[comm_][iProc],tag_,msgSize_);
224 #endif
225  } // for (iProc)
226  fwded_ = true;
227  }
228 
229  void CleanupBuffers(){
230  myRequests_.Clear();
231  myStatuses_.Clear();
232  myRecvBuffer_.Clear();
233  }
234 
235 
236  void SetLocalBuffer(T * locBuffer){
237  if(myData_!=NULL && myData_!=locBuffer){
238  if(numRecv_>0){
239  CopyLocalBuffer(locBuffer);
240  }
241  if(!fwded_){
242  myRecvBuffer_.Clear();
243  }
244  }
245 
246  myData_ = locBuffer;
247  }
248 
249  //async wait and forward
250  virtual bool Progress(){
251  if(done_){
252  return true;
253  }
254 
255  bool done = false;
256 
257  if (myRank_==myRoot_){
258  if(isReady_){
259  if(!fwded_){
260 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
261  statusOFS<<myRank_<<" FORWARDING on tag "<<tag_<<std::endl;
262 #endif
263  ForwardMessage();
264  }
265  else{
266 
267  if(myStatuses_.m()!=GetDestCount()){
268  myStatuses_.Resize(GetDestCount());
269  recvRequest_ = MPI_REQUEST_NULL;
270  }
271  //test the send requests
272  int flag = 0;
273  int reqCnt = GetDestCount();
274  if(reqCnt>0){
275  assert(reqCnt == myRequests_.m());
276  MPI_Testall(reqCnt,&myRequests_[0],&flag,&myStatuses_[0]);
277  done = flag==1;
278  }
279  else{
280  done=true;
281  }
282  }
283  }
284  }
285  else{
286  bool received = (numRecv_==1);
287 
288  if(!received){
289  if(recvRequest_ == MPI_REQUEST_NULL ){
290 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
291  statusOFS<<myRank_<<" POSTING RECV on tag "<<tag_<<std::endl;
292 #endif
293  //post the recv
294  PostRecv();
295  }
296  else{
297 
298  if(myStatuses_.m()!=GetDestCount()){
299  myStatuses_.Resize(GetDestCount());
300  recvRequest_ = MPI_REQUEST_NULL;
301  }
302 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
303  statusOFS<<myRank_<<" TESTING RECV on tag "<<tag_<<std::endl;
304 #endif
305  //test
306  int flag = 0;
307  MPI_Status stat;
308  int test = MPI_Test(&recvRequest_,&flag,&stat);
309  assert(test==MPI_SUCCESS);
310 
311  if(flag==1){
312  numRecv_=1;
313  received = true;
314 
315  if(!fwded_){
316 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
317  statusOFS<<myRank_<<" FORWARDING on tag "<<tag_<<std::endl;
318 #endif
319  ForwardMessage();
320  }
321  }
322  }
323  }
324  else {
325  assert(fwded_);
326  //test the send requests
327  int flag = 0;
328  int reqCnt = GetDestCount();
329  if(reqCnt>0){
330  assert(reqCnt == myRequests_.m());
331  MPI_Testall(reqCnt,&myRequests_[0],&flag,&myStatuses_[0]);
332  done = flag==1;
333  }
334  else{
335  done=true;
336  }
337  }
338  }
339 
340  if(done){
341  //free the unnecessary arrays
342  myRequests_.Clear();
343  myStatuses_.Clear();
344 #if ( _DEBUGlevel_ >= 1 ) || defined(BCAST_VERBOSE)
345  statusOFS<<myRank_<<" EVERYTHING COMPLETED on tag "<<tag_<<std::endl;
346 #endif
347  }
348 
349  done_ = done;
350 
351  return done;
352  }
353 
354  //blocking wait
355  void Wait(){
356  if(!done_){
357  while(!Progress());
358  }
359  }
360 
361  T * GetLocalBuffer(){
362  return myData_;
363  }
364 
365  virtual void PostRecv()
366  {
367  if(this->numRecv_<1 && this->recvRequest_==MPI_REQUEST_NULL && myRank_!=myRoot_){
368 
369  if(myData_==NULL){
370  myRecvBuffer_.Resize(msgSize_);
371  myData_ = (T*)&myRecvBuffer_[0];
372  }
373  MPI_Irecv( (char*)this->myData_, this->msgSize_, MPI_BYTE,
374  this->myRoot_, this->tag_,this->comm_, &this->recvRequest_ );
375  }
376  }
377 
378 
379 
380  void CopyLocalBuffer(T* destBuffer){
381  std::copy((char*)myData_,(char*)myData_+GetMsgSize(),(char*)destBuffer);
382  }
383 
384 
385 
386 
387  };
388 
389 
390 template< typename T>
391 class FTreeBcast2: public TreeBcast2<T>{
392  protected:
393  virtual void buildTree(Int * ranks, Int rank_cnt){
394 
395  Int idxStart = 0;
396  Int idxEnd = rank_cnt;
397 
398 
399 
400  this->myRoot_ = ranks[0];
401 
402  if(this->myRank_==this->myRoot_){
403  this->myDests_.insert(this->myDests_.begin(),&ranks[1],&ranks[0]+rank_cnt);
404  }
405 
406 #if (defined(BCAST_VERBOSE))
407  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
408  statusOFS<<"My dests are ";
409  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
410  statusOFS<<std::endl;
411 #endif
412  }
413 
414 
415 
416  public:
417  FTreeBcast2(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast2<T>(pComm,ranks,rank_cnt,msgSize){
418  //build the binary tree;
419  buildTree(ranks,rank_cnt);
420  }
421 
422 
423  virtual FTreeBcast2 * clone() const{
424  FTreeBcast2 * out = new FTreeBcast2(*this);
425  return out;
426  }
427 };
428 
429 template< typename T>
430 class BTreeBcast2: public TreeBcast2<T>{
431  protected:
455  virtual void buildTree(Int * ranks, Int rank_cnt){
456 
457  Int idxStart = 0;
458  Int idxEnd = rank_cnt;
459 
460 
461 
462  Int prevRoot = ranks[0];
463  while(idxStart<idxEnd){
464  Int curRoot = ranks[idxStart];
465  Int listSize = idxEnd - idxStart;
466 
467  if(listSize == 1){
468  if(curRoot == this->myRank_){
469  this->myRoot_ = prevRoot;
470  break;
471  }
472  }
473  else{
474  Int halfList = floor(ceil(double(listSize) / 2.0));
475  Int idxStartL = idxStart+1;
476  Int idxStartH = idxStart+halfList;
477 
478  if(curRoot == this->myRank_){
479  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
480  Int childL = ranks[idxStartL];
481  Int childR = ranks[idxStartH];
482 
483  this->myDests_.push_back(childL);
484  this->myDests_.push_back(childR);
485  }
486  else if ((idxEnd - idxStartH) > 0){
487  Int childR = ranks[idxStartH];
488  this->myDests_.push_back(childR);
489  }
490  else{
491  Int childL = ranks[idxStartL];
492  this->myDests_.push_back(childL);
493  }
494  this->myRoot_ = prevRoot;
495  break;
496  }
497 
498  if( this->myRank_ < ranks[idxStartH]){
499  idxStart = idxStartL;
500  idxEnd = idxStartH;
501  }
502  else{
503  idxStart = idxStartH;
504  }
505  prevRoot = curRoot;
506  }
507 
508  }
509 
510 #if (defined(BCAST_VERBOSE))
511  statusOFS<<"My root is "<<myRoot_<<std::endl;
512  statusOFS<<"My dests are ";
513  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
514  statusOFS<<std::endl;
515 #endif
516  }
517 
518 
519 
520  public:
521  BTreeBcast2(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast2<T>(pComm,ranks,rank_cnt,msgSize){
522  //build the binary tree;
523  buildTree(ranks,rank_cnt);
524  }
525 
526  virtual BTreeBcast2<T> * clone() const{
527  BTreeBcast2<T> * out = new BTreeBcast2<T>(*this);
528  return out;
529  }
530 
531 };
532 
533 
534 
535 template< typename T>
536 class ModBTreeBcast2: public TreeBcast2<T>{
537  protected:
538  double rseed_;
539 
540  virtual void buildTree(Int * ranks, Int rank_cnt){
541 
542  Int idxStart = 0;
543  Int idxEnd = rank_cnt;
544 
545  //sort the ranks with the modulo like operation
546  if(rank_cnt>1){
547  //Int new_idx = (int)((rand()+1.0) * (double)rank_cnt / ((double)RAND_MAX+1.0));
548 
549 // srand(ranks[0]+rank_cnt);
550  Int new_idx = (Int)rseed_ % (rank_cnt - 1) + 1;
551  //Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
552  //statusOFS<<"NEW IDX: "<<new_idx<<endl;
553 
554 
555 
556  Int * new_start = &ranks[new_idx];
557 
558  //for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
559 
560 // Int * new_start = std::lower_bound(&ranks[1],&ranks[0]+rank_cnt,ranks[0]);
561  //just swap the two chunks r[0] | r[1] --- r[new_start-1] | r[new_start] --- r[end]
562  // becomes r[0] | r[new_start] --- r[end] | r[1] --- r[new_start-1]
563  std::rotate(&ranks[1], new_start, &ranks[0]+rank_cnt);
564 
565  //for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
566  }
567 
568  Int prevRoot = ranks[0];
569  while(idxStart<idxEnd){
570  Int curRoot = ranks[idxStart];
571  Int listSize = idxEnd - idxStart;
572 
573  if(listSize == 1){
574  if(curRoot == this->myRank_){
575  this->myRoot_ = prevRoot;
576  break;
577  }
578  }
579  else{
580  Int halfList = floor(ceil(double(listSize) / 2.0));
581  Int idxStartL = idxStart+1;
582  Int idxStartH = idxStart+halfList;
583 
584  if(curRoot == this->myRank_){
585  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
586  Int childL = ranks[idxStartL];
587  Int childR = ranks[idxStartH];
588 
589  this->myDests_.push_back(childL);
590  this->myDests_.push_back(childR);
591  }
592  else if ((idxEnd - idxStartH) > 0){
593  Int childR = ranks[idxStartH];
594  this->myDests_.push_back(childR);
595  }
596  else{
597  Int childL = ranks[idxStartL];
598  this->myDests_.push_back(childL);
599  }
600  this->myRoot_ = prevRoot;
601  break;
602  }
603 
604  //not true anymore ?
605  //first half to
606 TIMER_START(FIND_RANK);
607  Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], this->myRank_);
608 TIMER_STOP(FIND_RANK);
609  if( pos != &ranks[idxStartH]){
610  idxStart = idxStartL;
611  idxEnd = idxStartH;
612  }
613  else{
614  idxStart = idxStartH;
615  }
616  prevRoot = curRoot;
617  }
618 
619  }
620 
621 #if (defined(REDUCE_VERBOSE))
622  statusOFS<<"My root is "<<myRoot_<<std::endl;
623  statusOFS<<"My dests are ";
624  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
625  statusOFS<<std::endl;
626 #endif
627  }
628 
629 
630 
631  public:
632  ModBTreeBcast2(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed):TreeBcast2<T>(pComm,ranks,rank_cnt,msgSize){
633  //build the binary tree;
634  rseed_ = rseed;
635  buildTree(ranks,rank_cnt);
636  }
637 
638  //virtual void Copy(const ModBTreeBcast & Tree){
639  // comm_ = Tree.comm_;
640  // myRank_ = Tree.myRank_;
641  // myRoot_ = Tree.myRoot_;
642  // msgSize_ = Tree.msgSize_;
643 
644  // numRecv_ = Tree.numRecv_;
645  // tag_= Tree.tag_;
646  // mainRoot_= Tree.mainRoot_;
647  // isReady_ = Tree.isReady_;
648  // myDests_ = Tree.myDests_;
649 
650  // rseed_ = Tree.rseed_;
651  // myRank_ = Tree.myRank_;
652  // myRoot_ = Tree.myRoot_;
653  // msgSize_ = Tree.msgSize_;
654 
655  // numRecv_ = Tree.numRecv_;
656  // tag_= Tree.tag_;
657  // mainRoot_= Tree.mainRoot_;
658  // isReady_ = Tree.isReady_;
659  // myDests_ = Tree.myDests_;
660  //}
661 
662  virtual ModBTreeBcast2 * clone() const{
663  ModBTreeBcast2 * out = new ModBTreeBcast2(*this);
664  return out;
665  }
666 
667 };
668 
669 
670 
671 #endif
672 
673 
674 
675 
676 
677 
678 
679 
680 
681 
682 
683 
684 
685 
686 
687 
688 
689 
690 
691 
692 
693 
694 
695 
696 
697 
698 
699 
700 
701 
702 
703 
704 
705 class TreeBcast{
706  protected:
707  Int myRoot_;
708  MPI_Comm comm_;
709  vector<Int> myDests_;
710  Int myRank_;
711  Int msgSize_;
712  bool isReady_;
713  Int mainRoot_;
714  Int tag_;
715  Int numRecv_;
716 
717 
718 #if defined(COMM_PROFILE_BCAST) || defined(COMM_PROFILE)
719 protected:
720  Int myGRank_;
721  Int myGRoot_;
722  //vector<int> Granks_;
723 public:
724  void SetGlobalComm(const MPI_Comm & pGComm){
725  if(commGlobRanks.count(comm_)==0){
726  MPI_Group group2 = MPI_GROUP_NULL;
727  MPI_Comm_group(pGComm, &group2);
728  MPI_Group group1 = MPI_GROUP_NULL;
729  MPI_Comm_group(comm_, &group1);
730 
731  Int size;
732  MPI_Comm_size(comm_,&size);
733  vector<int> globRanks(size);
734  vector<int> Lranks(size);
735  for(int i = 0; i<size;++i){Lranks[i]=i;}
736  MPI_Group_translate_ranks(group1, size, &Lranks[0],group2, &globRanks[0]);
737  commGlobRanks[comm_] = globRanks;
738  }
739  myGRoot_ = commGlobRanks[comm_][myRoot_];
740  myGRank_ = commGlobRanks[comm_][myRank_];
741  // Granks_.resize(myDests_.size());
742  // for(int i = 0; i<myDests_.size();++i){
743  // Granks_[i] = globRanks[myDests_[i]];
744  // }
745  //statusOFS<<myDests_<<std::endl;
746  //statusOFS<<Granks_<<std::endl;
747  }
748 #endif
749 
750 
751 
752  virtual void buildTree(Int * ranks, Int rank_cnt)=0;
753  public:
754  TreeBcast(){
755  comm_ = MPI_COMM_WORLD;
756  myRank_=-1;
757  myRoot_ = -1;
758  msgSize_ = -1;
759  numRecv_ = -1;
760  tag_=-1;
761  mainRoot_=-1;
762  isReady_ = false;
763  }
764 
765 
766  TreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt,Int msgSize){
767  comm_ = pComm;
768  MPI_Comm_rank(comm_,&myRank_);
769  myRoot_ = -1;
770  msgSize_ = msgSize;
771 
772  numRecv_ = 0;
773  tag_=-1;
774  mainRoot_=ranks[0];
775  isReady_ = false;
776  }
777 
778  TreeBcast(const TreeBcast & Tree){
779  Copy(Tree);
780  }
781 
782  virtual void Copy(const TreeBcast & Tree){
783  comm_ = Tree.comm_;
784  myRank_ = Tree.myRank_;
785  myRoot_ = Tree.myRoot_;
786  msgSize_ = Tree.msgSize_;
787 
788  tag_= Tree.tag_;
789  mainRoot_= Tree.mainRoot_;
790  myDests_ = Tree.myDests_;
791 
792  //numRecv_ = Tree.numRecv_;
793  //isReady_ = Tree.isReady_;
794  isReady_ = false;
795  numRecv_ = 0;
796  }
797 
798  virtual TreeBcast * clone() const = 0;
799 
800  void Reset(){
801 //statusOFS<<"RESET CALLED"<<std::endl;
802  this->numRecv_ = 0;
803  this->isReady_ = false;
804  }
805 
806 
807 
808  static TreeBcast * Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,double rseed);
809 
810  virtual inline Int GetNumRecvMsg(){return numRecv_;}
811  virtual inline Int GetNumMsgToRecv(){return 1;}
812  inline void SetDataReady(bool rdy){ isReady_=rdy; }
813  inline void SetTag(Int tag){ tag_ = tag;}
814  inline int GetTag(){ return tag_;}
815 
816 
817  Int * GetDests(){ return &myDests_[0];}
818  Int GetDest(Int i){ return myDests_[i];}
819  Int GetDestCount(){ return myDests_.size();}
820  Int GetRoot(){ return myRoot_;}
821  Int GetMsgSize(){ return msgSize_;}
822 
823  void ForwardMessage( char * data, size_t size, int tag, MPI_Request * requests ){
824  tag_ = tag;
825  for( Int idxRecv = 0; idxRecv < myDests_.size(); ++idxRecv ){
826  Int iProc = myDests_[idxRecv];
827  // Use Isend to send to multiple targets
828  MPI_Isend( data, size, MPI_BYTE,
829  iProc, tag,comm_, &requests[2*iProc+1] );
830 
831 #if defined(COMM_PROFILE_BCAST) || defined(COMM_PROFILE)
832 // statusOFS<<idxRecv<<std::endl;
833 // statusOFS<<Granks_<<std::endl;
834  //PROFILE_COMM(myGRank_,Granks_[idxRecv],tag,msgSize_);
835  PROFILE_COMM(myGRank_,commGlobRanks[comm_][iProc],tag_,msgSize_);
836 #endif
837  } // for (iProc)
838  }
839 
840 
841 };
842 
843 class FTreeBcast: public TreeBcast{
844  protected:
845  virtual void buildTree(Int * ranks, Int rank_cnt){
846 
847  Int idxStart = 0;
848  Int idxEnd = rank_cnt;
849 
850 
851 
852  myRoot_ = ranks[0];
853 
854  if(myRank_==myRoot_){
855  myDests_.insert(myDests_.begin(),&ranks[1],&ranks[0]+rank_cnt);
856  }
857 
858 #if (defined(BCAST_VERBOSE))
859  statusOFS<<"My root is "<<myRoot_<<std::endl;
860  statusOFS<<"My dests are ";
861  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
862  statusOFS<<std::endl;
863 #endif
864  }
865 
866 
867 
868  public:
869  FTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
870  //build the binary tree;
871  buildTree(ranks,rank_cnt);
872  }
873 
874 
875  virtual FTreeBcast * clone() const{
876  FTreeBcast * out = new FTreeBcast(*this);
877  return out;
878  }
879 };
880 
881 
882 
883 class BTreeBcast: public TreeBcast{
884  protected:
908  virtual void buildTree(Int * ranks, Int rank_cnt){
909 
910  Int idxStart = 0;
911  Int idxEnd = rank_cnt;
912 
913 
914 
915  Int prevRoot = ranks[0];
916  while(idxStart<idxEnd){
917  Int curRoot = ranks[idxStart];
918  Int listSize = idxEnd - idxStart;
919 
920  if(listSize == 1){
921  if(curRoot == myRank_){
922  myRoot_ = prevRoot;
923  break;
924  }
925  }
926  else{
927  Int halfList = floor(ceil(double(listSize) / 2.0));
928  Int idxStartL = idxStart+1;
929  Int idxStartH = idxStart+halfList;
930 
931  if(curRoot == myRank_){
932  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
933  Int childL = ranks[idxStartL];
934  Int childR = ranks[idxStartH];
935 
936  myDests_.push_back(childL);
937  myDests_.push_back(childR);
938  }
939  else if ((idxEnd - idxStartH) > 0){
940  Int childR = ranks[idxStartH];
941  myDests_.push_back(childR);
942  }
943  else{
944  Int childL = ranks[idxStartL];
945  myDests_.push_back(childL);
946  }
947  myRoot_ = prevRoot;
948  break;
949  }
950 
951  if( myRank_ < ranks[idxStartH]){
952  idxStart = idxStartL;
953  idxEnd = idxStartH;
954  }
955  else{
956  idxStart = idxStartH;
957  }
958  prevRoot = curRoot;
959  }
960 
961  }
962 
963 #if (defined(BCAST_VERBOSE))
964  statusOFS<<"My root is "<<myRoot_<<std::endl;
965  statusOFS<<"My dests are ";
966  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
967  statusOFS<<std::endl;
968 #endif
969  }
970 
971 
972 
973  public:
974  BTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
975  //build the binary tree;
976  buildTree(ranks,rank_cnt);
977  }
978 
979  virtual BTreeBcast * clone() const{
980  BTreeBcast * out = new BTreeBcast(*this);
981  return out;
982  }
983 
984 };
985 
986 
987 
988 class ModBTreeBcast: public TreeBcast{
989  protected:
990  double rseed_;
991 
992  virtual void buildTree(Int * ranks, Int rank_cnt){
993 
994  Int idxStart = 0;
995  Int idxEnd = rank_cnt;
996 
997  //sort the ranks with the modulo like operation
998  if(rank_cnt>1){
999  //Int new_idx = (int)((rand()+1.0) * (double)rank_cnt / ((double)RAND_MAX+1.0));
1000 
1001 // srand(ranks[0]+rank_cnt);
1002  //Int new_idx = (Int)rseed_ % (rank_cnt - 1) + 1;
1003  Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
1004  //statusOFS<<"NEW IDX: "<<new_idx<<endl;
1005 
1006 
1007 
1008  Int * new_start = &ranks[new_idx];
1009 
1010  //for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1011 
1012 // Int * new_start = std::lower_bound(&ranks[1],&ranks[0]+rank_cnt,ranks[0]);
1013  //just swap the two chunks r[0] | r[1] --- r[new_start-1] | r[new_start] --- r[end]
1014  // becomes r[0] | r[new_start] --- r[end] | r[1] --- r[new_start-1]
1015  std::rotate(&ranks[1], new_start, &ranks[0]+rank_cnt);
1016 
1017  //for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1018  }
1019 
1020  Int prevRoot = ranks[0];
1021  while(idxStart<idxEnd){
1022  Int curRoot = ranks[idxStart];
1023  Int listSize = idxEnd - idxStart;
1024 
1025  if(listSize == 1){
1026  if(curRoot == myRank_){
1027  myRoot_ = prevRoot;
1028  break;
1029  }
1030  }
1031  else{
1032  Int halfList = floor(ceil(double(listSize) / 2.0));
1033  Int idxStartL = idxStart+1;
1034  Int idxStartH = idxStart+halfList;
1035 
1036  if(curRoot == myRank_){
1037  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1038  Int childL = ranks[idxStartL];
1039  Int childR = ranks[idxStartH];
1040 
1041  myDests_.push_back(childL);
1042  myDests_.push_back(childR);
1043  }
1044  else if ((idxEnd - idxStartH) > 0){
1045  Int childR = ranks[idxStartH];
1046  myDests_.push_back(childR);
1047  }
1048  else{
1049  Int childL = ranks[idxStartL];
1050  myDests_.push_back(childL);
1051  }
1052  myRoot_ = prevRoot;
1053  break;
1054  }
1055 
1056  //not true anymore ?
1057  //first half to
1058 TIMER_START(FIND_RANK);
1059  Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], myRank_);
1060 TIMER_STOP(FIND_RANK);
1061  if( pos != &ranks[idxStartH]){
1062  idxStart = idxStartL;
1063  idxEnd = idxStartH;
1064  }
1065  else{
1066  idxStart = idxStartH;
1067  }
1068  prevRoot = curRoot;
1069  }
1070 
1071  }
1072 
1073 #if (defined(REDUCE_VERBOSE))
1074  statusOFS<<"My root is "<<myRoot_<<std::endl;
1075  statusOFS<<"My dests are ";
1076  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
1077  statusOFS<<std::endl;
1078 #endif
1079  }
1080 
1081 
1082 
1083  public:
1084  ModBTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed):TreeBcast(pComm,ranks,rank_cnt,msgSize){
1085  //build the binary tree;
1086  rseed_ = rseed;
1087  buildTree(ranks,rank_cnt);
1088  }
1089 
1090  virtual void Copy(const ModBTreeBcast & Tree){
1091  ((TreeBcast*)this)->Copy(*((const TreeBcast*)&Tree));
1112 
1113 
1114  rseed_ = Tree.rseed_;
1115 
1116  }
1117 
1118  virtual ModBTreeBcast * clone() const{
1119  ModBTreeBcast * out = new ModBTreeBcast(*this);
1120  return out;
1121  }
1122 
1123 };
1124 
1125 
1127  protected:
1128  virtual void buildTree(Int * ranks, Int rank_cnt){
1129 
1130  Int idxStart = 0;
1131  Int idxEnd = rank_cnt;
1132 
1133  //random permute ranks
1134  if(rank_cnt>1){
1135  for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1136  srand(ranks[0]);
1137  std::random_shuffle(&ranks[1],&ranks[0]+rank_cnt);
1138  for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1139 
1140  }
1141 
1142  Int prevRoot = ranks[0];
1143  while(idxStart<idxEnd){
1144  Int curRoot = ranks[idxStart];
1145  Int listSize = idxEnd - idxStart;
1146 
1147  if(listSize == 1){
1148  if(curRoot == myRank_){
1149  myRoot_ = prevRoot;
1150  break;
1151  }
1152  }
1153  else{
1154  Int halfList = floor(ceil(double(listSize) / 2.0));
1155  Int idxStartL = idxStart+1;
1156  Int idxStartH = idxStart+halfList;
1157 
1158  if(curRoot == myRank_){
1159  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1160  Int childL = ranks[idxStartL];
1161  Int childR = ranks[idxStartH];
1162 
1163  myDests_.push_back(childL);
1164  myDests_.push_back(childR);
1165  }
1166  else if ((idxEnd - idxStartH) > 0){
1167  Int childR = ranks[idxStartH];
1168  myDests_.push_back(childR);
1169  }
1170  else{
1171  Int childL = ranks[idxStartL];
1172  myDests_.push_back(childL);
1173  }
1174  myRoot_ = prevRoot;
1175  break;
1176  }
1177 
1178  //not true anymore ?
1179  //first half to
1180  Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], myRank_);
1181  if( pos != &ranks[idxStartH]){
1182  idxStart = idxStartL;
1183  idxEnd = idxStartH;
1184  }
1185  else{
1186  idxStart = idxStartH;
1187  }
1188  prevRoot = curRoot;
1189  }
1190 
1191  }
1192 
1193 #if (defined(REDUCE_VERBOSE))
1194  statusOFS<<"My root is "<<myRoot_<<std::endl;
1195  statusOFS<<"My dests are ";
1196  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
1197  statusOFS<<std::endl;
1198 #endif
1199  }
1200 
1201 
1202 
1203  public:
1204  RandBTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
1205  //build the binary tree;
1206  buildTree(ranks,rank_cnt);
1207  }
1208 
1209  virtual RandBTreeBcast * clone() const{
1210  RandBTreeBcast * out = new RandBTreeBcast(*this);
1211  return out;
1212  }
1213 
1214 };
1215 
1216 
1217 
1218 
1219 class PalmTreeBcast: public TreeBcast{
1220  protected:
1221  virtual void buildTree(Int * ranks, Int rank_cnt){
1222  Int numLevel = floor(log2(rank_cnt));
1223  Int numRoots = 0;
1224  for(Int level=0;level<numLevel;++level){
1225  numRoots = std::min( rank_cnt, numRoots + (Int)pow(2.0,level));
1226  Int numNextRoots = std::min(rank_cnt,numRoots + (Int)pow(2.0,(level+1)));
1227  Int numReceivers = numNextRoots - numRoots;
1228  for(Int ip = 0; ip<numRoots;++ip){
1229  Int p = ranks[ip];
1230  for(Int ir = ip; ir<numReceivers;ir+=numRoots){
1231  Int r = ranks[numRoots+ir];
1232  if(r==myRank_){
1233  myRoot_ = p;
1234  }
1235 
1236  if(p==myRank_){
1237  myDests_.push_back(r);
1238  }
1239  }
1240  }
1241  }
1242 
1243 #if (defined(BCAST_VERBOSE))
1244  statusOFS<<"My root is "<<myRoot_<<std::endl;
1245  statusOFS<<"My dests are ";
1246  for(int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<" ";}
1247  statusOFS<<std::endl;
1248 #endif
1249  }
1250 
1251 
1252 
1253  public:
1254  PalmTreeBcast(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
1255  //build the binary tree;
1256  buildTree(ranks,rank_cnt);
1257  }
1258 
1259  virtual PalmTreeBcast * clone() const{
1260  PalmTreeBcast * out = new PalmTreeBcast(*this);
1261  return out;
1262  }
1263 
1264 };
1265 
1266 
1267 template< typename T>
1268 class TreeReduce: public TreeBcast{
1269  protected:
1270  T * myData_;
1271  MPI_Request sendRequest_;
1272  NumVec<char> myLocalBuffer_;
1273  NumVec<char> myRecvBuffers_;
1274  NumVec<T *> remoteData_;
1275  NumVec<MPI_Request> myRequests_;
1276  NumVec<MPI_Status> myStatuses_;
1277  NumVec<int> recvIdx_;
1278 
1279  bool fwded_;
1280  bool done_;
1281  bool isAllocated_;
1282  Int numRecvPosted_;
1283 
1284  public:
1285  TreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeBcast(pComm,ranks,rank_cnt,msgSize){
1286  myData_ = NULL;
1287  sendRequest_ = MPI_REQUEST_NULL;
1288  fwded_=false;
1289  done_=false;
1290  isAllocated_=false;
1291  numRecvPosted_= 0;
1292  }
1293 
1294 
1295  virtual TreeReduce * clone() const = 0;
1296 
1297  TreeReduce(const TreeReduce & Tree){
1298  this->Copy(Tree);
1299  }
1300 
1301  virtual void Copy(const TreeReduce & Tree){
1302  ((TreeBcast*)this)->Copy(*(const TreeBcast*)&Tree);
1303 
1304 // this->comm_ = Tree.comm_;
1305 // this->myRank_ = Tree.myRank_;
1306 // this->myRoot_ = Tree.myRoot_;
1307 // this->msgSize_ = Tree.msgSize_;
1308 // this->numRecv_ = Tree.numRecv_;
1309 // this->tag_= Tree.tag_;
1310 // this->mainRoot_= Tree.mainRoot_;
1311 // this->isReady_ = Tree.isReady_;
1312 // this->myDests_ = Tree.myDests_;
1313 
1314 
1315  this->myData_ = NULL;
1316  this->sendRequest_ = MPI_REQUEST_NULL;
1317  this->fwded_= false;
1318  this->done_= false;
1319  this->isAllocated_= Tree.isAllocated_;
1320  this->numRecvPosted_= 0;
1321 
1322  //this->myLocalBuffer_.resize(Tree.myLocalBuffer_.size());
1323  //this->remoteData_ = Tree.remoteData_;
1324  //this->recvIdx_ = Tree.recvIdx_;
1325 
1326  CleanupBuffers();
1327  }
1328 
1329 
1330 
1331  bool IsAllocated(){return isAllocated_;}
1332 
1333  virtual ~TreeReduce(){
1334  CleanupBuffers();
1335  }
1336 
1337 
1338  static TreeReduce<T> * Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,double rseed);
1339 
1340  virtual inline Int GetNumMsgToRecv(){return GetDestCount();}
1341 
1342  virtual void AllocRecvBuffers(){
1343  remoteData_.Resize(GetDestCount());
1344  //SetValue(remoteData_,(T*)NULL);
1345 
1346  //assert(myRecvBuffers_==NULL);
1347  //myRecvBuffers_ = new char[GetDestCount()*msgSize_];
1348 
1349 
1350  myRecvBuffers_.Resize(GetDestCount()*msgSize_);
1351  //SetValue(myRecvBuffers_,(char)0);
1352 
1353  for( Int idxRecv = 0; idxRecv < GetDestCount(); ++idxRecv ){
1354  remoteData_[idxRecv] = (T*)&(myRecvBuffers_[idxRecv*msgSize_]);
1355  //Int nelem = msgSize_ / sizeof(T);
1356  //std::fill(remoteData_[idxRecv],remoteData_[idxRecv]+nelem,ZERO<T>());
1357  }
1358 
1359  myRequests_.Resize(GetDestCount());
1360  SetValue(myRequests_,MPI_REQUEST_NULL);
1361  myStatuses_.Resize(GetDestCount());
1362  recvIdx_.Resize(GetDestCount());
1363 
1364  sendRequest_ = MPI_REQUEST_NULL;
1365 
1366  isAllocated_ = true;
1367  }
1368 
1369  void CleanupBuffers(){
1370  myLocalBuffer_.Clear();
1371 // if(myLocalBuffer_!=NULL){
1372 // delete []myLocalBuffer_;
1373 // myLocalBuffer_=NULL;
1374 // }
1375 
1376 
1377  remoteData_.Clear();
1378 // myRecvBuffers_.Clear();
1379 // if(myRecvBuffers_!=NULL){
1380 // delete []myRecvBuffers_;
1381 // myRecvBuffers_=NULL;
1382 // }
1383 
1384 
1385  myRequests_.Clear();
1386  myStatuses_.Clear();
1387  recvIdx_.Clear();
1388 
1389 
1390 // if(myLocalBuffer_!=NULL){
1391 // delete [] myLocalBuffer_;
1392 // }
1393 // myLocalBuffer_=NULL;
1394 
1395 
1396  }
1397 
1398  void Reset(){
1399 // assert(done_ || myDests_.size()==0);
1400  CleanupBuffers();
1401  done_=false;
1402 
1403  myData_ = NULL;
1404  sendRequest_ = MPI_REQUEST_NULL;
1405  fwded_=false;
1406  isAllocated_=false;
1407  isReady_=false;
1408  numRecv_ = 0;
1409  numRecvPosted_= 0;
1410  }
1411 
1412 
1413 
1414  void SetLocalBuffer(T * locBuffer){
1415  if(myData_!=NULL && myData_!=locBuffer){
1416 
1417 //statusOFS<<"DOING SUM"<<std::endl;
1418 //gdb_lock();
1419  blas::Axpy(msgSize_/sizeof(T), ONE<T>(), myData_, 1, locBuffer, 1 );
1420  myLocalBuffer_.Clear();
1421  }
1422 
1423  myData_ = locBuffer;
1424  }
1425 
1426  inline bool AccumulationDone(){
1427  if(myRank_==myRoot_ && isAllocated_){
1428  isReady_=true;
1429  }
1430  return isReady_ && (numRecv_ == GetDestCount());
1431  }
1432 
1433 
1434  inline bool IsDone(){
1435  if(myRank_==myRoot_ && isAllocated_){
1436  isReady_=true;
1437  }
1438 
1439  bool retVal = AccumulationDone();
1440  if(myRoot_ != myRank_ && !fwded_){
1441  retVal = false;
1442  }
1443 
1444  if (retVal && myRoot_ != myRank_ && fwded_){
1445  //test the send request
1446  int flag = 0;
1447  MPI_Test(&sendRequest_,&flag,MPI_STATUS_IGNORE);
1448  retVal = flag==1;
1449  }
1450 
1451  return retVal;
1452  }
1453 
1454  //async wait and forward
1455  virtual bool Progress(){
1456 
1457  if(done_){
1458  return true;
1459  }
1460  if(!isAllocated_){
1461  return false;
1462  }
1463 
1464  if(myRank_==myRoot_ && isAllocated_){
1465  isReady_=true;
1466  }
1467 
1468 // if(this->numRecvPosted_==0){
1469 // this->PostFirstRecv();
1470 // }
1471 
1472  bool retVal = AccumulationDone();
1473  if(isReady_ && !retVal){
1474 
1475  //assert(isAllocated_);
1476 
1477  //mpi_test_some on my requests
1478  int recvCount = -1;
1479  int reqCnt = GetDestCount();
1480  assert(reqCnt == myRequests_.m());
1481  MPI_Testsome(reqCnt,&myRequests_[0],&recvCount,&recvIdx_[0],&myStatuses_[0]);
1482  //if something has been received, accumulate and potentially forward it
1483  for(Int i = 0;i<recvCount;++i ){
1484  Int idx = recvIdx_[i];
1485 
1486  if(idx!=MPI_UNDEFINED){
1487 
1488  Int size = 0;
1489  MPI_Get_count(&myStatuses_[i], MPI_BYTE, &size);
1490 
1491 
1492 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1493  statusOFS<<myRank_<<" RECVD from "<<myStatuses_[i].MPI_SOURCE<<" on tag "<<tag_<<std::endl;
1494 #endif
1495  if(size>0){
1496  //If myData is 0, allocate to the size of what has been received
1497  if(myData_==NULL){
1498  //assert(size==msgSize_);
1499  myLocalBuffer_.Resize(msgSize_);
1500 
1501  myData_ = (T*)&myLocalBuffer_[0];
1502  Int nelem = +msgSize_/sizeof(T);
1503  std::fill(myData_,myData_+nelem,ZERO<T>());
1504  }
1505 
1506  Reduce(idx,i);
1507 
1508  }
1509 
1510  numRecv_++;
1511  //MPI_Request_free(&myRequests_[idx]);
1512  }
1513  }
1514 
1515  }
1516  else if (isReady_ && sendRequest_ == MPI_REQUEST_NULL && myRoot_ != myRank_ && !fwded_){
1517  //free the unnecessary arrays
1518  myRecvBuffers_.Clear();
1519  myRequests_.Clear();
1520  myStatuses_.Clear();
1521  recvIdx_.Clear();
1522 
1523  //assert(isAllocated_);
1524 
1525  //Forward
1526  Forward();
1527  retVal = false;
1528  }
1529  else{
1530  retVal = IsDone();
1531  if(retVal){
1532  //free the unnecessary arrays
1533  myRecvBuffers_.Clear();
1534  myRequests_.Clear();
1535  myStatuses_.Clear();
1536  recvIdx_.Clear();
1537  }
1538  }
1539 
1540 
1541  done_ = retVal;
1542  return retVal;
1543  }
1544 
1545  //blocking wait
1546  void Wait(){
1547  if(!done_){
1548  while(!Progress());
1549  }
1550  }
1551 
1552  T * GetLocalBuffer(){
1553  return myData_;
1554  }
1555 
1556 
1557 
1558  void CopyLocalBuffer(T* destBuffer){
1559  std::copy((char*)myData_,(char*)myData_+GetMsgSize(),(char*)destBuffer);
1560  }
1561 
1562 
1563  virtual void PostFirstRecv()
1564  {
1565  if(this->GetDestCount()>this->numRecvPosted_){
1566  for( Int idxRecv = 0; idxRecv < myDests_.size(); ++idxRecv ){
1567  Int iProc = myDests_[idxRecv];
1568  //assert(msgSize_>=0);
1569  MPI_Irecv( (char*)remoteData_[idxRecv], msgSize_, MPI_BYTE,
1570  iProc, tag_,comm_, &myRequests_[idxRecv] );
1571  this->numRecvPosted_++;
1572  } // for (iProc)
1573  }
1574  }
1575 
1576 
1577 
1578 
1579  protected:
1580  virtual void Reduce( Int idxRecv, Int idReq){
1581  //add thing to my data
1582  blas::Axpy(msgSize_/sizeof(T), ONE<T>(), remoteData_[idxRecv], 1, myData_, 1 );
1583  }
1584 
1585  void Forward(){
1586  //forward to my root if I have reseived everything
1587  Int iProc = myRoot_;
1588  // Use Isend to send to multiple targets
1589  if(myData_==NULL){
1590  MPI_Isend( NULL, 0, MPI_BYTE,
1591  iProc, tag_,comm_, &sendRequest_ );
1592 #ifdef COMM_PROFILE
1593  PROFILE_COMM(myGRank_,myGRoot_,tag_,0);
1594 #endif
1595  }
1596  else{
1597  MPI_Isend( (char*)myData_, msgSize_, MPI_BYTE,
1598  iProc, tag_,comm_, &sendRequest_ );
1599 #ifdef COMM_PROFILE
1600  PROFILE_COMM(myGRank_,myGRoot_,tag_,msgSize_);
1601 #endif
1602  }
1603 
1604 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1605  statusOFS<<myRank_<<" FWD to "<<iProc<<" on tag "<<tag_<<std::endl;
1606 #endif
1607 
1608  fwded_ = true;
1609 
1610  }
1611 
1612 };
1613 
1614 
1615 template< typename T>
1616 class FTreeReduce: public TreeReduce<T>{
1617  protected:
1618  virtual void buildTree(Int * ranks, Int rank_cnt){
1619 
1620  Int idxStart = 0;
1621  Int idxEnd = rank_cnt;
1622 
1623 
1624 
1625  this->myRoot_ = ranks[0];
1626 
1627  if(this->myRank_==this->myRoot_){
1628  this->myDests_.insert(this->myDests_.begin(),&ranks[1],&ranks[0]+rank_cnt);
1629  }
1630 
1631 #if (defined(REDUCE_VERBOSE))
1632  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
1633  statusOFS<<"My dests are ";
1634  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
1635  statusOFS<<std::endl;
1636 #endif
1637  }
1638 
1639  virtual void Reduce( ){
1640  //add thing to my data
1641  blas::Axpy(this->msgSize_/sizeof(T), ONE<T>(), this->remoteData_[0], 1, this->myData_, 1 );
1642 
1643 
1644 #if (defined(REDUCE_DEBUG))
1645  statusOFS << std::endl << /*"["<<snode.Index<<"]*/" Recv contrib"<< std::endl;
1646  for(int i = 0; i < this->msgSize_/sizeof(T); ++i){
1647  statusOFS<< this->remoteData_[0][i]<< " ";
1648  if(i%3==0){statusOFS<<std::endl;}
1649  }
1650  statusOFS<<std::endl;
1651 
1652  statusOFS << std::endl << /*"["<<snode.Index<<"]*/" Reduce buffer now is"<< std::endl;
1653  for(int i = 0; i < this->msgSize_/sizeof(T); ++i){
1654  statusOFS<< this->myData_[i]<< " ";
1655  if(i%3==0){statusOFS<<std::endl;}
1656  }
1657  statusOFS<<std::endl;
1658 #endif
1659 
1660  }
1661 
1662 
1663 
1664  public:
1665  FTreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
1666  buildTree(ranks,rank_cnt);
1667  }
1668 
1669  virtual void PostFirstRecv()
1670  {
1671 // if(!this->isAllocated_){
1672 // this->AllocRecvBuffers();
1673 // }
1674  if(this->isAllocated_ && this->GetDestCount()>this->numRecvPosted_){
1675  MPI_Irecv( (char*)this->remoteData_[0], this->msgSize_, MPI_BYTE,
1676  MPI_ANY_SOURCE, this->tag_,this->comm_, &this->myRequests_[0] );
1677  this->numRecvPosted_++;
1678  }
1679  }
1680 
1681  virtual void AllocRecvBuffers(){
1682  if(this->GetDestCount()>0){
1683  this->remoteData_.Resize(1);
1684 
1685  this->myRecvBuffers_.Resize(this->msgSize_);
1686 
1687  this->remoteData_[0] = (T*)&(this->myRecvBuffers_[0]);
1688 
1689  this->myRequests_.Resize(1);
1690  SetValue(this->myRequests_,MPI_REQUEST_NULL);
1691  this->myStatuses_.Resize(1);
1692  this->recvIdx_.Resize(1);
1693  }
1694 
1695  this->sendRequest_ = MPI_REQUEST_NULL;
1696 
1697  this->isAllocated_ = true;
1698  }
1699 
1700  virtual bool Progress(){
1701 
1702  if(!this->isAllocated_){
1703  return true;
1704  }
1705 
1706 
1707  if(this->myRank_==this->myRoot_ && this->isAllocated_){
1708  this->isReady_=true;
1709  }
1710 
1711 // if(this->numRecvPosted_==0){
1712 // this->PostFirstRecv();
1713 // }
1714 
1715  bool retVal = this->AccumulationDone();
1716  if(this->isReady_ && !retVal){
1717 
1718  //assert(this->isAllocated_);
1719 
1720  //mpi_test_some on my requests
1721  int recvCount = -1;
1722  int reqCnt = 1;
1723 
1724  MPI_Testsome(reqCnt,&this->myRequests_[0],&recvCount,&this->recvIdx_[0],&this->myStatuses_[0]);
1725  //MPI_Waitsome(reqCnt,&myRequests_[0],&recvCount,&recvIdx_[0],&myStatuses_[0]);
1726  //if something has been received, accumulate and potentially forward it
1727  for(Int i = 0;i<recvCount;++i ){
1728  Int idx = this->recvIdx_[i];
1729 
1730  if(idx!=MPI_UNDEFINED){
1731 
1732  Int size = 0;
1733  MPI_Get_count(&this->myStatuses_[i], MPI_BYTE, &size);
1734 
1735 
1736 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1737 
1738  statusOFS<<this->myRank_<<" RECVD from "<<this->myStatuses_[i].MPI_SOURCE<<" on tag "<<this->tag_<<std::endl;
1739 #endif
1740  if(size>0){
1741  //If myData is 0, allocate to the size of what has been received
1742  if(this->myData_==NULL){
1743  //assert(size==this->msgSize_);
1744  this->myLocalBuffer_.Resize(this->msgSize_);
1745 
1746  this->myData_ = (T*)&this->myLocalBuffer_[0];
1747  Int nelem = this->msgSize_/sizeof(T);
1748  std::fill(this->myData_,this->myData_+nelem,ZERO<T>());
1749  }
1750 
1751  this->Reduce();
1752  }
1753 
1754  this->numRecv_++;
1755  }
1756  }
1757 
1758  if(recvCount>0){
1759  this->PostFirstRecv();
1760  }
1761  }
1762  else if (this->isReady_ && this->sendRequest_ == MPI_REQUEST_NULL && this->myRoot_ != this->myRank_ && !this->fwded_){
1763  //free the unnecessary arrays
1764  this->myRecvBuffers_.Clear();
1765  this->myRequests_.Clear();
1766  this->myStatuses_.Clear();
1767  this->recvIdx_.Clear();
1768 
1769  //Forward
1770  this->Forward();
1771  retVal = false;
1772  }
1773  else{
1774  retVal = this->IsDone();
1775  if(retVal){
1776  //free the unnecessary arrays
1777  this->myRecvBuffers_.Clear();
1778  this->myRequests_.Clear();
1779  this->myStatuses_.Clear();
1780  this->recvIdx_.Clear();
1781  }
1782  }
1783 
1784  return retVal;
1785  }
1786 
1787 
1788  virtual FTreeReduce * clone() const{
1789  FTreeReduce * out = new FTreeReduce(*this);
1790  return out;
1791  }
1792 
1793 
1794 
1795 };
1796 
1797 
1798 
1799 template< typename T>
1800 class BTreeReduce: public TreeReduce<T>{
1801  protected:
1802  virtual void buildTree(Int * ranks, Int rank_cnt){
1803  Int idxStart = 0;
1804  Int idxEnd = rank_cnt;
1805 
1806 
1807 
1808  Int prevRoot = ranks[0];
1809  while(idxStart<idxEnd){
1810  Int curRoot = ranks[idxStart];
1811  Int listSize = idxEnd - idxStart;
1812 
1813  if(listSize == 1){
1814  if(curRoot == this->myRank_){
1815  this->myRoot_ = prevRoot;
1816  break;
1817  }
1818  }
1819  else{
1820  Int halfList = floor(ceil(double(listSize) / 2.0));
1821  Int idxStartL = idxStart+1;
1822  Int idxStartH = idxStart+halfList;
1823 
1824  if(curRoot == this->myRank_){
1825  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1826  Int childL = ranks[idxStartL];
1827  Int childR = ranks[idxStartH];
1828 
1829  this->myDests_.push_back(childL);
1830  this->myDests_.push_back(childR);
1831  }
1832  else if ((idxEnd - idxStartH) > 0){
1833  Int childR = ranks[idxStartH];
1834  this->myDests_.push_back(childR);
1835  }
1836  else{
1837  Int childL = ranks[idxStartL];
1838  this->myDests_.push_back(childL);
1839  }
1840  this->myRoot_ = prevRoot;
1841  break;
1842  }
1843 
1844  if( this->myRank_ < ranks[idxStartH]){
1845  idxStart = idxStartL;
1846  idxEnd = idxStartH;
1847  }
1848  else{
1849  idxStart = idxStartH;
1850  }
1851  prevRoot = curRoot;
1852  }
1853 
1854  }
1855 
1856 #if (defined(REDUCE_VERBOSE))
1857  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
1858  statusOFS<<"My dests are ";
1859  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
1860  statusOFS<<std::endl;
1861 #endif
1862  }
1863  public:
1864  BTreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
1865  buildTree(ranks,rank_cnt);
1866  }
1867 
1868  virtual BTreeReduce * clone() const{
1869  BTreeReduce * out = new BTreeReduce(*this);
1870  return out;
1871  }
1872 };
1873 
1874 
1875 template< typename T>
1876 class ModBTreeReduce: public TreeReduce<T>{
1877  protected:
1878  double rseed_;
1879  virtual void buildTree(Int * ranks, Int rank_cnt){
1880 
1881  Int idxStart = 0;
1882  Int idxEnd = rank_cnt;
1883 
1884  //sort the ranks with the modulo like operation
1885  if(rank_cnt>1){
1886  //generate a random position in [1 .. rand_cnt]
1887  //Int new_idx = (int)((rand()+1.0) * (double)rank_cnt / ((double)RAND_MAX+1.0));
1888  //srand(ranks[0]+rank_cnt);
1889  //Int new_idx = rseed_%(rank_cnt-1)+1;
1890 
1891  //Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
1892  //Int new_idx = (Int)rseed_ % (rank_cnt - 1) + 1;
1893  Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
1894  //Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (double)RAND_MAX ) + 0);// (this->rseed_)%(rank_cnt-1)+1;
1895 
1896 
1897  Int * new_start = &ranks[new_idx];
1898 // for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1899 
1900 // Int * new_start = std::lower_bound(&ranks[1],&ranks[0]+rank_cnt,ranks[0]);
1901  //just swap the two chunks r[0] | r[1] --- r[new_start-1] | r[new_start] --- r[end]
1902  // becomes r[0] | r[new_start] --- r[end] | r[1] --- r[new_start-1]
1903  std::rotate(&ranks[1], new_start, &ranks[0]+rank_cnt);
1904 // for(int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<" ";} statusOFS<<std::endl;
1905  }
1906 
1907  Int prevRoot = ranks[0];
1908  while(idxStart<idxEnd){
1909  Int curRoot = ranks[idxStart];
1910  Int listSize = idxEnd - idxStart;
1911 
1912  if(listSize == 1){
1913  if(curRoot == this->myRank_){
1914  this->myRoot_ = prevRoot;
1915  break;
1916  }
1917  }
1918  else{
1919  Int halfList = floor(ceil(double(listSize) / 2.0));
1920  Int idxStartL = idxStart+1;
1921  Int idxStartH = idxStart+halfList;
1922 
1923  if(curRoot == this->myRank_){
1924  if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1925  Int childL = ranks[idxStartL];
1926  Int childR = ranks[idxStartH];
1927 
1928  this->myDests_.push_back(childL);
1929  this->myDests_.push_back(childR);
1930  }
1931  else if ((idxEnd - idxStartH) > 0){
1932  Int childR = ranks[idxStartH];
1933  this->myDests_.push_back(childR);
1934  }
1935  else{
1936  Int childL = ranks[idxStartL];
1937  this->myDests_.push_back(childL);
1938  }
1939  this->myRoot_ = prevRoot;
1940  break;
1941  }
1942 
1943  //not true anymore ?
1944  //first half to
1945 TIMER_START(FIND_RANK);
1946  Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], this->myRank_);
1947 TIMER_STOP(FIND_RANK);
1948  if( pos != &ranks[idxStartH]){
1949  idxStart = idxStartL;
1950  idxEnd = idxStartH;
1951  }
1952  else{
1953  idxStart = idxStartH;
1954  }
1955  prevRoot = curRoot;
1956  }
1957 
1958  }
1959 
1960 #if (defined(REDUCE_VERBOSE))
1961  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
1962  statusOFS<<"My dests are ";
1963  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
1964  statusOFS<<std::endl;
1965 #endif
1966  }
1967  public:
1968  ModBTreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed):TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
1969  this->rseed_ = rseed;
1970  buildTree(ranks,rank_cnt);
1971  }
1972 
1973  virtual void Copy(const ModBTreeReduce & Tree){
1974  ((TreeReduce<T>*)this)->Copy(*((const TreeReduce<T>*)&Tree));
1975  //this->comm_ = Tree.comm_;
1976  //this->myRank_ = Tree.myRank_;
1977  //this->myRoot_ = Tree.myRoot_;
1978  //this->msgSize_ = Tree.msgSize_;
1979 
1980  //this->numRecv_ = Tree.numRecv_;
1981  //this->tag_= Tree.tag_;
1982  //this->mainRoot_= Tree.mainRoot_;
1983  //this->isReady_ = Tree.isReady_;
1984  //this->myDests_ = Tree.myDests_;
1985 
1986 
1987  //this->myData_ = Tree.myData_;
1988  //this->sendRequest_ = Tree.sendRequest_;
1989  //this->fwded_= Tree.fwded_;
1990  //this->isAllocated_= Tree.isAllocated_;
1991  //this->numRecvPosted_= Tree.numRecvPosted_;
1992 
1993  //this->myLocalBuffer_ = Tree.myLocalBuffer_;
1994  //this->myRecvBuffers_ = Tree.myRecvBuffers_;
1995  //this->remoteData_ = Tree.remoteData_;
1996  //this->myRequests_ = Tree.myRequests_;
1997  //this->myStatuses_ = Tree.myStatuses_;
1998  //this->recvIdx_ = Tree.recvIdx_;
1999  this->rseed_ = Tree.rseed_;
2000  }
2001 
2002 
2003 
2004 
2005  virtual ModBTreeReduce * clone() const{
2006  ModBTreeReduce * out = new ModBTreeReduce(*this);
2007  return out;
2008  }
2009 
2010 };
2011 
2012 
2013 template< typename T>
2014 class PalmTreeReduce: public TreeReduce<T>{
2015  protected:
2016 
2017  virtual void buildTree(Int * ranks, Int rank_cnt){
2018  Int numLevel = floor(log2(rank_cnt));
2019  Int numRoots = 0;
2020  for(Int level=0;level<numLevel;++level){
2021  numRoots = std::min( rank_cnt, numRoots + (Int)pow(2,level));
2022  Int numNextRoots = std::min(rank_cnt,numRoots + (Int)pow(2,(level+1)));
2023  Int numReceivers = numNextRoots - numRoots;
2024  for(Int ip = 0; ip<numRoots;++ip){
2025  Int p = ranks[ip];
2026  for(Int ir = ip; ir<numReceivers;ir+=numRoots){
2027  Int r = ranks[numRoots+ir];
2028  if(r==this->myRank_){
2029  this->myRoot_ = p;
2030  }
2031 
2032  if(p==this->myRank_){
2033  this->myDests_.push_back(r);
2034  }
2035  }
2036  }
2037  }
2038 
2039 #if (defined(BCAST_VERBOSE))
2040  statusOFS<<"My root is "<<this->myRoot_<<std::endl;
2041  statusOFS<<"My dests are ";
2042  for(int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<" ";}
2043  statusOFS<<std::endl;
2044 #endif
2045  }
2046 
2047 
2048 
2049  public:
2050  PalmTreeReduce(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):TreeReduce<T>(pComm,ranks,rank_cnt,msgSize){
2051  //build the binary tree;
2052  buildTree(ranks,rank_cnt);
2053  }
2054 
2055 
2056 
2057  virtual void Copy(const PalmTreeReduce & Tree){
2058  ((TreeReduce<T>*)this)->Copy(*((const TreeReduce<T>*)&Tree));
2059  //this->comm_ = Tree.comm_;
2060  //this->myRank_ = Tree.myRank_;
2061  //this->myRoot_ = Tree.myRoot_;
2062  //this->msgSize_ = Tree.msgSize_;
2063 
2064  //this->numRecv_ = Tree.numRecv_;
2065  //this->tag_= Tree.tag_;
2066  //this->mainRoot_= Tree.mainRoot_;
2067  //this->isReady_ = Tree.isReady_;
2068  //this->myDests_ = Tree.myDests_;
2069 
2070 
2071  //this->myData_ = Tree.myData_;
2072  //this->sendRequest_ = Tree.sendRequest_;
2073  //this->fwded_= Tree.fwded_;
2074  //this->isAllocated_= Tree.isAllocated_;
2075  //this->numRecvPosted_= Tree.numRecvPosted_;
2076 
2077  //this->myLocalBuffer_ = Tree.myLocalBuffer_;
2078  //this->myRecvBuffers_ = Tree.myRecvBuffers_;
2079  //this->remoteData_ = Tree.remoteData_;
2080  //this->myRequests_ = Tree.myRequests_;
2081  //this->myStatuses_ = Tree.myStatuses_;
2082  //this->recvIdx_ = Tree.recvIdx_;
2083  //this->rseed_ = Tree.rseed_;
2084  }
2085 
2086 
2087 
2088 
2089  virtual PalmTreeReduce * clone() const{
2090  PalmTreeReduce * out = new PalmTreeReduce(*this);
2091  return out;
2092  }
2093 
2094 };
2095 
2096 
2097 
2098  inline TreeBcast * TreeBcast::Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed){
2099  //get communicator size
2100  Int nprocs = 0;
2101  MPI_Comm_size(pComm, &nprocs);
2102 
2103 
2104 #if defined(FTREE)
2105  return new FTreeBcast(pComm,ranks,rank_cnt,msgSize);
2106 #elif defined(MODBTREE)
2107  return new ModBTreeBcast(pComm,ranks,rank_cnt,msgSize, rseed);
2108 #elif defined(BTREE)
2109  return new BTreeBcast(pComm,ranks,rank_cnt,msgSize);
2110 #elif defined(PALMTREE)
2111  return new PalmTreeBcast(pComm,ranks,rank_cnt,msgSize);
2112 #endif
2113 
2114 
2115 // return new PalmTreeBcast(pComm,ranks,rank_cnt,msgSize);
2116 // return new ModBTreeBcast(pComm,ranks,rank_cnt,msgSize, rseed);
2117 // return new RandBTreeBcast(pComm,ranks,rank_cnt,msgSize);
2118 
2119  if(nprocs<=FTREE_LIMIT){
2120  return new FTreeBcast(pComm,ranks,rank_cnt,msgSize);
2121  }
2122  else{
2123  return new ModBTreeBcast(pComm,ranks,rank_cnt,msgSize, rseed);
2124  }
2125 
2126 
2127 
2128 
2129  }
2130 
2131 
2132 
2133 
2134 template< typename T>
2135  inline TreeReduce<T> * TreeReduce<T>::Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed){
2136  //get communicator size
2137  Int nprocs = 0;
2138  MPI_Comm_size(pComm, &nprocs);
2139 
2140 #if defined(FTREE)
2141  return new FTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2142 #elif defined(MODBTREE)
2143  return new ModBTreeReduce<T>(pComm,ranks,rank_cnt,msgSize, rseed);
2144 #elif defined(BTREE)
2145  return new BTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2146 #elif defined(PALMTREE)
2147  return new PalmTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2148 #endif
2149 
2150 
2151  if(nprocs<=FTREE_LIMIT){
2152 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
2153 statusOFS<<"FLAT TREE USED"<<endl;
2154 #endif
2155  return new FTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2156  }
2157  else{
2158 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
2159 statusOFS<<"BINARY TREE USED"<<endl;
2160 #endif
2161  return new ModBTreeReduce<T>(pComm,ranks,rank_cnt,msgSize, rseed);
2162  //return new BTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2163  }
2164  }
2165 
2166 
2167 #ifdef NEW_BCAST
2168 template< typename T>
2169  inline TreeBcast2<T> * TreeBcast2<T>::Create(const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize, double rseed){
2170  //get communicator size
2171  Int nprocs = 0;
2172  MPI_Comm_size(pComm, &nprocs);
2173 
2174 
2175 
2176 
2177 #if defined(FTREE)
2178  return new FTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize);
2179 #elif defined(MODBTREE)
2180  return new ModBTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize,rseed);
2181 #elif defined(BTREE)
2182  return new BTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize);
2183 #endif
2184 
2185 
2186  if(nprocs<=FTREE_LIMIT){
2187 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
2188 statusOFS<<"FLAT TREE USED"<<endl;
2189 #endif
2190 
2191  return new FTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize);
2192 
2193  }
2194  else{
2195 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
2196 statusOFS<<"BINARY TREE USED"<<endl;
2197 #endif
2198  return new ModBTreeBcast2<T>(pComm,ranks,rank_cnt,msgSize, rseed);
2199 // //return new BTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
2200  }
2201  }
2202 #endif
2203 
2204 
2205 
2206 
2207 
2208 
2209 }
2210 
2211 #endif
Environmental variables.
Definition: TreeBcast.hpp:705
Definition: TreeBcast.hpp:1616
Definition: TreeBcast.hpp:1126
virtual void Copy(const ModBTreeBcast &Tree)
Definition: TreeBcast.hpp:1090
Definition: TreeBcast.hpp:2014
Profiling and timing using TAU.
void SetValue(NumMat< F > &M, F val)
SetValue sets a numerical matrix to a constant val.
Definition: NumMat_impl.hpp:171
Definition: TreeBcast.hpp:843
Definition: TreeBcast.hpp:988
Definition: TreeBcast.hpp:1876
Definition: TreeBcast.hpp:883
Definition: TreeBcast.hpp:1219
Definition: TreeBcast.hpp:1268
Definition: TreeBcast.hpp:1800