1 #ifndef _PEXSI_TREE_HPP_
2 #define _PEXSI_TREE_HPP_
15 #define FTREE_LIMIT 24
40 void SetGlobalComm(
const MPI_Comm & pGComm){
41 MPI_Comm_rank(pGComm,&myGRank_);
42 MPI_Group group2 = MPI_GROUP_NULL;
43 MPI_Comm_group(pGComm, &group2);
44 MPI_Group group1 = MPI_GROUP_NULL;
45 MPI_Comm_group(comm_, &group1);
48 MPI_Comm_size(comm_,&size);
50 vector<int> Lranks(size);
51 for(
int i = 0; i<size;++i){Lranks[i]=i;}
52 MPI_Group_translate_ranks(group1, size, &Lranks[0],group2, &Granks_[0]);
58 virtual void buildTree(Int * ranks, Int rank_cnt)=0;
61 comm_ = MPI_COMM_WORLD;
72 TreeBcast(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt,Int msgSize){
74 MPI_Comm_rank(comm_,&myRank_);
88 virtual void Copy(
const TreeBcast & Tree){
90 myRank_ = Tree.myRank_;
91 myRoot_ = Tree.myRoot_;
92 msgSize_ = Tree.msgSize_;
94 numRecv_ = Tree.numRecv_;
96 mainRoot_= Tree.mainRoot_;
97 isReady_ = Tree.isReady_;
98 myDests_ = Tree.myDests_;
105 static TreeBcast * Create(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,
double rseed);
107 virtual inline Int GetNumRecvMsg(){
return numRecv_;}
108 virtual inline Int GetNumMsgToRecv(){
return 1;}
109 inline void SetDataReady(
bool rdy){ isReady_=rdy;}
110 inline void SetTag(Int tag){ tag_ = tag;}
113 Int * GetDests(){
return &myDests_[0];}
114 Int GetDest(Int i){
return myDests_[i];}
115 Int GetDestCount(){
return myDests_.size();}
116 Int GetRoot(){
return myRoot_;}
117 Int GetMsgSize(){
return msgSize_;}
119 void ForwardMessage(
char * data,
size_t size,
int tag, MPI_Request * requests ){
120 for( Int idxRecv = 0; idxRecv < myDests_.size(); ++idxRecv ){
121 Int iProc = myDests_[idxRecv];
123 MPI_Isend( data, size, MPI_BYTE,
124 iProc, tag,comm_, &requests[2*iProc+1] );
127 PROFILE_COMM(myGRank_,Granks_[iProc],tag,msgSize_);
137 virtual void buildTree(Int * ranks, Int rank_cnt){
140 Int idxEnd = rank_cnt;
146 if(myRank_==myRoot_){
147 myDests_.insert(myDests_.begin(),&ranks[1],&ranks[0]+rank_cnt);
150 #if ( _DEBUGlevel_ >= 1 )
151 statusOFS<<
"My root is "<<myRoot_<<std::endl;
152 statusOFS<<
"My dests are ";
153 for(
int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<
" ";}
154 statusOFS<<std::endl;
161 FTreeBcast(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):
TreeBcast(pComm,ranks,rank_cnt,msgSize){
163 buildTree(ranks,rank_cnt);
200 virtual void buildTree(Int * ranks, Int rank_cnt){
203 Int idxEnd = rank_cnt;
207 Int prevRoot = ranks[0];
208 while(idxStart<idxEnd){
209 Int curRoot = ranks[idxStart];
210 Int listSize = idxEnd - idxStart;
213 if(curRoot == myRank_){
219 Int halfList = floor(ceil(
double(listSize) / 2.0));
220 Int idxStartL = idxStart+1;
221 Int idxStartH = idxStart+halfList;
223 if(curRoot == myRank_){
224 if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
225 Int childL = ranks[idxStartL];
226 Int childR = ranks[idxStartH];
228 myDests_.push_back(childL);
229 myDests_.push_back(childR);
231 else if ((idxEnd - idxStartH) > 0){
232 Int childR = ranks[idxStartH];
233 myDests_.push_back(childR);
236 Int childL = ranks[idxStartL];
237 myDests_.push_back(childL);
243 if( myRank_ < ranks[idxStartH]){
244 idxStart = idxStartL;
248 idxStart = idxStartH;
255 #if ( _DEBUGlevel_ >= 1 )
256 statusOFS<<
"My root is "<<myRoot_<<std::endl;
257 statusOFS<<
"My dests are ";
258 for(
int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<
" ";}
259 statusOFS<<std::endl;
266 BTreeBcast(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):
TreeBcast(pComm,ranks,rank_cnt,msgSize){
268 buildTree(ranks,rank_cnt);
284 virtual void buildTree(Int * ranks, Int rank_cnt){
287 Int idxEnd = rank_cnt;
294 Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (
double)RAND_MAX ) + 0);
299 Int * new_start = &ranks[new_idx];
306 std::rotate(&ranks[1], new_start, &ranks[0]+rank_cnt);
311 Int prevRoot = ranks[0];
312 while(idxStart<idxEnd){
313 Int curRoot = ranks[idxStart];
314 Int listSize = idxEnd - idxStart;
317 if(curRoot == myRank_){
323 Int halfList = floor(ceil(
double(listSize) / 2.0));
324 Int idxStartL = idxStart+1;
325 Int idxStartH = idxStart+halfList;
327 if(curRoot == myRank_){
328 if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
329 Int childL = ranks[idxStartL];
330 Int childR = ranks[idxStartH];
332 myDests_.push_back(childL);
333 myDests_.push_back(childR);
335 else if ((idxEnd - idxStartH) > 0){
336 Int childR = ranks[idxStartH];
337 myDests_.push_back(childR);
340 Int childL = ranks[idxStartL];
341 myDests_.push_back(childL);
349 TIMER_START(FIND_RANK);
350 Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], myRank_);
351 TIMER_STOP(FIND_RANK);
352 if( pos != &ranks[idxStartH]){
353 idxStart = idxStartL;
357 idxStart = idxStartH;
364 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
365 statusOFS<<
"My root is "<<myRoot_<<std::endl;
366 statusOFS<<
"My dests are ";
367 for(
int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<
" ";}
368 statusOFS<<std::endl;
375 ModBTreeBcast(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,
double rseed):
TreeBcast(pComm,ranks,rank_cnt,msgSize){
378 buildTree(ranks,rank_cnt);
383 myRank_ = Tree.myRank_;
384 myRoot_ = Tree.myRoot_;
385 msgSize_ = Tree.msgSize_;
387 numRecv_ = Tree.numRecv_;
389 mainRoot_= Tree.mainRoot_;
390 isReady_ = Tree.isReady_;
391 myDests_ = Tree.myDests_;
393 rseed_ = Tree.rseed_;
394 myRank_ = Tree.myRank_;
395 myRoot_ = Tree.myRoot_;
396 msgSize_ = Tree.msgSize_;
398 numRecv_ = Tree.numRecv_;
400 mainRoot_= Tree.mainRoot_;
401 isReady_ = Tree.isReady_;
402 myDests_ = Tree.myDests_;
415 virtual void buildTree(Int * ranks, Int rank_cnt){
418 Int idxEnd = rank_cnt;
422 for(
int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<
" ";} statusOFS<<std::endl;
424 std::random_shuffle(&ranks[1],&ranks[0]+rank_cnt);
425 for(
int i =0;i<rank_cnt;++i){statusOFS<<ranks[i]<<
" ";} statusOFS<<std::endl;
429 Int prevRoot = ranks[0];
430 while(idxStart<idxEnd){
431 Int curRoot = ranks[idxStart];
432 Int listSize = idxEnd - idxStart;
435 if(curRoot == myRank_){
441 Int halfList = floor(ceil(
double(listSize) / 2.0));
442 Int idxStartL = idxStart+1;
443 Int idxStartH = idxStart+halfList;
445 if(curRoot == myRank_){
446 if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
447 Int childL = ranks[idxStartL];
448 Int childR = ranks[idxStartH];
450 myDests_.push_back(childL);
451 myDests_.push_back(childR);
453 else if ((idxEnd - idxStartH) > 0){
454 Int childR = ranks[idxStartH];
455 myDests_.push_back(childR);
458 Int childL = ranks[idxStartL];
459 myDests_.push_back(childL);
467 Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], myRank_);
468 if( pos != &ranks[idxStartH]){
469 idxStart = idxStartL;
473 idxStart = idxStartH;
480 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
481 statusOFS<<
"My root is "<<myRoot_<<std::endl;
482 statusOFS<<
"My dests are ";
483 for(
int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<
" ";}
484 statusOFS<<std::endl;
491 RandBTreeBcast(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):
TreeBcast(pComm,ranks,rank_cnt,msgSize){
493 buildTree(ranks,rank_cnt);
508 virtual void buildTree(Int * ranks, Int rank_cnt){
509 Int numLevel = floor(log2(rank_cnt));
511 for(Int level=0;level<numLevel;++level){
512 numRoots = std::min( rank_cnt, numRoots + (Int)pow(2,level));
513 Int numNextRoots = std::min(rank_cnt,numRoots + (Int)pow(2,(level+1)));
514 Int numReceivers = numNextRoots - numRoots;
515 for(Int ip = 0; ip<numRoots;++ip){
517 for(Int ir = ip; ir<numReceivers;ir+=numRoots){
518 Int r = ranks[numRoots+ir];
524 myDests_.push_back(r);
530 #if ( _DEBUGlevel_ >= 1 )
531 statusOFS<<
"My root is "<<myRoot_<<std::endl;
532 statusOFS<<
"My dests are ";
533 for(
int i =0;i<myDests_.size();++i){statusOFS<<myDests_[i]<<
" ";}
534 statusOFS<<std::endl;
541 PalmTreeBcast(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):
TreeBcast(pComm,ranks,rank_cnt,msgSize){
543 buildTree(ranks,rank_cnt);
554 template<
typename T>
559 MPI_Request sendRequest_;
575 TreeReduce(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):
TreeBcast(pComm,ranks,rank_cnt,msgSize){
577 sendRequest_ = MPI_REQUEST_NULL;
591 this->comm_ = Tree.comm_;
592 this->myRank_ = Tree.myRank_;
593 this->myRoot_ = Tree.myRoot_;
594 this->msgSize_ = Tree.msgSize_;
596 this->numRecv_ = Tree.numRecv_;
597 this->tag_= Tree.tag_;
598 this->mainRoot_= Tree.mainRoot_;
599 this->isReady_ = Tree.isReady_;
600 this->myDests_ = Tree.myDests_;
603 this->myData_ = Tree.myData_;
604 this->sendRequest_ = Tree.sendRequest_;
605 this->fwded_= Tree.fwded_;
606 this->isAllocated_= Tree.isAllocated_;
607 this->numRecvPosted_= Tree.numRecvPosted_;
609 this->myLocalBuffer_ = Tree.myLocalBuffer_;
610 this->myRecvBuffers_ = Tree.myRecvBuffers_;
611 this->remoteData_ = Tree.remoteData_;
612 this->myRequests_ = Tree.myRequests_;
613 this->myStatuses_ = Tree.myStatuses_;
614 this->recvIdx_ = Tree.recvIdx_;
619 bool IsAllocated(){
return isAllocated_;}
626 static TreeReduce<T> * Create(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,
double rseed);
628 virtual inline Int GetNumMsgToRecv(){
return GetDestCount();}
630 virtual void AllocRecvBuffers(){
631 remoteData_.Resize(GetDestCount());
638 myRecvBuffers_.Resize(GetDestCount()*msgSize_);
641 for( Int idxRecv = 0; idxRecv < GetDestCount(); ++idxRecv ){
642 remoteData_[idxRecv] = (T*)&(myRecvBuffers_[idxRecv*msgSize_]);
647 myRequests_.Resize(GetDestCount());
648 SetValue(myRequests_,MPI_REQUEST_NULL);
649 myStatuses_.Resize(GetDestCount());
650 recvIdx_.Resize(GetDestCount());
652 sendRequest_ = MPI_REQUEST_NULL;
657 void CleanupBuffers(){
658 myLocalBuffer_.Clear();
685 sendRequest_ = MPI_REQUEST_NULL;
694 void SetLocalBuffer(T * locBuffer){
695 if(myData_!=NULL && myData_!=locBuffer){
696 blas::Axpy(msgSize_/
sizeof(T), ONE<T>(), myData_, 1, locBuffer, 1 );
697 myLocalBuffer_.Clear();
703 inline bool AccumulationDone(){
704 if(myRank_==myRoot_ && isAllocated_){
707 return isReady_ && (numRecv_ == GetDestCount());
711 inline bool IsDone(){
712 if(myRank_==myRoot_ && isAllocated_){
716 bool retVal = AccumulationDone();
717 if(myRoot_ != myRank_ && !fwded_){
721 if (retVal && myRoot_ != myRank_ && fwded_){
724 MPI_Test(&sendRequest_,&flag,MPI_STATUS_IGNORE);
732 virtual bool Progress(){
737 if(myRank_==myRoot_ && isAllocated_){
745 bool retVal = AccumulationDone();
746 if(isReady_ && !retVal){
752 int reqCnt = GetDestCount();
753 assert(reqCnt == myRequests_.m());
754 MPI_Testsome(reqCnt,&myRequests_[0],&recvCount,&recvIdx_[0],&myStatuses_[0]);
756 for(Int i = 0;i<recvCount;++i ){
757 Int idx = recvIdx_[i];
759 if(idx!=MPI_UNDEFINED){
762 MPI_Get_count(&myStatuses_[i], MPI_BYTE, &size);
765 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
766 statusOFS<<myRank_<<
" RECVD from "<<myStatuses_[i].MPI_SOURCE<<
" on tag "<<tag_<<std::endl;
772 myLocalBuffer_.Resize(msgSize_);
774 myData_ = (T*)&myLocalBuffer_[0];
775 Int nelem = +msgSize_/
sizeof(T);
776 std::fill(myData_,myData_+nelem,ZERO<T>());
789 else if (isReady_ && sendRequest_ == MPI_REQUEST_NULL && myRoot_ != myRank_ && !fwded_){
791 myRecvBuffers_.Clear();
806 myRecvBuffers_.Clear();
823 T * GetLocalBuffer(){
829 void CopyLocalBuffer(T* destBuffer){
830 std::copy((
char*)myData_,(
char*)myData_+GetMsgSize(),(
char*)destBuffer);
834 virtual void PostFirstRecv()
836 if(this->GetDestCount()>this->numRecvPosted_){
837 for( Int idxRecv = 0; idxRecv < myDests_.size(); ++idxRecv ){
838 Int iProc = myDests_[idxRecv];
840 MPI_Irecv( (
char*)remoteData_[idxRecv], msgSize_, MPI_BYTE,
841 iProc, tag_,comm_, &myRequests_[idxRecv] );
842 this->numRecvPosted_++;
851 void Reduce( Int idxRecv, Int idReq){
853 blas::Axpy(msgSize_/
sizeof(T), ONE<T>(), remoteData_[idxRecv], 1, myData_, 1 );
861 MPI_Isend( NULL, 0, MPI_BYTE,
862 iProc, tag_,comm_, &sendRequest_ );
864 PROFILE_COMM(myGRank_,Granks_[iProc],tag_,0);
868 MPI_Isend( (
char*)myData_, msgSize_, MPI_BYTE,
869 iProc, tag_,comm_, &sendRequest_ );
871 PROFILE_COMM(myGRank_,Granks_[iProc],tag_,msgSize_);
875 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
876 statusOFS<<myRank_<<
" FWD to "<<iProc<<
" on tag "<<tag_<<std::endl;
886 template<
typename T>
889 virtual void buildTree(Int * ranks, Int rank_cnt){
892 Int idxEnd = rank_cnt;
896 this->myRoot_ = ranks[0];
898 if(this->myRank_==this->myRoot_){
899 this->myDests_.insert(this->myDests_.begin(),&ranks[1],&ranks[0]+rank_cnt);
902 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
903 statusOFS<<
"My root is "<<this->myRoot_<<std::endl;
904 statusOFS<<
"My dests are ";
905 for(
int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<
" ";}
906 statusOFS<<std::endl;
912 blas::Axpy(this->msgSize_/
sizeof(T), ONE<T>(), this->remoteData_[0], 1, this->myData_, 1 );
918 FTreeReduce(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):
TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
919 buildTree(ranks,rank_cnt);
922 virtual void PostFirstRecv()
927 if(this->isAllocated_ && this->GetDestCount()>this->numRecvPosted_){
928 MPI_Irecv( (
char*)this->remoteData_[0], this->msgSize_, MPI_BYTE,
929 MPI_ANY_SOURCE, this->tag_,this->comm_, &this->myRequests_[0] );
930 this->numRecvPosted_++;
934 virtual void AllocRecvBuffers(){
935 if(this->GetDestCount()>0){
936 this->remoteData_.Resize(1);
938 this->myRecvBuffers_.Resize(this->msgSize_);
940 this->remoteData_[0] = (T*)&(this->myRecvBuffers_[0]);
942 this->myRequests_.Resize(1);
943 SetValue(this->myRequests_,MPI_REQUEST_NULL);
944 this->myStatuses_.Resize(1);
945 this->recvIdx_.Resize(1);
948 this->sendRequest_ = MPI_REQUEST_NULL;
950 this->isAllocated_ =
true;
953 virtual bool Progress(){
955 if(!this->isAllocated_){
960 if(this->myRank_==this->myRoot_ && this->isAllocated_){
968 bool retVal = this->AccumulationDone();
969 if(this->isReady_ && !retVal){
977 MPI_Testsome(reqCnt,&this->myRequests_[0],&recvCount,&this->recvIdx_[0],&this->myStatuses_[0]);
980 for(Int i = 0;i<recvCount;++i ){
981 Int idx = this->recvIdx_[i];
983 if(idx!=MPI_UNDEFINED){
986 MPI_Get_count(&this->myStatuses_[i], MPI_BYTE, &size);
989 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
991 statusOFS<<this->myRank_<<
" RECVD from "<<this->myStatuses_[i].MPI_SOURCE<<
" on tag "<<this->tag_<<std::endl;
995 if(this->myData_==NULL){
997 this->myLocalBuffer_.Resize(this->msgSize_);
999 this->myData_ = (T*)&this->myLocalBuffer_[0];
1000 Int nelem = this->msgSize_/
sizeof(T);
1001 std::fill(this->myData_,this->myData_+nelem,ZERO<T>());
1012 this->PostFirstRecv();
1015 else if (this->isReady_ && this->sendRequest_ == MPI_REQUEST_NULL && this->myRoot_ != this->myRank_ && !this->fwded_){
1017 this->myRecvBuffers_.Clear();
1018 this->myRequests_.Clear();
1019 this->myStatuses_.Clear();
1020 this->recvIdx_.Clear();
1027 retVal = this->IsDone();
1030 this->myRecvBuffers_.Clear();
1031 this->myRequests_.Clear();
1032 this->myStatuses_.Clear();
1033 this->recvIdx_.Clear();
1052 template<
typename T>
1055 virtual void buildTree(Int * ranks, Int rank_cnt){
1057 Int idxEnd = rank_cnt;
1061 Int prevRoot = ranks[0];
1062 while(idxStart<idxEnd){
1063 Int curRoot = ranks[idxStart];
1064 Int listSize = idxEnd - idxStart;
1067 if(curRoot == this->myRank_){
1068 this->myRoot_ = prevRoot;
1073 Int halfList = floor(ceil(
double(listSize) / 2.0));
1074 Int idxStartL = idxStart+1;
1075 Int idxStartH = idxStart+halfList;
1077 if(curRoot == this->myRank_){
1078 if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1079 Int childL = ranks[idxStartL];
1080 Int childR = ranks[idxStartH];
1082 this->myDests_.push_back(childL);
1083 this->myDests_.push_back(childR);
1085 else if ((idxEnd - idxStartH) > 0){
1086 Int childR = ranks[idxStartH];
1087 this->myDests_.push_back(childR);
1090 Int childL = ranks[idxStartL];
1091 this->myDests_.push_back(childL);
1093 this->myRoot_ = prevRoot;
1097 if( this->myRank_ < ranks[idxStartH]){
1098 idxStart = idxStartL;
1102 idxStart = idxStartH;
1109 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1110 statusOFS<<
"My root is "<<this->myRoot_<<std::endl;
1111 statusOFS<<
"My dests are ";
1112 for(
int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<
" ";}
1113 statusOFS<<std::endl;
1117 BTreeReduce(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize):
TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
1118 buildTree(ranks,rank_cnt);
1128 template<
typename T>
1132 virtual void buildTree(Int * ranks, Int rank_cnt){
1135 Int idxEnd = rank_cnt;
1143 Int new_idx = (int)((rank_cnt - 0) * ( (double)this->rseed_ / (
double)RAND_MAX ) + 0);
1146 Int * new_start = &ranks[new_idx];
1152 std::rotate(&ranks[1], new_start, &ranks[0]+rank_cnt);
1156 Int prevRoot = ranks[0];
1157 while(idxStart<idxEnd){
1158 Int curRoot = ranks[idxStart];
1159 Int listSize = idxEnd - idxStart;
1162 if(curRoot == this->myRank_){
1163 this->myRoot_ = prevRoot;
1168 Int halfList = floor(ceil(
double(listSize) / 2.0));
1169 Int idxStartL = idxStart+1;
1170 Int idxStartH = idxStart+halfList;
1172 if(curRoot == this->myRank_){
1173 if ((idxEnd - idxStartH) > 0 && (idxStartH - idxStartL)>0){
1174 Int childL = ranks[idxStartL];
1175 Int childR = ranks[idxStartH];
1177 this->myDests_.push_back(childL);
1178 this->myDests_.push_back(childR);
1180 else if ((idxEnd - idxStartH) > 0){
1181 Int childR = ranks[idxStartH];
1182 this->myDests_.push_back(childR);
1185 Int childL = ranks[idxStartL];
1186 this->myDests_.push_back(childL);
1188 this->myRoot_ = prevRoot;
1194 TIMER_START(FIND_RANK);
1195 Int * pos = std::find(&ranks[idxStartL], &ranks[idxStartH], this->myRank_);
1196 TIMER_STOP(FIND_RANK);
1197 if( pos != &ranks[idxStartH]){
1198 idxStart = idxStartL;
1202 idxStart = idxStartH;
1209 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1210 statusOFS<<
"My root is "<<this->myRoot_<<std::endl;
1211 statusOFS<<
"My dests are ";
1212 for(
int i =0;i<this->myDests_.size();++i){statusOFS<<this->myDests_[i]<<
" ";}
1213 statusOFS<<std::endl;
1217 ModBTreeReduce(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,
double rseed):
TreeReduce<T>(pComm, ranks, rank_cnt, msgSize){
1218 this->rseed_ = rseed;
1219 buildTree(ranks,rank_cnt);
1223 this->comm_ = Tree.comm_;
1224 this->myRank_ = Tree.myRank_;
1225 this->myRoot_ = Tree.myRoot_;
1226 this->msgSize_ = Tree.msgSize_;
1228 this->numRecv_ = Tree.numRecv_;
1229 this->tag_= Tree.tag_;
1230 this->mainRoot_= Tree.mainRoot_;
1231 this->isReady_ = Tree.isReady_;
1232 this->myDests_ = Tree.myDests_;
1235 this->myData_ = Tree.myData_;
1236 this->sendRequest_ = Tree.sendRequest_;
1237 this->fwded_= Tree.fwded_;
1238 this->isAllocated_= Tree.isAllocated_;
1239 this->numRecvPosted_= Tree.numRecvPosted_;
1241 this->myLocalBuffer_ = Tree.myLocalBuffer_;
1242 this->myRecvBuffers_ = Tree.myRecvBuffers_;
1243 this->remoteData_ = Tree.remoteData_;
1244 this->myRequests_ = Tree.myRequests_;
1245 this->myStatuses_ = Tree.myStatuses_;
1246 this->recvIdx_ = Tree.recvIdx_;
1247 this->rseed_ = Tree.rseed_;
1262 inline TreeBcast * TreeBcast::Create(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,
double rseed){
1265 MPI_Comm_size(pComm, &nprocs);
1271 if(nprocs<=FTREE_LIMIT){
1272 return new FTreeBcast(pComm,ranks,rank_cnt,msgSize);
1275 return new ModBTreeBcast(pComm,ranks,rank_cnt,msgSize, rseed);
1287 template<
typename T>
1288 inline TreeReduce<T> * TreeReduce<T>::Create(
const MPI_Comm & pComm, Int * ranks, Int rank_cnt, Int msgSize,
double rseed){
1291 MPI_Comm_size(pComm, &nprocs);
1294 if(nprocs<=FTREE_LIMIT){
1295 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1296 statusOFS<<
"FLAT TREE USED"<<endl;
1298 return new FTreeReduce<T>(pComm,ranks,rank_cnt,msgSize);
1301 #if ( _DEBUGlevel_ >= 1 ) || defined(REDUCE_VERBOSE)
1302 statusOFS<<
"BINARY TREE USED"<<endl;
1304 return new ModBTreeReduce<T>(pComm,ranks,rank_cnt,msgSize, rseed);
Definition: TreeBcast.hpp:22
Definition: TreeBcast.hpp:887
Definition: TreeBcast.hpp:413
Profiling and timing using TAU.
void SetValue(NumMat< F > &M, F val)
SetValue sets a numerical matrix to a constant val.
Definition: NumMat_impl.hpp:171
Definition: TreeBcast.hpp:135
Definition: TreeBcast.hpp:280
Definition: TreeBcast.hpp:1129
Definition: TreeBcast.hpp:175
Definition: TreeBcast.hpp:506
Definition: TreeBcast.hpp:555
Definition: TreeBcast.hpp:1053