#ifndef __BUTTERFLY_ALLREDUCE__ #define __BUTTERFLY_ALLREDUCE__ #include int butterfly_Allreduce(void* local, void* result, int size, MPI_Datatype type, MPI_Op binop, MPI_Comm comm) { /* This stub ignores the input type, acts as if it were MPI_Double. * This stub ignores the input binop, acts as if it were MPI_Sum. */ int r,p; MPI_Status status; MPI_Comm_size(comm, &p); MPI_Comm_rank(comm, &r); double* R = static_cast(result); double* L = static_cast(local); double* R2; //// k = highest power of 2 which is not greater than p. int k; for (k=1; k <= p; k *= 2); k /=2; if ( r < k ) R2 = new double[size]; //// get data from rough edge beyond k if ( r < k && r+k < p ) { MPI_Recv(R2, size, type, r+k, 0, comm, &status); for(int i = 0; i < size; ++i) R[i] = L[i] + R2[i]; } else if ( r >= k ) MPI_Send(L, size, type, r-k, 0, comm); else // p - k <= r < k for(int i = 0; i < size; ++i) R[i] = L[i]; //// now procs [0..k) have R set, and the data from //// procs [k..p) is included. //// butterfly on procs [0..k), where k is an exact power of 2. if ( r < k ) for (int j = 1; j < k; j *= 2) { int rr = (r % (2*j) < j) ? r+j : r-j; MPI_Sendrecv(R, size, type, rr, 0, R2, size, type, rr, 0, comm, &status); for(int i = 0; i < size; ++i) R[i] += R2[i]; } //// give result to rough edge beyond k. if ( r < k && r+k < p ) MPI_Send(R, size, type, r+k, 0, comm); else if ( r >= k ) MPI_Recv(R, size, type, r-k, 0, comm, &status); // /*********** correctness checking code *************/ // if (r != 0) // MPI_Send(R, size, type, 0, 0, comm); // else for (int i = 1; i < p; ++i) // { // MPI_Recv(R2, size, type, i, 0, comm, &status); // for (int j = 0; j < size; ++j) // if ( R[j] != R2[j] ) // cerr << "problem on proc " << i << " in posn " << j // << ": it's R[j] = " << R2[j] // << "but 0's R[j] = " << R[j] << endl; // // } // /*********** end correctness checking code *************/ } #endif