// this is file testbutterfly.cc -bds 10/2002 #include #include #include #include "butterfly.h" /* The butterfly_Allreduce is found in porsche:~saunders/butterfly.h. It only works for datatype double, binop +. */ // time_it() is a tool for timing the various all-reduction functions. // The implementation is at the end of this file. void time_it(int (*reduce_func)(void*, void*, int, int, int, int), char* reducer_name, int reps, void* data, void* result, int size, MPI_Datatype type, MPI_Op binop, int root, MPI_Comm comm); main(int argc, char* argv[]) { MPI_Init(&argc, &argv); /////////////////////////////////////////////////////////////////// // Create a communicator which excludes process 0 (i.e. porsche). //// get group MPI_Group world_group; MPI_Comm_group(MPI_COMM_WORLD, &world_group); //// set up vector of ranks for new comm int wp; // size of world comm. MPI_Comm_size(MPI_COMM_WORLD, &wp); vector ranks(wp-1); for (int i = 1; i < wp; ++i) ranks[i-1] = i; //// make new group MPI_Group rack_group; MPI_Group_incl(world_group, wp-1, &ranks[0], &rack_group); //// make new comm MPI_Comm rack; MPI_Comm_create(MPI_COMM_WORLD, rack_group, &rack); /////////////////////////////////////////////////////////////////// // reduction timing tests int wr; // my rank in world comm. MPI_Comm_rank(MPI_COMM_WORLD, &wr); if (wr != 0) // leave porsche out of it altogether. { int r; MPI_Comm_rank(rack, &r); double data = r + 1, result, result2; ///////////////////////////////////////////// // reductions where there is one data item // int reps = 1; // Case 1: Using MPI's built-in reduce function. time_it(MPI_Allreduce, "Case 1a: MPI_Allreduce", reps, &data, &result, 1, MPI_DOUBLE, MPI_SUM, 0, rack); time_it(MPI_Allreduce, " 1a: MPI_Allreduce", reps, &data, &result, 1, MPI_DOUBLE, MPI_SUM, 0, rack); // Case 3: Using a reduce built from a butterfly pattern of send-recv's. time_it(butterfly_Allreduce, " 2a: butterfly_Allreduce", reps, &data, &result2, 1, MPI_DOUBLE, MPI_SUM, 0, rack); time_it(butterfly_Allreduce, " 2a: butterfly_Allreduce", reps, &data, &result2, 1, MPI_DOUBLE, MPI_SUM, 0, rack); ////////////////////////////////////////////////////////// // experiments on reductions using large arrays of data // int n = 100000; double D[n]; for(int i = 0; i < n; ++i) D[i] = r+1; double R[n]; double R2[n]; time_it(MPI_Allreduce, " 1b: MPI_Allreduce", reps, D, R, n, MPI_DOUBLE, MPI_SUM, 0, rack); time_it(MPI_Allreduce, " 1b: MPI_Allreduce", reps, D, R, n, MPI_DOUBLE, MPI_SUM, 0, rack); time_it(butterfly_Allreduce, " 2b: butterfly_Allreduce", reps, D, R2, n, MPI_DOUBLE, MPI_SUM, 0, rack); time_it(butterfly_Allreduce, " 2b: butterfly_Allreduce", reps, D, R2, n, MPI_DOUBLE, MPI_SUM, 0, rack); } MPI_Finalize(); } // time_it() definition: void time_it(int (*reduce_func)(void*, void*, int, int, int, int), char* reducer_name, int reps, void* data, void* result, int size, MPI_Datatype type, MPI_Op binop, int root, MPI_Comm comm) /* Known bugs: for the printout part, time_it() assumes the result datatype is double and it shows just the last value in the result array. */ { int r; MPI_Comm_rank(comm, &r); double start_time, elapsed_time; MPI_Barrier(comm); if (r == root) start_time = MPI_Wtime(); for (int i = 0; i < reps; ++i) reduce_func(data, result, size, type, binop, comm); if (r == root) { elapsed_time = MPI_Wtime() - start_time; cout << reducer_name << " computed " << static_cast(result)[size-1]; cout << " repeated " << reps << " times"; cout << " in " << elapsed_time << " seconds." << endl; } } // time_it()