Loading init.c +1 −1 Original line number Diff line number Diff line Loading @@ -366,7 +366,7 @@ void allocate_memory() { gridss_real = (double*) calloc(size_of_grid/2,sizeof(double)); gridss_img = (double*) calloc(size_of_grid/2,sizeof(double)); numa_allocate_shared_windows( Me, size_of_grid*sizeof(double)*1.1, size_of_grid*sizeof(double)*1.1); numa_allocate_shared_windows( &Me, size_of_grid*sizeof(double)*1.1, size_of_grid*sizeof(double)*1.1); // Create destination slab grid = (double*) calloc(size_of_grid,sizeof(double)); Loading numa.c +54 −54 Original line number Diff line number Diff line Loading @@ -75,10 +75,10 @@ int numa_init( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) } int numa_allocate_shared_windows( map_t *Me, MPI_Aint size, MPI_Aint host_size ) int numa_allocate_shared_windows( map_t *me, MPI_Aint size, MPI_Aint host_size ) { int SHMEMl = Me->SHMEMl; int SHMEMl = me->SHMEMl; MPI_Info winfo; MPI_Info_create(&winfo); Loading @@ -100,38 +100,38 @@ int numa_allocate_shared_windows( map_t *Me, MPI_Aint size, MPI_Aint host_size win_host_size = size; Me->win.size = win_host_size; MPI_Win_allocate_shared(Me->win.size, 1, winfo, *Me->COMM[SHMEMl], &(Me->win.ptr), &(Me->win.win)); me->win.size = win_host_size; MPI_Win_allocate_shared(me->win.size, 1, winfo, *me->COMM[SHMEMl], &(me->win.ptr), &(me->win.win)); MPI_Aint wsize = ( Me->Rank[SHMEMl] == 0 ? win_hostmaster_size : 0); MPI_Win_allocate_shared(wsize, 1, winfo, *Me->COMM[SHMEMl], &win_hostmaster_ptr, &win_hostmaster); MPI_Aint wsize = ( me->Rank[SHMEMl] == 0 ? win_hostmaster_size : 0); MPI_Win_allocate_shared(wsize, 1, winfo, *me->COMM[SHMEMl], &win_hostmaster_ptr, &win_hostmaster); Me->swins = (win_t*)malloc(Me->Ntasks[SHMEMl]*sizeof(win_t) ); Me->swins[Me->Rank[SHMEMl]] = Me->win; // Me->swins = (win_t*)malloc(Me->Ntasks[SHMEMl]*sizeof(win_t)); me->swins = (win_t*)malloc(me->Ntasks[SHMEMl]*sizeof(win_t) ); me->swins[me->Rank[SHMEMl]] = me->win; // me->swins = (win_t*)malloc(me->Ntasks[SHMEMl]*sizeof(win_t)); // get the addresses of all the windows from my siblings // at my shared-memory level // for( int t = 0; t < Me->Ntasks[SHMEMl]; t++ ) if( t != Me->Rank[SHMEMl] ) MPI_Win_shared_query( Me->win.win, t, &(Me->swins[t].size), &(Me->swins[t].disp), &(Me->swins[t].ptr) ); for( int t = 0; t < me->Ntasks[SHMEMl]; t++ ) if( t != me->Rank[SHMEMl] ) MPI_Win_shared_query( me->win.win, t, &(me->swins[t].size), &(me->swins[t].disp), &(me->swins[t].ptr) ); if( Me->Rank[SHMEMl] != 0 ) if( me->Rank[SHMEMl] != 0 ) MPI_Win_shared_query( win_hostmaster, 0, &(win_hostmaster_size), &win_hostmaster_disp, &win_hostmaster_ptr ); return 0; } int numa_shutdown( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) int numa_shutdown( int Rank, int Size, MPI_Comm *MYWORLD, map_t *me ) { // free every shared memory and window // MPI_Win_free(&(Me->win.win)); MPI_Win_free(&(me->win.win)); // free all the structures if needed // free(Me->Ranks_to_host); free(Me->swins); free(me->Ranks_to_host); free(me->swins); // anything else // Loading @@ -141,92 +141,92 @@ int numa_shutdown( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) } int numa_build_mapping( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) int numa_build_mapping( int Rank, int Size, MPI_Comm *MYWORLD, map_t *me ) { COMM[WORLD] = *MYWORLD; Me->Ntasks[WORLD] = Size; Me->Rank[WORLD] = Rank; Me->COMM[WORLD] = &COMM[WORLD]; me->Ntasks[WORLD] = Size; me->Rank[WORLD] = Rank; me->COMM[WORLD] = &COMM[WORLD]; Me->mycpu = get_cpu_id(); me->mycpu = get_cpu_id(); // --- find how many hosts we are running on; // that is needed to build the communicator // among the masters of each host // numa_map_hostnames( &COMM[WORLD], Rank, Size, Me ); numa_map_hostnames( &COMM[WORLD], Rank, Size, me ); Me->MAXl = ( Me->Nhosts > 1 ? HOSTS : myHOST ); me->MAXl = ( me->Nhosts > 1 ? HOSTS : myHOST ); // --- create the communicator for each host // MPI_Comm_split( COMM[WORLD], Me->myhost, Me->Rank[WORLD], &COMM[myHOST]); MPI_Comm_split( COMM[WORLD], me->myhost, me->Rank[WORLD], &COMM[myHOST]); MPI_Comm_size( COMM[myHOST], &Size ); MPI_Comm_rank( COMM[myHOST], &Rank ); Me->COMM[myHOST] = &COMM[myHOST]; Me->Rank[myHOST] = Rank; Me->Ntasks[myHOST] = Size; me->COMM[myHOST] = &COMM[myHOST]; me->Rank[myHOST] = Rank; me->Ntasks[myHOST] = Size; // with the following gathering we build-up the mapping Ranks_to_hosts, so that // we know which host each mpi rank (meaning the original rank) belongs to // MPI_Allgather( &Me->myhost, sizeof(Me->myhost), MPI_BYTE, Me->Ranks_to_host, sizeof(Me->myhost), MPI_BYTE, COMM[WORLD] ); MPI_Allgather( &me->myhost, sizeof(me->myhost), MPI_BYTE, me->Ranks_to_host, sizeof(me->myhost), MPI_BYTE, COMM[WORLD] ); Me -> Ranks_to_myhost = (int*)malloc(Me->Ntasks[myHOST]*sizeof(int)); me -> Ranks_to_myhost = (int*)malloc(me->Ntasks[myHOST]*sizeof(int)); MPI_Allgather( &global_rank, sizeof(global_rank), MPI_BYTE, Me->Ranks_to_myhost, sizeof(global_rank), MPI_BYTE, *Me->COMM[myHOST]); me->Ranks_to_myhost, sizeof(global_rank), MPI_BYTE, *me->COMM[myHOST]); // --- create the communicator for the // masters of each host // int Im_host_master = ( Me->Rank[myHOST] == 0 ); MPI_Comm_split( COMM[WORLD], Im_host_master, Me->Rank[WORLD], &COMM[HOSTS]); int Im_host_master = ( me->Rank[myHOST] == 0 ); MPI_Comm_split( COMM[WORLD], Im_host_master, me->Rank[WORLD], &COMM[HOSTS]); // // NOTE: by default, the Rank 0 in WORLD is also Rank 0 in HOSTS // if (Im_host_master) { Me->COMM[HOSTS] = &COMM[HOSTS]; Me->Ntasks[HOSTS] = Me->Nhosts; MPI_Comm_rank( COMM[HOSTS], &(Me->Rank[HOSTS])); me->COMM[HOSTS] = &COMM[HOSTS]; me->Ntasks[HOSTS] = me->Nhosts; MPI_Comm_rank( COMM[HOSTS], &(me->Rank[HOSTS])); } else { Me->COMM[HOSTS] = NULL; Me->Ntasks[HOSTS] = 0; Me->Rank[HOSTS] = -1; me->COMM[HOSTS] = NULL; me->Ntasks[HOSTS] = 0; me->Rank[HOSTS] = -1; } // --- create the communicator for the // numa node // MPI_Comm_split_type( COMM[myHOST], MPI_COMM_TYPE_SHARED, Me->Rank[myHOST], MPI_INFO_NULL, &COMM[NUMA]); Me->COMM[NUMA] = &COMM[NUMA]; MPI_Comm_size( COMM[NUMA], &(Me->Ntasks[NUMA])); MPI_Comm_rank( COMM[NUMA], &(Me->Rank[NUMA])); MPI_Comm_split_type( COMM[myHOST], MPI_COMM_TYPE_SHARED, me->Rank[myHOST], MPI_INFO_NULL, &COMM[NUMA]); me->COMM[NUMA] = &COMM[NUMA]; MPI_Comm_size( COMM[NUMA], &(me->Ntasks[NUMA])); MPI_Comm_rank( COMM[NUMA], &(me->Rank[NUMA])); // check whether NUMA == myHOST and determine // the maximum level of shared memory in the // topology // if ( Me->Ntasks[NUMA] == Me->Ntasks[myHOST] ) if ( me->Ntasks[NUMA] == me->Ntasks[myHOST] ) { // collapse levels from NUMA to myHOST // Me->Ntasks[ISLAND] = Me->Ntasks[NUMA]; // equating to NUMA as we know the rank better via MPI_SHARED Me->Rank[ISLAND] = Me->Rank[NUMA]; Me->COMM[ISLAND] = Me->COMM[NUMA]; me->Ntasks[ISLAND] = me->Ntasks[NUMA]; // equating to NUMA as we know the rank better via MPI_SHARED me->Rank[ISLAND] = me->Rank[NUMA]; me->COMM[ISLAND] = me->COMM[NUMA]; Me->Rank[myHOST] = Me->Rank[NUMA]; Me->COMM[myHOST] = Me->COMM[NUMA]; Me->SHMEMl = myHOST; me->Rank[myHOST] = me->Rank[NUMA]; me->COMM[myHOST] = me->COMM[NUMA]; me->SHMEMl = myHOST; } else { Loading @@ -234,15 +234,15 @@ int numa_build_mapping( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) // at this moment printf(">>> It seems that rank %d belongs to a node for which " " the node topology does not coincide \n", Rank ); Me->SHMEMl = NUMA; me->SHMEMl = NUMA; } int check_SHMEM_level = 1; int globalcheck_SHMEM_level; int globalmax_SHMEM_level; MPI_Allreduce( &(Me->SHMEMl), &globalmax_SHMEM_level, 1, MPI_INT, MPI_MAX, *MYWORLD ); MPI_Allreduce( &(me->SHMEMl), &globalmax_SHMEM_level, 1, MPI_INT, MPI_MAX, *MYWORLD ); check_SHMEM_level = ( (Me->SHMEMl == myHOST) && (globalmax_SHMEM_level == Me->SHMEMl) ); check_SHMEM_level = ( (me->SHMEMl == myHOST) && (globalmax_SHMEM_level == me->SHMEMl) ); MPI_Allreduce( &check_SHMEM_level, &globalcheck_SHMEM_level, 1, MPI_INT, MPI_MAX, *MYWORLD ); Loading Loading
init.c +1 −1 Original line number Diff line number Diff line Loading @@ -366,7 +366,7 @@ void allocate_memory() { gridss_real = (double*) calloc(size_of_grid/2,sizeof(double)); gridss_img = (double*) calloc(size_of_grid/2,sizeof(double)); numa_allocate_shared_windows( Me, size_of_grid*sizeof(double)*1.1, size_of_grid*sizeof(double)*1.1); numa_allocate_shared_windows( &Me, size_of_grid*sizeof(double)*1.1, size_of_grid*sizeof(double)*1.1); // Create destination slab grid = (double*) calloc(size_of_grid,sizeof(double)); Loading
numa.c +54 −54 Original line number Diff line number Diff line Loading @@ -75,10 +75,10 @@ int numa_init( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) } int numa_allocate_shared_windows( map_t *Me, MPI_Aint size, MPI_Aint host_size ) int numa_allocate_shared_windows( map_t *me, MPI_Aint size, MPI_Aint host_size ) { int SHMEMl = Me->SHMEMl; int SHMEMl = me->SHMEMl; MPI_Info winfo; MPI_Info_create(&winfo); Loading @@ -100,38 +100,38 @@ int numa_allocate_shared_windows( map_t *Me, MPI_Aint size, MPI_Aint host_size win_host_size = size; Me->win.size = win_host_size; MPI_Win_allocate_shared(Me->win.size, 1, winfo, *Me->COMM[SHMEMl], &(Me->win.ptr), &(Me->win.win)); me->win.size = win_host_size; MPI_Win_allocate_shared(me->win.size, 1, winfo, *me->COMM[SHMEMl], &(me->win.ptr), &(me->win.win)); MPI_Aint wsize = ( Me->Rank[SHMEMl] == 0 ? win_hostmaster_size : 0); MPI_Win_allocate_shared(wsize, 1, winfo, *Me->COMM[SHMEMl], &win_hostmaster_ptr, &win_hostmaster); MPI_Aint wsize = ( me->Rank[SHMEMl] == 0 ? win_hostmaster_size : 0); MPI_Win_allocate_shared(wsize, 1, winfo, *me->COMM[SHMEMl], &win_hostmaster_ptr, &win_hostmaster); Me->swins = (win_t*)malloc(Me->Ntasks[SHMEMl]*sizeof(win_t) ); Me->swins[Me->Rank[SHMEMl]] = Me->win; // Me->swins = (win_t*)malloc(Me->Ntasks[SHMEMl]*sizeof(win_t)); me->swins = (win_t*)malloc(me->Ntasks[SHMEMl]*sizeof(win_t) ); me->swins[me->Rank[SHMEMl]] = me->win; // me->swins = (win_t*)malloc(me->Ntasks[SHMEMl]*sizeof(win_t)); // get the addresses of all the windows from my siblings // at my shared-memory level // for( int t = 0; t < Me->Ntasks[SHMEMl]; t++ ) if( t != Me->Rank[SHMEMl] ) MPI_Win_shared_query( Me->win.win, t, &(Me->swins[t].size), &(Me->swins[t].disp), &(Me->swins[t].ptr) ); for( int t = 0; t < me->Ntasks[SHMEMl]; t++ ) if( t != me->Rank[SHMEMl] ) MPI_Win_shared_query( me->win.win, t, &(me->swins[t].size), &(me->swins[t].disp), &(me->swins[t].ptr) ); if( Me->Rank[SHMEMl] != 0 ) if( me->Rank[SHMEMl] != 0 ) MPI_Win_shared_query( win_hostmaster, 0, &(win_hostmaster_size), &win_hostmaster_disp, &win_hostmaster_ptr ); return 0; } int numa_shutdown( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) int numa_shutdown( int Rank, int Size, MPI_Comm *MYWORLD, map_t *me ) { // free every shared memory and window // MPI_Win_free(&(Me->win.win)); MPI_Win_free(&(me->win.win)); // free all the structures if needed // free(Me->Ranks_to_host); free(Me->swins); free(me->Ranks_to_host); free(me->swins); // anything else // Loading @@ -141,92 +141,92 @@ int numa_shutdown( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) } int numa_build_mapping( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) int numa_build_mapping( int Rank, int Size, MPI_Comm *MYWORLD, map_t *me ) { COMM[WORLD] = *MYWORLD; Me->Ntasks[WORLD] = Size; Me->Rank[WORLD] = Rank; Me->COMM[WORLD] = &COMM[WORLD]; me->Ntasks[WORLD] = Size; me->Rank[WORLD] = Rank; me->COMM[WORLD] = &COMM[WORLD]; Me->mycpu = get_cpu_id(); me->mycpu = get_cpu_id(); // --- find how many hosts we are running on; // that is needed to build the communicator // among the masters of each host // numa_map_hostnames( &COMM[WORLD], Rank, Size, Me ); numa_map_hostnames( &COMM[WORLD], Rank, Size, me ); Me->MAXl = ( Me->Nhosts > 1 ? HOSTS : myHOST ); me->MAXl = ( me->Nhosts > 1 ? HOSTS : myHOST ); // --- create the communicator for each host // MPI_Comm_split( COMM[WORLD], Me->myhost, Me->Rank[WORLD], &COMM[myHOST]); MPI_Comm_split( COMM[WORLD], me->myhost, me->Rank[WORLD], &COMM[myHOST]); MPI_Comm_size( COMM[myHOST], &Size ); MPI_Comm_rank( COMM[myHOST], &Rank ); Me->COMM[myHOST] = &COMM[myHOST]; Me->Rank[myHOST] = Rank; Me->Ntasks[myHOST] = Size; me->COMM[myHOST] = &COMM[myHOST]; me->Rank[myHOST] = Rank; me->Ntasks[myHOST] = Size; // with the following gathering we build-up the mapping Ranks_to_hosts, so that // we know which host each mpi rank (meaning the original rank) belongs to // MPI_Allgather( &Me->myhost, sizeof(Me->myhost), MPI_BYTE, Me->Ranks_to_host, sizeof(Me->myhost), MPI_BYTE, COMM[WORLD] ); MPI_Allgather( &me->myhost, sizeof(me->myhost), MPI_BYTE, me->Ranks_to_host, sizeof(me->myhost), MPI_BYTE, COMM[WORLD] ); Me -> Ranks_to_myhost = (int*)malloc(Me->Ntasks[myHOST]*sizeof(int)); me -> Ranks_to_myhost = (int*)malloc(me->Ntasks[myHOST]*sizeof(int)); MPI_Allgather( &global_rank, sizeof(global_rank), MPI_BYTE, Me->Ranks_to_myhost, sizeof(global_rank), MPI_BYTE, *Me->COMM[myHOST]); me->Ranks_to_myhost, sizeof(global_rank), MPI_BYTE, *me->COMM[myHOST]); // --- create the communicator for the // masters of each host // int Im_host_master = ( Me->Rank[myHOST] == 0 ); MPI_Comm_split( COMM[WORLD], Im_host_master, Me->Rank[WORLD], &COMM[HOSTS]); int Im_host_master = ( me->Rank[myHOST] == 0 ); MPI_Comm_split( COMM[WORLD], Im_host_master, me->Rank[WORLD], &COMM[HOSTS]); // // NOTE: by default, the Rank 0 in WORLD is also Rank 0 in HOSTS // if (Im_host_master) { Me->COMM[HOSTS] = &COMM[HOSTS]; Me->Ntasks[HOSTS] = Me->Nhosts; MPI_Comm_rank( COMM[HOSTS], &(Me->Rank[HOSTS])); me->COMM[HOSTS] = &COMM[HOSTS]; me->Ntasks[HOSTS] = me->Nhosts; MPI_Comm_rank( COMM[HOSTS], &(me->Rank[HOSTS])); } else { Me->COMM[HOSTS] = NULL; Me->Ntasks[HOSTS] = 0; Me->Rank[HOSTS] = -1; me->COMM[HOSTS] = NULL; me->Ntasks[HOSTS] = 0; me->Rank[HOSTS] = -1; } // --- create the communicator for the // numa node // MPI_Comm_split_type( COMM[myHOST], MPI_COMM_TYPE_SHARED, Me->Rank[myHOST], MPI_INFO_NULL, &COMM[NUMA]); Me->COMM[NUMA] = &COMM[NUMA]; MPI_Comm_size( COMM[NUMA], &(Me->Ntasks[NUMA])); MPI_Comm_rank( COMM[NUMA], &(Me->Rank[NUMA])); MPI_Comm_split_type( COMM[myHOST], MPI_COMM_TYPE_SHARED, me->Rank[myHOST], MPI_INFO_NULL, &COMM[NUMA]); me->COMM[NUMA] = &COMM[NUMA]; MPI_Comm_size( COMM[NUMA], &(me->Ntasks[NUMA])); MPI_Comm_rank( COMM[NUMA], &(me->Rank[NUMA])); // check whether NUMA == myHOST and determine // the maximum level of shared memory in the // topology // if ( Me->Ntasks[NUMA] == Me->Ntasks[myHOST] ) if ( me->Ntasks[NUMA] == me->Ntasks[myHOST] ) { // collapse levels from NUMA to myHOST // Me->Ntasks[ISLAND] = Me->Ntasks[NUMA]; // equating to NUMA as we know the rank better via MPI_SHARED Me->Rank[ISLAND] = Me->Rank[NUMA]; Me->COMM[ISLAND] = Me->COMM[NUMA]; me->Ntasks[ISLAND] = me->Ntasks[NUMA]; // equating to NUMA as we know the rank better via MPI_SHARED me->Rank[ISLAND] = me->Rank[NUMA]; me->COMM[ISLAND] = me->COMM[NUMA]; Me->Rank[myHOST] = Me->Rank[NUMA]; Me->COMM[myHOST] = Me->COMM[NUMA]; Me->SHMEMl = myHOST; me->Rank[myHOST] = me->Rank[NUMA]; me->COMM[myHOST] = me->COMM[NUMA]; me->SHMEMl = myHOST; } else { Loading @@ -234,15 +234,15 @@ int numa_build_mapping( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) // at this moment printf(">>> It seems that rank %d belongs to a node for which " " the node topology does not coincide \n", Rank ); Me->SHMEMl = NUMA; me->SHMEMl = NUMA; } int check_SHMEM_level = 1; int globalcheck_SHMEM_level; int globalmax_SHMEM_level; MPI_Allreduce( &(Me->SHMEMl), &globalmax_SHMEM_level, 1, MPI_INT, MPI_MAX, *MYWORLD ); MPI_Allreduce( &(me->SHMEMl), &globalmax_SHMEM_level, 1, MPI_INT, MPI_MAX, *MYWORLD ); check_SHMEM_level = ( (Me->SHMEMl == myHOST) && (globalmax_SHMEM_level == Me->SHMEMl) ); check_SHMEM_level = ( (me->SHMEMl == myHOST) && (globalmax_SHMEM_level == me->SHMEMl) ); MPI_Allreduce( &check_SHMEM_level, &globalcheck_SHMEM_level, 1, MPI_INT, MPI_MAX, *MYWORLD ); Loading