Loading numa.c 0 → 100644 +393 −0 Original line number Diff line number Diff line #include "allvars.h" #include "proto.h" #include <stdio.h> #include <unistd.h> #include <limits.h> map_t Me; MPI_Comm COMM[HLEVELS]; char *LEVEL_NAMES[HLEVELS] = {"NUMA", "ISLAND", "myHOST", "HOSTS", "WORLD"}; MPI_Aint win_host_master_size = 0; MPI_Aint win_host__size = 0; MPI_Aint win_host_master_size; MPI_Win win_host_master; int win_host_master_disp; void *win_host_master_ptr; int build_numa_mapping( int, int, MPI_Comm *, map_t *); int map_hostnames( MPI_Comm *, int, int, map_t *); int get_cpu_id( void ); int compare_string_int_int( const void *, const void * ); int init_numa( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) { // build up the numa hierarchy // build_numa_mapping( rank, size, MYWORLD, Me ); // initialize sizes for the persistent // shared windos win_host_master_size = WIN_HOST_MASTER_SIZE_DFLT*1024*1024; MPI_Aint win_host_size = WIN_HOST_SIZE_DFLT*1024*1024; // initialize the persistent shared windows // int SHMEMl = Me->SHMEMl; MPI_Info winfo; MPI_Info_create(&winfo); MPI_Info_set(winfo, "alloc_shared_noncontig", "true"); Me->win.size = win_host_size; MPI_Win_allocate_shared(Me->win.size, 1, winfo, *Me->COMM[SHMEMl], &(Me->win.ptr), &(Me->win.win)); MPI_Aint size = ( Me->Rank[SHMEMl] == 0 ? 
win_host_master_size : 0); MPI_Win_allocate_shared(size, 1, winfo, *Me->COMM[SHMEMl], &win_host_master_ptr, &win_host_master); Me->swins = (win_t*)malloc(Me->Ntasks[SHMEMl]*sizeof(win_t) ); // Me->swins = (win_t*)malloc(Me->Ntasks[SHMEMl]*sizeof(win_t)); // get the addresses of all the windows from my siblings // at my shared-memory level // for( int t = 0; t < Me->Ntasks[SHMEMl]; t++ ) if( t != Me->Rank[SHMEMl] ) MPI_Win_shared_query( Me->win.win, t, &(Me->swins[t].size), &(Me->swins[t].disp), &(Me->swins[t].ptr) ); if( Me->Rank[SHMEMl] != 0 ) MPI_Win_shared_query( win_host_master, 0, &(win_host_master_size), &win_host_master_disp, &win_host_master_ptr ); } int shutdown_numa( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) { // free every shared memory and window // MPI_Win_free(&(Me->win.win)); // free all the structures if needed // free(Me->Ranks_to_host); free(Me->swins); // anything else // // ... } int build_numa_mapping( int Rank, int Size, MPI_Comm *MYWORLD, map_t *Me ) { COMM[WORLD] = *MYWORLD; Me->Ntasks[WORLD] = Size; Me->Rank[WORLD] = Rank; Me->COMM[WORLD] = &COMM[WORLD]; Me->mycpu = get_cpu_id(); // --- find how many hosts we are running on; // that is needed to build the communicator // among the masters of each host // map_hostnames( &COMM[WORLD], Rank, Size, Me ); Me->MAXl = ( Me->Nhosts > 1 ? 
HOSTS : myHOST ); // --- create the communicator for each host // MPI_Comm_split( COMM[WORLD], Me->myhost, Me->Rank[WORLD], &COMM[myHOST]); MPI_Comm_size( COMM[myHOST], &Size ); MPI_Comm_rank( COMM[myHOST], &Rank ); Me->COMM[myHOST] = &COMM[myHOST]; Me->Rank[myHOST] = Rank; Me->Ntasks[myHOST] = Size; // --- create the communicator for the // masters of each host // int Im_host_master = ( Me->Rank[myHOST] == 0 ); MPI_Comm_split( COMM[WORLD], Im_host_master, Me->Rank[WORLD], &COMM[HOSTS]); // // NOTE: by default, the Rank 0 in WORLD is also Rank 0 in HOSTS // if (Im_host_master) { Me->COMM[HOSTS] = &COMM[HOSTS]; Me->Ntasks[HOSTS] = Me->Nhosts; MPI_Comm_rank( COMM[HOSTS], &(Me->Rank[HOSTS])); } else { Me->COMM[HOSTS] = NULL; Me->Ntasks[HOSTS] = 0; Me->Rank[HOSTS] = -1; } // --- create the communicator for the // numa node // MPI_Comm_split_type( COMM[myHOST], MPI_COMM_TYPE_SHARED, Me->Rank[myHOST], MPI_INFO_NULL, &COMM[NUMA]); Me->COMM[NUMA] = &COMM[NUMA]; MPI_Comm_size( COMM[NUMA], &(Me->Ntasks[NUMA])); MPI_Comm_rank( COMM[NUMA], &(Me->Rank[NUMA])); // check whether NUMA == myHOST and determine // the maximum level of shared memory in the // topology // if ( Me->Ntasks[NUMA] == Me->Ntasks[myHOST] ) { // collapse levels from NUMA to myHOST // Me->Ntasks[ISLAND] = Me->Ntasks[NUMA]; // equating to NUMA as we know the rank better via MPI_SHARED Me->Rank[ISLAND] = Me->Rank[NUMA]; Me->COMM[ISLAND] = Me->COMM[NUMA]; Me->Rank[myHOST] = Me->Rank[NUMA]; Me->COMM[myHOST] = Me->COMM[NUMA]; Me->SHMEMl = myHOST; } else { // actually we do not care for this case // at this moment printf(">>> It seems that rank %d belongs to a node for which " " the node topology does not coincide \n", Rank ); Me->SHMEMl = NUMA; } int check_SHMEM_level = 1; int globalcheck_SHMEM_level; int globalmax_SHMEM_level; MPI_Allreduce( &(Me->SHMEMl), &globalmax_SHMEM_level, 1, MPI_INT, MPI_MAX, *MYWORLD ); check_SHMEM_level = ( (Me->SHMEMl == myHOST) && (globalmax_SHMEM_level == Me->SHMEMl) ); MPI_Allreduce( 
&check_SHMEM_level, &globalcheck_SHMEM_level, 1, MPI_INT, MPI_MAX, *MYWORLD ); if( globalcheck_SHMEM_level < 1 ) { if( Rank == 0 ) { printf("There was an error in determining the topology hierarchy, " "SHMEM level is different for different MPI tasks\n"); return -1; } } return 0; } int map_hostnames( MPI_Comm *MY_WORLD, // the communicator to refer to int Rank, // the initial rank of the calling process in MYWORLD int Ntasks, // the number of tasks in MY_WORLD map_t *me) // address of the info structure for the calling task { // -------------------------------------------------- // --- init some global vars me -> Ranks_to_host = (int*)malloc(Ntasks*sizeof(int)); //me -> Ranks_to_host = (int*)malloc(Ntasks*sizeof(int)); me -> Nhosts = 0; me -> myhost = -1; // -------------------------------------------------- // --- find how many hosts we are using char myhostname[HOST_NAME_MAX+1]; gethostname( myhostname, HOST_NAME_MAX+1 ); // determine how much space to book for hostnames int myhostlen = strlen(myhostname)+1; int maxhostlen = 0; MPI_Allreduce ( &myhostlen, &maxhostlen, 1, MPI_INT, MPI_MAX, *MY_WORLD ); // collect hostnames // typedef struct { char hostname[maxhostlen]; int rank; } hostname_rank_t; hostname_rank_t mydata; hostname_rank_t *alldata = (hostname_rank_t*)calloc( Ntasks, sizeof(hostname_rank_t) ); mydata.rank = Rank; sprintf( mydata.hostname, "%s", myhostname); MPI_Allgather( &mydata, sizeof(hostname_rank_t), MPI_BYTE, alldata, sizeof(hostname_rank_t), MPI_BYTE, *MY_WORLD ); // sort the hostnames // 1) set the lenght of string for comparison int dummy = maxhostlen; compare_string_int_int( NULL, &dummy ); // 2) actually sort qsort( alldata, Ntasks, sizeof(hostname_rank_t), compare_string_int_int ); // now the array alldata is sorted by hostname, and inside each hostname the processes // running on each host are sorted by their node, and for each node they are sorted // by ht. 
// As a direct consequence, the running index on the alldata array can be considered // as the new global rank of each process // --- count how many diverse hosts we have, and register each rank to its host, so that // we can alway find all the tasks with their original rank char *prev = alldata[0].hostname; for ( int R = 0; R < Ntasks; R++ ) { if ( strcmp(alldata[R].hostname, prev) != 0 ) { me->Nhosts++; prev = alldata[R].hostname; } if ( alldata[R].rank == Rank ) // it's me me->myhost = me->Nhosts; // remember my host } me->Nhosts++; // with the following gathering we build-up the mapping Ranks_to_hosts, so that // we know which host each mpi rank (meaning the original rank) belongs to // MPI_Allgather( &me->myhost, sizeof(me->myhost), MPI_BYTE, me->Ranks_to_host, sizeof(me->myhost), MPI_BYTE, *MY_WORLD ); free( alldata ); return me->Nhosts; } int compare_string_int_int( const void *A, const void *B ) // used to sort structures made as // { char *s; // int b; // ... } // The sorting is hierarchical by *s first, then b // if necessary // The length of *s is set by calling // compare_string_int_int( NULL, len ) // before to use this routine in qsort-like calls { static int str_len = 0; if ( A == NULL ) { str_len = *(int*)B + 1; return 0; } // we do not use strncmp because str_len=0, // i.e. 
using this function without initializing it, // can be used to have a sorting only on // strings int order = strcmp( (char*)A, (char*)B ); if ( str_len && (!order) ) { int a = *(int*)((char*)A + str_len); int b = *(int*)((char*)B + str_len); order = a - b; if( !order ) { int a = *((int*)((char*)A + str_len)+1); int b = *((int*)((char*)B + str_len)+1); order = a - b; } } return order; } #define CPU_ID_ENTRY_IN_PROCSTAT 39 int read_proc__self_stat( int, int * ); int get_cpu_id( void ) { #if defined(_GNU_SOURCE) // GNU SOURCE ------------ return sched_getcpu( ); #else #ifdef SYS_getcpu // direct sys call --- int cpuid; if ( syscall( SYS_getcpu, &cpuid, NULL, NULL ) == -1 ) return -1; else return cpuid; #else unsigned val; if ( read_proc__self_stat( CPU_ID_ENTRY_IN_PROCSTAT, &val ) == -1 ) return -1; return (int)val; #endif // ----------------------- #endif } int read_proc__self_stat( int field, int *ret_val ) /* Other interesting fields: pid : 0 father : 1 utime : 13 cutime : 14 nthreads : 18 rss : 22 cpuid : 39 read man /proc page for fully detailed infos */ { // not used, just mnemonic // char *table[ 52 ] = { [0]="pid", [1]="father", [13]="utime", [14]="cutime", [18]="nthreads", [22]="rss", [38]="cpuid"}; *ret_val = 0; FILE *file = fopen( "/proc/self/stat", "r" ); if (file == NULL ) return -1; char *line = NULL; int ret; size_t len; ret = getline( &line, &len, file ); fclose(file); if( ret == -1 ) return -1; char *savetoken = line; char *token = strtok_r( line, " ", &savetoken); --field; do { token = strtok_r( NULL, " ", &savetoken); field--; } while( field ); *ret_val = atoi(token); free(line); return 0; } numa.h 0 → 100644 +52 −0 Original line number Diff line number Diff line #define NUMA 0 // my NUMA node communicator, includes all the sibling tasks that share memory #define ISLAND 1 // something between the host and the NUMA nodes, if present #define myHOST 2 // my host communicator, includes all the sibling tasks running on the same hosts #define HOSTS 3 // 
the communicator that includes only the masters of the hosts #define WORLD 4 // everybody is in (i.e. this is MPI_COMM_WORLD) #define HLEVELS 5 extern char *LEVEL_NAMES[HLEVELS]; typedef struct { MPI_Win win; MPI_Aint size; void *ptr; int disp; } win_t; typedef struct { int mycpu; // the core (hwthread) on which i'm running int nthreads; // how many (omp) thread do i have int myhost; // the host on which i'm running int Nhosts; int Ntasks[HLEVELS]; int *Ranks_to_host; // check if it is needed int Rank[HLEVELS]; int MAXl; // the maximum level of the hierarchy int SHMEMl; // the maximum hierarchy level that is in shared memory MPI_Comm *COMM[HLEVELS]; // ----------------------- // not yet used // int mynode; // the numa node on which i'm running // int ntasks_in_my_node; win_t win; // my shared-memory window win_t *swins; // the shared-memory windows of ther tasks in my host } map_t; extern map_t Me; extern MPI_Comm COMM[HLEVELS]; #define WIN_HOST_SIZE_DFLT 100 // in MB #define WIN_HOST_MASTER_SIZE_DFLT 100 // in MB extern MPI_Aint win_host_master_size; extern MPI_Win win_host_master; extern int win_host_master_disp; extern void *win_host_master_ptr; Loading
/* NOTE(review): this region was a verbatim duplicate of the numa.c and
 * numa.h content above — a paste/extraction artifact of the diff page.
 * Keeping both copies would define every symbol twice (Me, COMM,
 * LEVEL_NAMES, init_numa, shutdown_numa, build_numa_mapping,
 * map_hostnames, compare_string_int_int, get_cpu_id,
 * read_proc__self_stat, plus the numa.h typedefs and macros), which
 * cannot link or even compile as a single translation unit.  The
 * duplicate has therefore been removed; the single authoritative copy
 * is the one above. */