diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index e5c4343efd..ad92d03d8c 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -845,6 +845,14 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open matching_port = j; } rem_port_cnt++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { + BTL_VERBOSE(("Using different subnets!")); + if (rem_port_cnt == btl_rank) { + matching_port = j; + } + rem_port_cnt++; + } } } @@ -911,6 +919,13 @@ static int init_ib_proc_nolock(mca_btl_openib_module_t* openib_btl, mca_btl_open break; else rem_port_cnt ++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { + if (rem_port_cnt == btl_rank) + break; + else + rem_port_cnt ++; + } } } @@ -977,6 +992,13 @@ static int get_openib_btl_params(mca_btl_openib_module_t* openib_btl, int *port_ rank = port_cnt; } port_cnt++; + } else { + if (mca_btl_openib_component.allow_different_subnets) { + if (openib_btl == mca_btl_openib_component.openib_btls[j]) { + rank = port_cnt; + } + port_cnt++; + } } } *port_cnt_ptr = port_cnt; diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index 61f756f81b..5a8e5779a6 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -293,6 +293,9 @@ struct mca_btl_openib_component_t { char* default_recv_qps; /** GID index to use */ int gid_index; + /* Whether we want to allow connecting processes from different subnets. + * set to 'no' by default */ + bool allow_different_subnets; /** Whether we want a dynamically resizing srq, enabled by default */ bool enable_srq_resize; bool allow_max_memory_registration; diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c index 8782331e11..ed74780797 100644 --- a/opal/mca/btl/openib/btl_openib_mca.c +++ b/opal/mca/btl/openib/btl_openib_mca.c @@ -703,6 +703,11 @@ int btl_openib_register_mca_params(void) 0, &mca_btl_openib_component.gid_index, REGINT_GE_ZERO)); + CHECK(reg_bool("allow_different_subnets", NULL, + "Allow connecting processes from different IB subnets." + "(0 = do not allow; 1 = allow)", + false, &mca_btl_openib_component.allow_different_subnets)); + #if MEMORY_LINUX_MALLOC_ALIGN_ENABLED tmp = mca_base_var_find ("opal", "memory", "linux", "memalign"); if (0 <= tmp) { diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c index 67a4fb2954..95af1e3de4 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c @@ -2109,7 +2109,8 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, opal_btl_ sin.sin_addr.s_addr = rdmacm_addr; sin.sin_port = (uint16_t) rdmacm_port; #else - rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, 0, &server->gid); + rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, + mca_btl_openib_component.gid_index, &server->gid); if (0 != rc) { BTL_ERROR(("local gid query failed")); goto out4;