Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
address comment using Class to do test, get rid of extraneous test, u…
Browse files Browse the repository at this point in the history
…se PCI-E as fallback for GPUs that are not linked by NVLink
  • Loading branch information
Carl Yang committed Jul 2, 2018
1 parent 3241d71 commit bd926bf
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 52 deletions.
40 changes: 38 additions & 2 deletions src/kvstore/gpu_topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <limits>
#include <random>
#include <stack>
#include <queue>
#include <string>
#include <unordered_set>
#include <unordered_map>
Expand Down Expand Up @@ -82,6 +83,39 @@ inline void PrintTopo(const std::string& str, const std::vector<size_t>& topo_ro
}
}

// Uses BFS starting at GPU 0 to find whether an undirected graph is connected
// or not, given its adjacency matrix flattened row by row.
// Note: only matrix values > 1 count as edges, because we care about whether
// the graph is connected using only NVLink connections.
// @input: matrix is the flattened adjacency matrix; it is indexed up to
//         num_gpus * num_gpus - 1, so it must hold at least that many entries
// @input: num_gpus is the number of vertices (rows/columns of matrix)
// @output: true if every GPU can be reached from GPU 0 over edges whose weight
//          is > 1; a graph without vertices (num_gpus <= 0) counts as connected
template <typename T>
inline bool IsConnected(const std::vector<T>& matrix,
                        int num_gpus) {
  // Nothing to traverse. Returning here also keeps us from touching
  // visited[0] on an empty vector.
  if (num_gpus <= 0) {
    return true;
  }

  const int source = 0;
  std::vector<bool> visited(num_gpus, false);
  std::queue<int> work_list;

  visited[source] = true;
  work_list.push(source);
  int num_visited = 1;

  while (!work_list.empty()) {
    const int curr = work_list.front();
    work_list.pop();

    for (int i = 0; i < num_gpus; ++i) {
      // Compare in the matrix's own element type: converting a floating point
      // weight to int first would truncate e.g. 1.5 to 1 and lose the edge.
      const T& weight = matrix[curr * num_gpus + i];
      if (i != curr && weight > 1 && !visited[i]) {
        visited[i] = true;
        ++num_visited;
        work_list.push(i);
      }
    }
  }

  // Connected exactly when the traversal reached every vertex.
  return num_visited == num_gpus;
}

// Generate adjacency matrix with row/col numbering from 0, 1, ..., n_gpu
// @input: devs is a vector of GPU contexts
// @output: matrix is adjacency matrix of link topology graph
Expand Down Expand Up @@ -129,9 +163,11 @@ inline void GetP2PWeight(const std::vector<Context>& devs,
max_value = max[i];
}

// If all GPUs have at least 1 NVLink connection, then we can use NVLink only
// If all GPUs are connected by NVLink, then we can use NVLink only
// to communicate instead of going over PCI-E
if (max_value > 0) {
bool connected = IsConnected(*matrix, num_gpus);

if (connected) {
for (auto& matrix_value : *matrix) {
matrix_value = (matrix_value == 1) ? 0 : matrix_value;
}
Expand Down
65 changes: 47 additions & 18 deletions tests/cpp/kvstore/gpu_topology_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,21 +267,6 @@ TEST(GpuTopology, TestEwisemult) {
ASSERT_EQ(y[i], correct_y[i]);
}

// ewiseaddTest
TEST(GpuTopology, TestEwiseadd) {
  // Inputs: x holds eight ones, y counts 0..7 and alpha is 5.
  std::vector<int> x(8, 1);
  std::vector<int> y(8);
  std::iota(y.begin(), y.end(), 0);
  int alpha = 5;

  // Reference result the call is expected to leave in y: the sequence 5..12.
  std::vector<int> expected(y.size());
  std::iota(expected.begin(), expected.end(), 5);

  mxnet::kvstore::ewiseadd(x, alpha, &y);

  ASSERT_EQ(y.size(), expected.size());
  for (unsigned idx = 0; idx < y.size(); ++idx) {
    ASSERT_EQ(y[idx], expected[idx]);
  }
}

// FindBestMoveTest
TEST(GpuTopology, TestFindBestMove) {
std::vector<int> W = {0, 2, 2, 3, 3, 1, 1, 1,
Expand Down Expand Up @@ -496,10 +481,9 @@ TEST(GpuTopology, TestUpdateWeight) {
ASSERT_EQ(W[i], correct_W[i]);
}
}
// Backtrack
// BacktrackGenerateBinaryTree

// ComputeTreesFromRoot
TEST(GpuTopology, TestComputeTreesFromRoot) {
TEST(GpuTopology, TestComputeTreesFromRoot1) {
std::vector<float> W = {0, 2, 2, 3, 3, 1, 1, 1,
2, 0, 3, 2, 1, 3, 1, 1,
2, 3, 0, 3, 1, 1, 2, 1,
Expand All @@ -524,6 +508,51 @@ TEST(GpuTopology, TestComputeTreesFromRoot) {
ASSERT_EQ(scan.size(), correct_scan_size);
}

// IsConnected
// Test on graph that is "disconnected" by NVLink
TEST(GpuTopology, TestIsConnected1) {
  // 4-GPU link matrix whose only entries > 1 pair GPU 0 with GPU 2 and GPU 1
  // with GPU 3; every other entry is 0, so the two pairs are never joined.
  const int gpu_count = 4;
  const std::vector<float> link_weights = {
      0, 0, 2, 0,
      0, 0, 0, 2,
      2, 0, 0, 0,
      0, 2, 0, 0};

  const bool expected = false;
  const bool actual = mxnet::kvstore::IsConnected(link_weights, gpu_count);

  ASSERT_EQ(actual, expected);
}

// IsConnected
// Test on graph that is "disconnected" by NVLink
TEST(GpuTopology, TestIsConnected2) {
  // Same pairing as TestIsConnected1 (0-2 and 1-3 are the only entries > 1),
  // but all remaining entries are 1 instead of 0. Entries equal to 1 must not
  // be treated as edges, so the expected answer is still "not connected".
  const int gpu_count = 4;
  const std::vector<float> link_weights = {
      1, 1, 2, 1,
      1, 1, 1, 2,
      2, 1, 1, 1,
      1, 2, 1, 1};

  const bool expected = false;
  const bool actual = mxnet::kvstore::IsConnected(link_weights, gpu_count);

  ASSERT_EQ(actual, expected);
}

// IsConnected
// Test on graph that is connected by NVLink
TEST(GpuTopology, TestIsConnected3) {
  // Entries > 1 link GPU 0 with GPUs 2 and 3, and GPU 1 with GPU 3, so every
  // GPU can be reached from GPU 0 using only those links.
  const int gpu_count = 4;
  const std::vector<float> link_weights = {
      1, 1, 2, 2,
      1, 1, 1, 2,
      2, 1, 1, 1,
      2, 2, 1, 1};

  const bool expected = true;
  const bool actual = mxnet::kvstore::IsConnected(link_weights, gpu_count);

  ASSERT_EQ(actual, expected);
}

// ComputeTreesTest with backtracking
TEST(GpuTopology, TestComputeTrees1) {
std::mt19937 gen(1);
Expand Down
77 changes: 45 additions & 32 deletions tests/python/gpu/test_kvstore_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@
keys = [5, 7, 11]
str_keys = ['b', 'c', 'd']

class EnvManager:
    """Context manager that temporarily sets one environment variable.

    On entry the variable ``key`` is set to ``val``. On exit it is put back to
    the value it had before entry, or removed again if it was not set at all.

    Parameters
    ----------
    key : str
        Name of the environment variable to override.
    val : str
        Value the variable holds while the ``with`` block runs.
    """
    def __init__(self, key, val):
        self._key = key
        self._next_val = val
        self._prev_val = None

    def __enter__(self):
        # None marks "variable was not set", which is distinct from "".
        self._prev_val = os.environ.get(self._key)
        os.environ[self._key] = self._next_val
        return self

    def __exit__(self, ptype, value, trace):
        if self._prev_val is None:
            # The variable did not exist before entry, so remove it instead of
            # leaving an empty-string value behind.
            os.environ.pop(self._key, None)
        else:
            os.environ[self._key] = self._prev_val

def init_kv_with_str(stype='default', kv_type='local'):
"""init kv """
Expand Down Expand Up @@ -89,48 +104,46 @@ def check_rsp_pull(kv, count, ctxs, is_same_rowid=False, use_slice=False):

# test fails intermittently. temporarily disabled till it gets fixed. tracked at https://github.com/apache/incubator-mxnet/issues/9384
# check_rsp_push_pull('local')
os.environ["MXNET_KVSTORE_USETREE"] = ""
check_rsp_push_pull('device')
check_rsp_push_pull('device', is_push_cpu=False)
os.environ["MXNET_KVSTORE_USETREE"] = "1"
logging.info("Setting env to use tree reduce...")
check_rsp_push_pull('device')
check_rsp_push_pull('device', is_push_cpu=False)
envs = ["","1"]
key = "MXNET_KVSTORE_USETREE"
for val in envs:
with EnvManager(key, val):
check_rsp_push_pull('device')
check_rsp_push_pull('device', is_push_cpu=False)


def test_row_sparse_pull_single_device():
envs = ["","1"]
for env in envs:
os.environ["MXNET_KVSTORE_USETREE"] = env

kvstore = mx.kv.create('device')
copy = mx.nd.random_normal(shape=(4,4), ctx=mx.gpu(0))
grad = copy.tostype("row_sparse")
key = "MXNET_KVSTORE_USETREE"
for val in envs:
with EnvManager(key, val):
kvstore = mx.kv.create('device')
copy = mx.nd.random_normal(shape=(4,4), ctx=mx.gpu(0))
grad = copy.tostype("row_sparse")

key = 0
kvstore.init(key, grad)
idx = grad.indices
kvstore.push(key, grad)
kvstore.row_sparse_pull(key, out=grad, row_ids=idx)
k = 0
kvstore.init(k, grad)
idx = grad.indices
kvstore.push(k, grad)
kvstore.row_sparse_pull(k, out=grad, row_ids=idx)

assert_almost_equal(grad.asnumpy(), copy.asnumpy())
assert_almost_equal(grad.asnumpy(), copy.asnumpy())


def test_rsp_push_pull_large_rowid():
envs = ["","1"]
for env in envs:
os.environ["MXNET_KVSTORE_USETREE"] = env

num_rows = 793470
val = mx.nd.ones((num_rows, 1)).tostype('row_sparse').copyto(mx.gpu())
kv = mx.kv.create('device')
kv.init('a', val)
out = mx.nd.zeros((num_rows,1), stype='row_sparse').copyto(mx.gpu())
kv.push('a', val)
kv.row_sparse_pull('a', out=out, row_ids=mx.nd.arange(0, num_rows, dtype='int64'))
assert(out.indices.shape[0] == num_rows)

os.environ["MXNET_KVSTORE_USETREE"] = ""
key = "MXNET_KVSTORE_USETREE"
for val in envs:
with EnvManager(key, val):
num_rows = 793470
val = mx.nd.ones((num_rows, 1)).tostype('row_sparse').copyto(mx.gpu())
kv = mx.kv.create('device')
kv.init('a', val)
out = mx.nd.zeros((num_rows,1), stype='row_sparse').copyto(mx.gpu())
kv.push('a', val)
kv.row_sparse_pull('a', out=out, row_ids=mx.nd.arange(0, num_rows, dtype='int64'))
assert(out.indices.shape[0] == num_rows)

if __name__ == '__main__':
import nose
nose.runmodule()

0 comments on commit bd926bf

Please sign in to comment.