Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add version-consistent result rounding to load_balance_peers #230

Merged
merged 9 commits
Apr 19, 2021
20 changes: 12 additions & 8 deletions hivemind/client/averaging/load_balancing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

logger = get_logger(__name__)

LOAD_BALANCING_LP_DECIMALS = 9


def load_balance_peers(vector_size, throughputs: Sequence[Optional[float]], min_size: int = 0) -> Tuple[int, ...]:
"""
Expand All @@ -29,7 +31,7 @@ def load_balance_peers(vector_size, throughputs: Sequence[Optional[float]], min_
return tuple(hagenbach_bishoff(vector_size, scores))


def optimize_parts_lp(vector_size: int, throughputs: np.ndarray, min_size: int = 0, eps: float = 1e-15) -> np.ndarray:
def optimize_parts_lp(vector_size: int, throughputs: np.ndarray, min_size: int = 0) -> np.ndarray:
"""
This method solves an optimization problem to minimize the total allreduce time.
In butterfly all-reduce, each peer acts both as a "client" and as an "aggregator":
Expand All @@ -47,35 +49,37 @@ def optimize_parts_lp(vector_size: int, throughputs: np.ndarray, min_size: int =
:returns: a vector of "scores", i-th score is proportional to the fraction of weights assigned to i-th peer
"""
assert np.all(throughputs >= 0) and np.any(throughputs > 0)
throughputs = np.asarray(throughputs, dtype=np.float64)
permutation = np.argsort(-throughputs)
throughputs = throughputs[permutation]
is_nonzero = throughputs != 0

group_size = len(throughputs)
num_variables = group_size + 1 # [w_1, ..., w_N, xi]

c = np.zeros(num_variables)
c = np.zeros(num_variables, dtype=np.float64)
c[-1] = 1.0 # optimize w.r.t. xi

# the constraints below are tuples (A, b) such that Ax <= b
nonnegative_weights = -np.eye(group_size, M=num_variables), np.zeros(group_size)
nonnegative_weights = -np.eye(group_size, num_variables, dtype=c.dtype), np.zeros(group_size, c.dtype)
weights_sum_to_one = c[None, :] - 1.0, np.array([-1.0])
coeff_per_variable = (group_size - 2.0) / np.maximum(throughputs, eps)
coeff_matrix_minus_xi = np.hstack([np.diag(coeff_per_variable), -np.ones((group_size, 1))])
coeff_per_variable = (group_size - 2.0) / np.maximum(throughputs, 10 ** -LOAD_BALANCING_LP_DECIMALS)
coeff_matrix_minus_xi = np.hstack([np.diag(coeff_per_variable), -np.ones((group_size, 1), c.dtype)])
xi_is_maximum = coeff_matrix_minus_xi[is_nonzero], -1.0 / throughputs[is_nonzero]
force_max_weights = np.eye(group_size, M=num_variables), is_nonzero.astype(c.dtype)
force_max_weights = np.eye(group_size, M=num_variables, dtype=c.dtype), is_nonzero.astype(c.dtype)

A, b = list(map(np.concatenate, zip(nonnegative_weights, weights_sum_to_one, xi_is_maximum, force_max_weights)))

solution = scipy.optimize.linprog(c, A_ub=A, b_ub=b)
solution = scipy.optimize.linprog(c, A_ub=A, b_ub=b, method='interior-point')
if solution.success:
peer_scores = solution.x[:group_size]
# if some peers have less than min_size elements, transfer their share to other peers (if any)
if np.max(peer_scores) >= min_size / float(vector_size):
peer_scores[peer_scores < min_size / float(vector_size)] = 0.0
peer_scores = np.round(peer_scores, LOAD_BALANCING_LP_DECIMALS)
else:
logger.error(f"Failed to solve load-balancing for bandwidths {throughputs}.")
peer_scores = np.ones(group_size)
peer_scores = np.ones(group_size, c.dtype)

return peer_scores[np.argsort(permutation)]

Expand Down