refactor test script
ScXfjiang committed Feb 5, 2025
1 parent 9bd2cb6 commit da54ddb
Showing 2 changed files with 84 additions and 4 deletions.
8 changes: 4 additions & 4 deletions build_rocm_python3
@@ -50,14 +50,14 @@ if [ -f /usertools/rocm.bazelrc ]; then
      python3 tensorflow/tools/ci_build/update_version.py --nightly --rocm_version &&
      bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
      ./bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --nightly_flag &&
-     pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
+     pip3 install --upgrade --force-reinstall $TF_PKG_LOC/tf_nightly_rocm*.whl
  else
      # Remove any previous builds and build release
      rm -f $TF_PKG_LOC/tensorflow*.whl
      python3 tensorflow/tools/ci_build/update_version.py --rocm_version &&
      bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
      ./bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --project_name tensorflow_rocm &&
-     pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
+     pip3 install --upgrade --force-reinstall $TF_PKG_LOC/tensorflow*.whl
  fi
else
# Legacy style: run configure then build
@@ -68,12 +68,12 @@ else
      rm -f $TF_PKG_LOC/tf_nightly_rocm*.whl
      bazel build $RESOURCE_OPTION --config=opt --config=rocm //tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
      bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --nightly_flag &&
-     pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
+     pip3 install --upgrade --force-reinstall $TF_PKG_LOC/tf_nightly_rocm*.whl
  else
      # Remove any previous builds and build release
      rm -f $TF_PKG_LOC/tensorflow*.whl
      bazel build $RESOURCE_OPTION --config=opt --config=rocm //tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
      bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm &&
-     pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
+     pip3 install --upgrade --force-reinstall $TF_PKG_LOC/tensorflow*.whl
  fi
fi
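
The build-script change is confined to the four pip3 install commands: --force-reinstall makes pip replace an already-installed wheel even when the version string has not changed, which repeated nightly/CI rebuilds would otherwise hit (pip skips a wheel it considers already satisfied). A minimal post-install sanity check, as a sketch and not part of this commit, could look like:

    # Sketch only: confirm the interpreter picks up the freshly reinstalled ROCm wheel.
    import tensorflow as tf
    print(tf.__version__)                # version of the wheel just installed
    print(tf.test.is_built_with_rocm())  # expect True for a ROCm build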
80 changes: 80 additions & 0 deletions reproduce/ks_mismatch_demo.py
@@ -0,0 +1,80 @@
from __future__ import print_function
import numpy as np
import tensorflow.compat.v1 as tf
import random
import copy

# Run the TF1-style graph API even when a TF2 wheel is installed.
if int(tf.__version__[0]) >= 2:
    tf.disable_v2_behavior()


def CompareCpuAndGpu():
    # A larger configuration kept for reference:
    # [batch, seqlen, nh, dim] = [80, 128, 8, 64]
    [batch, seqlen, nh, dim] = [1, 16, 1, 2]
    shape_input = [batch, seqlen, nh, dim]
    shape_in_grad = [batch, nh, seqlen, seqlen]

    inputs_cpu = {}
    inputs_gpu = {}

    # Feed identical random values to the CPU and GPU graphs.
    q_placeholder = tf.placeholder(tf.float32, shape_input)
    inputs_cpu[q_placeholder] = [
        [
            [[random.uniform(-2, 2) for i in range(dim)] for j in range(nh)]
            for k in range(seqlen)
        ]
        for t in range(batch)
    ]
    inputs_gpu[q_placeholder] = copy.deepcopy(inputs_cpu[q_placeholder])

    k_placeholder = tf.placeholder(tf.float32, shape_input)
    inputs_cpu[k_placeholder] = [
        [
            [[random.uniform(-2, 2) for i in range(dim)] for j in range(nh)]
            for k in range(seqlen)
        ]
        for t in range(batch)
    ]
    inputs_gpu[k_placeholder] = copy.deepcopy(inputs_cpu[k_placeholder])

    in_grad_placeholder = tf.placeholder(tf.float32, shape_in_grad)
    inputs_cpu[in_grad_placeholder] = [
        [
            [[random.uniform(-2, 2) for i in range(seqlen)] for j in range(seqlen)]
            for k in range(nh)
        ]
        for t in range(batch)
    ]
    inputs_gpu[in_grad_placeholder] = copy.deepcopy(inputs_cpu[in_grad_placeholder])

    def T(t):
        # [batch, seqlen, nh, dim] -> [batch, nh, seqlen, dim]
        return tf.transpose(t, [0, 2, 1, 3])

    def calc_grad_cpu(q, k, in_grad):
        # Build QK^T and its gradients on the CPU under an XLA jit scope.
        with tf.xla.experimental.jit_scope(separate_compiled_gradients=True):
            with tf.device("/CPU:0"):
                qk = tf.matmul(T(q), T(k), transpose_b=True)
                grad_q, grad_k = tf.gradients(qk, [q, k], in_grad)
                return [qk, grad_q, grad_k]

    def calc_grad_gpu(q, k, in_grad):
        # The same graph, pinned to the GPU.
        with tf.xla.experimental.jit_scope(separate_compiled_gradients=True):
            with tf.device("/GPU:0"):
                qk = tf.matmul(T(q), T(k), transpose_b=True)
                grad_q, grad_k = tf.gradients(qk, [q, k], in_grad)
                return [qk, grad_q, grad_k]

    sess_config = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        res1 = sess.run(calc_grad_cpu(q_placeholder, k_placeholder, in_grad_placeholder), feed_dict=inputs_cpu)
        res2 = sess.run(calc_grad_gpu(q_placeholder, k_placeholder, in_grad_placeholder), feed_dict=inputs_gpu)

    return res1, res2


if __name__ == "__main__":
    res1, res2 = CompareCpuAndGpu()
    for i in range(len(res1)):
        print(np.allclose(res1[i], res2[i], rtol=4e-2, atol=4e-2))
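
Running python3 reproduce/ks_mismatch_demo.py prints one True/False line per compared tensor (qk, grad_q, grad_k); a False flags a CPU/GPU mismatch beyond the rtol=4e-2 / atol=4e-2 tolerances. An optional follow-up, sketched here and not part of the commit, quantifies any mismatch:

    # Sketch: report the worst absolute CPU/GPU deviation per tensor.
    for name, a, b in zip(["qk", "grad_q", "grad_k"], res1, res2):
        print(name, np.max(np.abs(np.asarray(a) - np.asarray(b))))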
