From da54ddb7fa462ac85a94e37be3ef133c9ef927d2 Mon Sep 17 00:00:00 2001
From: scxfjiang
Date: Thu, 6 Feb 2025 03:39:02 +0800
Subject: [PATCH] refactor test script

---
 build_rocm_python3            |  8 ++--
 reproduce/ks_mismatch_demo.py | 80 +++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 4 deletions(-)
 create mode 100644 reproduce/ks_mismatch_demo.py

diff --git a/build_rocm_python3 b/build_rocm_python3
index 27eedd2b916313..2a04f4d90dc2dc 100755
--- a/build_rocm_python3
+++ b/build_rocm_python3
@@ -50,14 +50,14 @@ if [ -f /usertools/rocm.bazelrc ]; then
     python3 tensorflow/tools/ci_build/update_version.py --nightly --rocm_version &&
     bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
     ./bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --nightly_flag &&
-    pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
+    pip3 install --upgrade --force-reinstall $TF_PKG_LOC/tf_nightly_rocm*.whl
   else
     # Remove any previous builds and build release
     rm -f $TF_PKG_LOC/tensorflow*.whl
     python3 tensorflow/tools/ci_build/update_version.py --rocm_version &&
     bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
     ./bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --project_name tensorflow_rocm &&
-    pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
+    pip3 install --upgrade --force-reinstall $TF_PKG_LOC/tensorflow*.whl
   fi
 else
   # Legacy style: run configure then build
@@ -68,12 +68,12 @@ else
     rm -f $TF_PKG_LOC/tf_nightly_rocm*.whl
     bazel build $RESOURCE_OPTION --config=opt --config=rocm //tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
     bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --nightly_flag &&
-    pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
+    pip3 install --upgrade --force-reinstall $TF_PKG_LOC/tf_nightly_rocm*.whl
   else
     # Remove any previous builds and build release
     rm -f $TF_PKG_LOC/tensorflow*.whl
     bazel build $RESOURCE_OPTION --config=opt --config=rocm //tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
     bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm &&
-    pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
+    pip3 install --upgrade --force-reinstall $TF_PKG_LOC/tensorflow*.whl
   fi
 fi
diff --git a/reproduce/ks_mismatch_demo.py b/reproduce/ks_mismatch_demo.py
new file mode 100644
index 00000000000000..878cfb8a1230fa
--- /dev/null
+++ b/reproduce/ks_mismatch_demo.py
@@ -0,0 +1,80 @@
+from __future__ import print_function
+import numpy as np
+import tensorflow.compat.v1 as tf
+import random
+import copy
+
+if int(tf.__version__[0]) >= 2:
+    tf.disable_v2_behavior()
+
+
+def CompareCpuAndGpu():
+    # [batch, seqlen, nh, dim] = [80, 128, 8, 64]
+    [batch, seqlen, nh, dim] = [1, 16, 1, 2]
+    shape_input = [batch, seqlen, nh, dim]
+    shape_in_grad = [batch, nh, seqlen, seqlen]
+
+    inputs_cpu = {}
+    inputs_gpu = {}
+
+    q_placeholder = tf.placeholder(tf.float32, shape_input)
+    inputs_cpu[q_placeholder] = [
+        [
+            [[random.uniform(-2, 2) for i in range(dim)] for j in range(nh)]
+            for k in range(seqlen)
+        ]
+        for t in range(batch)
+    ]
+    inputs_gpu[q_placeholder] = copy.deepcopy(inputs_cpu[q_placeholder])
+
+    k_placeholder = tf.placeholder(tf.float32, shape_input)
+    inputs_cpu[k_placeholder] = [
+        [
+            [[random.uniform(-2, 2) for i in range(dim)] for j in range(nh)]
+            for k in range(seqlen)
+        ]
+        for t in range(batch)
+    ]
+    inputs_gpu[k_placeholder] = copy.deepcopy(inputs_cpu[k_placeholder])
+
+    in_grad_placeholder = tf.placeholder(tf.float32, shape_in_grad)
+    inputs_cpu[in_grad_placeholder] = [
+        [
+            [[random.uniform(-2, 2) for i in range(seqlen)] for j in range(seqlen)]
+            for k in range(nh)
+        ]
+        for t in range(batch)
+    ]
+    inputs_gpu[in_grad_placeholder] = copy.deepcopy(inputs_cpu[in_grad_placeholder])
+
+    def T(t):
+        return tf.transpose(t, [0, 2, 1, 3])
+
+    def calc_grad_cpu(q, k, in_grad):
+        with tf.xla.experimental.jit_scope(separate_compiled_gradients=True):
+            with tf.device("/CPU:0"):
+                qk = tf.matmul(T(q), T(k), transpose_b=True)
+                grad_q, grad_k = tf.gradients(qk, [q, k], in_grad)
+                return [qk, grad_q, grad_k]
+
+    def calc_grad_gpu(q, k, in_grad):
+        with tf.xla.experimental.jit_scope(separate_compiled_gradients=True):
+            with tf.device("/GPU:0"):
+                qk = tf.matmul(T(q), T(k), transpose_b=True)
+                grad_q, grad_k = tf.gradients(qk, [q, k], in_grad)
+                return [qk, grad_q, grad_k]
+
+    sess_config = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
+    sess_config.gpu_options.allow_growth = True
+    with tf.Session(config=sess_config) as sess:
+        res1 = sess.run(calc_grad_cpu(q_placeholder, k_placeholder, in_grad_placeholder), feed_dict=inputs_cpu)
+        res2 = sess.run(calc_grad_gpu(q_placeholder, k_placeholder, in_grad_placeholder), feed_dict=inputs_gpu)
+
+    return res1, res2
+
+
+if __name__ == "__main__":
+    res1, res2 = CompareCpuAndGpu()
+    for i in range(len(res1)):
+        print(np.allclose(res1[i], res2[i], rtol=4e-2, atol=4e-2))
+
\ No newline at end of file