
Commit

sync
ScXfjiang committed Feb 5, 2025
1 parent 9bd2cb6 commit b3a0858
Showing 2 changed files with 200 additions and 0 deletions.
reproduce/ks_demo.py (114 additions, 0 deletions)
@@ -0,0 +1,114 @@
from __future__ import print_function
import copy
import os
import pdb
import random
import unittest

import numpy as np
import tensorflow.compat.v1 as tf

if int(tf.__version__[0]) >= 2:
    tf.disable_v2_behavior()


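# Builds identical feeds for Q, K, and an upstream gradient tensor ("loss"),
# computes qk = T(q) @ T(k)^T (T permutes inputs to [batch, nh, seqlen, dim]) plus
# tf.gradients(qk, [q, k], loss), once on /CPU:0 and once on /GPU:0, both inside an
# XLA jit_scope, and returns the two result sets for comparison.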
def test_group():
    # [batch, seqlen, nh, dim] = [80, 128, 8, 64]
    [batch, seqlen, nh, dim] = [1, 16, 1, 2]
    inputs = {}
    inputs1 = {}

    shape_input = [batch, seqlen, nh, dim]
    shape_loss = [batch, nh, seqlen, seqlen]

    in_q = tf.placeholder(tf.float32, shape_input)
    # inputs[in_q] = [[[[random.uniform(-2, 2) for i in range(dim)] for j in range(nh)] for k in range(seqlen)] for t in range(batch)]
    inputs[in_q] = [[[[1 for i in range(dim)] for j in range(nh)] for k in range(seqlen)] for t in range(batch)]
    print('inputs[in_q]', inputs)
    inputs1[in_q] = copy.deepcopy(inputs[in_q])

    in_k = tf.placeholder(tf.float32, shape_input)
    # inputs[in_k] = [[[[random.uniform(-2, 2) for i in range(dim)] for j in range(nh)] for k in range(seqlen)] for t in range(batch)]
    inputs[in_k] = [[[[1 for i in range(dim)] for j in range(nh)] for k in range(seqlen)] for t in range(batch)]
    print('inputs[in_k]', inputs)
    inputs1[in_k] = copy.deepcopy(inputs[in_k])

    in_loss = tf.placeholder(tf.float32, shape_loss)
    # inputs[in_loss] = [[[[random.uniform(-2, 2) for i in range(seqlen)] for j in range(seqlen)] for k in range(nh)] for t in range(batch)]
    inputs[in_loss] = [[[[i % 2 for i in range(seqlen)] for j in range(seqlen)] for k in range(nh)] for t in range(batch)]
    print('inputs[in_loss]', inputs)
    inputs1[in_loss] = copy.deepcopy(inputs[in_loss])

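    # GPU path: the QK^T product and its backward pass are placed on /GPU:0
    # inside an XLA jit_scope with separately compiled gradients.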
    def with_xla(q, k, loss):
        with tf.xla.experimental.jit_scope(separate_compiled_gradients=True):
            with tf.device('/GPU:0'):
                def T(t):
                    return tf.transpose(t, [0, 2, 1, 3])

                qk = tf.matmul(T(q), T(k), transpose_b=True)
                print('qk.shape = ', qk.shape)
                print('loss.shape = ', loss.shape)
                # temp_loss = tf.reduce_sum(qk)
                # print('temp_loss', temp_loss)
                # grad_q, grad_k = tf.gradients(loss, [q, k])
                grad_q, grad_k = tf.gradients(qk, [q, k], loss)
                # grad_q, grad_k = tf.gradients(qk, [q, k])
                return [qk, grad_q, grad_k]


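    # Reference path: the same graph built on /CPU:0.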
    def no_xla(q, k, loss):
        with tf.xla.experimental.jit_scope(separate_compiled_gradients=True):
            with tf.device('/CPU:0'):
                def T(t):
                    return tf.transpose(t, [0, 2, 1, 3])

                qk = tf.matmul(T(q), T(k), transpose_b=True)
                print('qk.shape = ', qk.shape)
                print('loss.shape = ', loss.shape)
                # temp_loss = tf.reduce_sum(qk)
                # grad_q, grad_k = tf.gradients(qk, [q, k])
                grad_q, grad_k = tf.gradients(qk, [q, k], loss)
                return [qk, grad_q, grad_k]

    r1 = no_xla(in_q, in_k, in_loss)
    r2 = with_xla(in_q, in_k, in_loss)

    sess_config = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        res1 = sess.run(r1, feed_dict=inputs)
        res2 = sess.run(r2, feed_dict=inputs1)
        print(np.allclose(inputs[in_q], inputs1[in_q]))
        print(np.allclose(inputs[in_k], inputs1[in_k]))
        print(np.allclose(inputs[in_loss], inputs1[in_loss]))
        # print("res1 = ", res1)
        # print("res2 = ", res2)
        return res1, res2
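    # Run both graphs with matching feed dicts and verify the feeds were not mutated.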

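
# Asserts that qk, grad_q, and grad_k agree between the CPU and GPU/XLA runs
# within rtol=atol=4e-2, printing any elements that differ noticeably.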
class TestMemEffAttn(unittest.TestCase):
    def test(self):
        res1, res2 = test_group()

        print('res1 = ', res1)
        print('res2 = ', res2)
        print('res1.qk = ', res1[0])
        print('res2.qk = ', res2[0])
        print('res1.grad_q = ', res1[1])
        print('res2.grad_q = ', res2[1])
        print('res1.grad_k = ', res1[2])
        print('res2.grad_k = ', res2[2])

        for a, b in zip(res1[1].flatten(), res2[1].flatten()):
            if abs(a - b) > 0.1 and abs(a - b) / abs(a) > 0.1:
                print('abnormal1: ', a, b)
        num = 0
        for a, b in zip(res1[2].flatten(), res2[2].flatten()):
            if num < 20 and abs(a - b) > 0.1 and abs(a - b) / abs(a) > 0.1:
                print('abnormal: ', a, b)
                num = num + 1

        for i in range(len(res1)):
            print(np.allclose(res1[i], res2[i], rtol=4e-2, atol=4e-2))
            self.assertTrue(np.allclose(res1[i], res2[i], rtol=4e-2, atol=4e-2))


if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    unittest.main()
reproduce/ks_mismatch_demo.py (86 additions, 0 deletions)
@@ -0,0 +1,86 @@
from __future__ import print_function
import copy
import os
import pdb
import random
import unittest

import numpy as np
import tensorflow.compat.v1 as tf

if int(tf.__version__[0]) >= 2:
    tf.disable_v2_behavior()

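
# Same CPU-vs-GPU/XLA comparison as ks_demo.py, except that Q, K, and the upstream
# gradient are drawn from random.uniform(-2, 2) rather than constants; per the
# filename, this is the variant intended to reproduce the gradient mismatch.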
def test_group():
    # [batch, seqlen, nh, dim] = [80, 128, 8, 64]
    [batch, seqlen, nh, dim] = [1, 16, 1, 2]
    inputs = {}
    inputs1 = {}

    shape_input = [batch, seqlen, nh, dim]
    shape_loss = [batch, nh, seqlen, seqlen]

    in_q = tf.placeholder(tf.float32, shape_input)
    inputs[in_q] = [[[[random.uniform(-2, 2) for i in range(dim)] for j in range(nh)] for k in range(seqlen)] for t in range(batch)]
    inputs1[in_q] = copy.deepcopy(inputs[in_q])

    in_k = tf.placeholder(tf.float32, shape_input)
    inputs[in_k] = [[[[random.uniform(-2, 2) for i in range(dim)] for j in range(nh)] for k in range(seqlen)] for t in range(batch)]
    inputs1[in_k] = copy.deepcopy(inputs[in_k])

    in_loss = tf.placeholder(tf.float32, shape_loss)
    inputs[in_loss] = [[[[random.uniform(-2, 2) for i in range(seqlen)] for j in range(seqlen)] for k in range(nh)] for t in range(batch)]
    inputs1[in_loss] = copy.deepcopy(inputs[in_loss])
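
    # GPU path under an XLA jit_scope; no_xla below builds the same graph on /CPU:0 as the reference.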
    def with_xla(q, k, loss):
        with tf.xla.experimental.jit_scope(separate_compiled_gradients=True):
            with tf.device('/GPU:0'):
                def T(t):
                    return tf.transpose(t, [0, 2, 1, 3])

                qk = tf.matmul(T(q), T(k), transpose_b=True)
                # loss = tf.reduce_sum(qk)
                # grad_q, grad_k = tf.gradients(loss, [q, k])
                grad_q, grad_k = tf.gradients(qk, [q, k], loss)
                return [qk, grad_q, grad_k]

    def no_xla(q, k, loss):
        with tf.xla.experimental.jit_scope(separate_compiled_gradients=True):
            with tf.device('/CPU:0'):
                def T(t):
                    return tf.transpose(t, [0, 2, 1, 3])

                qk = tf.matmul(T(q), T(k), transpose_b=True)
                # loss = tf.reduce_sum(qk)
                # grad_q, grad_k = tf.gradients(loss, [q, k])
                grad_q, grad_k = tf.gradients(qk, [q, k], loss)
                return [qk, grad_q, grad_k]
    r1 = no_xla(in_q, in_k, in_loss)
    r2 = with_xla(in_q, in_k, in_loss)

    sess_config = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        res1 = sess.run(r1, feed_dict=inputs)
        res2 = sess.run(r2, feed_dict=inputs1)
        print(np.allclose(inputs[in_q], inputs1[in_q]))
        print(np.allclose(inputs[in_k], inputs1[in_k]))
        print(np.allclose(inputs[in_loss], inputs1[in_loss]))
        # print("res1 = ", res1)
        # print("res2 = ", res2)
        return res1, res2
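

# Same elementwise comparison as in ks_demo.py, with tolerance rtol=atol=4e-2.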
class TestMemEffAttn(unittest.TestCase):
    def test(self):
        res1, res2 = test_group()

        print('res1 = ', res1)
        print('res2 = ', res2)
        print('res1.qk = ', res1[0])
        print('res2.qk = ', res2[0])
        print('res1.grad_q = ', res1[1])
        print('res2.grad_q = ', res2[1])
        print('res1.grad_k = ', res1[2])
        print('res2.grad_k = ', res2[2])

        for a, b in zip(res1[1].flatten(), res2[1].flatten()):
            if abs(a - b) > 0.1 and abs(a - b) / abs(a) > 0.1:
                print('abnormal1: ', a, b)
        num = 0
        for a, b in zip(res1[2].flatten(), res2[2].flatten()):
            if num < 20 and abs(a - b) > 0.1 and abs(a - b) / abs(a) > 0.1:
                print('abnormal: ', a, b)
                num = num + 1

        for i in range(len(res1)):
            print(np.allclose(res1[i], res2[i], rtol=4e-2, atol=4e-2))
            self.assertTrue(np.allclose(res1[i], res2[i], rtol=4e-2, atol=4e-2))

if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    unittest.main()

