Skip to content

Commit

Permalink
Add examples/stream/10streams.py
Browse files Browse the repository at this point in the history
  • Loading branch information
sonots committed Oct 22, 2017
1 parent 1ff2219 commit f249296
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions examples/stream/10streams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import cupy
import time

# Example: launch the same small workload on several CUDA streams, then
# combine the per-stream results on a separate reduction stream.
#
# A dedicated memory pool is installed as the allocator so that the total
# number of bytes it holds can be reported at the end of the run.
device = cupy.cuda.Device()
memory_pool = cupy.cuda.MemoryPool()
cupy.cuda.set_allocator(memory_pool.malloc)
rand = cupy.random.generator.RandomState(seed=1)

n = 10        # number of worker streams
streams = []  # one stream per unit of work
events = []   # events[k] is recorded on streams[k] after its work is queued
zs = []       # per-stream matmul results, summed later

# perf_counter() is monotonic, so the measured interval cannot be distorted
# by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
for _ in range(n):
    stream = cupy.cuda.stream.Stream()
    streams.append(stream)
    # Work issued inside the `with` block is queued on `stream`.
    with stream:
        x = rand.normal(size=(1, 1024 * 256))
        y = rand.normal(size=(1024 * 256, 1))
        z = cupy.matmul(x, y)
        zs.append(z)
    # Record an event on this stream so the reduction stream can wait on it.
    events.append(stream.record())

reduction_stream = cupy.cuda.stream.Stream()
# Block the reduction_stream until all events occur. This does not block host.
# This is not required when reduction is performed in the default (Stream.null)
# stream unless streams are created with `non_blocking=True` flag.
for event in events:
    reduction_stream.wait_event(event)
with reduction_stream:
    z = sum(zs)

# Wait for the device to finish before stopping the timer.
device.synchronize()
elapsed_time = time.perf_counter() - start_time
print('elapsed time', elapsed_time)
print('total bytes', memory_pool.total_bytes())

0 comments on commit f249296

Please sign in to comment.