diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 2cf70bcac949a51e58e90e90d8a89af912fe412c..3a3d3fd5735e440d1c3314b2ea7e1c4c9f29db34 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -8,6 +8,7 @@ Acknowledgments
 ===============
 
 * James Snyder provided a patch to make PyOpenCL work on OS X 10.6.
+* Roger Pau Monné supplied the example :file:`examples/benchmark-all.py`.
 
 User-visible Changes
 ====================
diff --git a/examples/benchmark-all.py b/examples/benchmark-all.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fcb7f3981a3b52f27bcd230416f0f6b79ea74a8
--- /dev/null
+++ b/examples/benchmark-all.py
@@ -0,0 +1,94 @@
+# example provided by Roger Pau Monn'e
+
+import pyopencl as cl
+import numpy
+import numpy.linalg as la
+import datetime
+from time import time
+
+a = numpy.random.rand(1000).astype(numpy.float32)
+b = numpy.random.rand(1000).astype(numpy.float32)
+c_result = numpy.empty_like(a)
+
+# Speed in normal CPU usage
+# (the inner loop repeats the same arithmetic 1000 times per element,
+# mirroring the 1000-iteration loop inside the OpenCL kernel below)
+time1 = time()
+for i in range(1000):
+    for j in range(1000):
+        c_result[i] = a[i] + b[i]
+        c_result[i] = c_result[i] * (a[i] + b[i])
+        c_result[i] = c_result[i] * (a[i] / 2.0)
+time2 = time()
+print "Execution time of test without OpenCL: ", time2 - time1, "s"
+
+
+for platform in cl.get_platforms():
+    for device in platform.get_devices():
+        dev_type = "unknown"
+
+        # Find the symbolic name in cl.device_type whose value equals
+        # this device's type.
+        for dev_type_candidate in dir(cl.device_type):
+            if getattr(cl.device_type, dev_type_candidate) == device.type:
+                dev_type = dev_type_candidate
+
+        print "==============================================================="
+        print "Platform name:", platform.name
+        print "Platform profile:", platform.profile
+        print "Platform vendor:", platform.vendor
+        print "Platform version:", platform.version
+        print "---------------------------------------------------------------"
+        print "Device name:", device.name
+        print "Device type: ", dev_type
+        print "Device memory: ", device.global_mem_size//1024//1024, 'MB'
+        print "Device max clock speed:", device.max_clock_frequency, 'MHz'
+        print "Device compute units:", device.max_compute_units
+
+        # Simple speed test
+        ctx = cl.Context([device])
+        queue = cl.CommandQueue(ctx,
+                properties=cl.command_queue_properties.PROFILING_ENABLE)
+
+        mf = cl.mem_flags
+        a_buf = cl.create_host_buffer(
+                ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, a)
+        b_buf = cl.create_host_buffer(
+                ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, b)
+        dest_buf = cl.create_buffer(ctx, mf.WRITE_ONLY, b.nbytes)
+
+        prg = cl.create_program_with_source(ctx, """
+            __kernel void sum(__global const float *a,
+            __global const float *b, __global float *c)
+            {
+                int loop;
+                int gid = get_global_id(0);
+                for(loop=0; loop<1000;loop++)
+                {
+                    c[gid] = a[gid] + b[gid];
+                    c[gid] = c[gid] * (a[gid] + b[gid]);
+                    c[gid] = c[gid] * (a[gid] / 2.0);
+                }
+            }
+            """).build()
+        c = numpy.empty_like(a)
+
+        # Profiling events bracket the kernel: elapsed is the kernel's end
+        # timestamp minus the marker's end timestamp, scaled by 1e-9.
+        before = cl.enqueue_marker(queue)
+        after = prg.sum(queue, a.shape, a_buf, b_buf, dest_buf)
+
+        cl.enqueue_read_buffer(queue, dest_buf, c).wait()
+        elapsed = 1e-9*(after.profile.end - before.profile.end)
+
+        print "Execution time of test: %g s" % elapsed
+
+        # Check the device result against the CPU reference, element by element.
+        error = 0
+        for i in range(1000):
+            if c[i] != c_result[i]:
+                error = 1
+        if error:
+            print "Results doesn't match!!"
+        else:
+            print "Results OK"