Skip to content
Snippets Groups Projects
Commit 0ec05353 authored by Andreas Klöckner
Browse files

Only use 'go-faster' options in matrix multiply example on Nvidia.

parent 819c807e
No related merge requests found
@@ -161,8 +161,12 @@ h_c = numpy.empty((c_height, c_width)).astype(numpy.float32)
 kernel_params = {"block_size": block_size,
         "w_a":a_width, "h_a":a_height, "w_b":b_width}
+if "NVIDIA" in queue.device.vendor:
+    options = "-cl-mad-enable -cl-fast-relaxed-math"
+else:
+    options = None
 prg = cl.Program(ctx, KERNEL_CODE % kernel_params,
-        ).build(options="-cl-mad-enable -cl-fast-relaxed-math")
+        ).build(options=options)
 kernel = prg.matrixMul
 #print prg.binaries[0]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment