From 4853b6d5cbbd0024a747b98580907a710edee9a0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 29 Oct 2011 16:10:40 -0400 Subject: [PATCH] Fix image arguments. --- loopy/codegen/__init__.py | 6 +- test/test_linalg.py | 887 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 890 insertions(+), 3 deletions(-) create mode 100644 test/test_linalg.py diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 093df9921..b093c44df 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -221,10 +221,10 @@ def generate_code(kernel): arg_decl = Const(arg_decl) arg_decl = CLGlobal(arg_decl) elif isinstance(arg, ImageArg): - if arg.name in kernel.input_vectors(): - mode = "r" - else: + if arg.name in kernel.get_written_variables(): mode = "w" + else: + mode = "r" arg_decl = CLImage(arg.dimensions, mode, arg.name) diff --git a/test/test_linalg.py b/test/test_linalg.py new file mode 100644 index 000000000..c44ce8ddc --- /dev/null +++ b/test/test_linalg.py @@ -0,0 +1,887 @@ +from __future__ import division + +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.array as cl_array +import pyopencl.clrandom as cl_random +import loopy as lp + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + + + + +def make_well_conditioned_dev_matrix(queue, shape, dtype=np.float32, + order="C", ran_factor=1, id_factor=5, inc_factor=0, od=0): + if isinstance(shape, int): + shape = (shape, shape) + l = max(shape) + eye_ish = id_factor*np.eye(l, k=od) + if inc_factor: + eye_ish[np.arange(l), np.arange(l)] = inc_factor*np.arange(l) + ary = np.asarray( + ran_factor*np.random.randn(*shape) + + eye_ish[:shape[0], :shape[1]], + dtype=dtype, order=order) + + return cl_array.to_device(queue, ary) + + + + +DO_CHECK = True + +DEBUG_PREAMBLE = r""" + #pragma OPENCL EXTENSION cl_amd_printf: enable + #define MY_J (j_outer*64+j_inner_outer*16+j_inner_inner) + #define MY_I (i_outer*16+i_inner) 
+ #define IFDIAG if (MY_I == MY_J) + #define TST(S) if (MY_J == 144 && MY_I == 16-48) \ + for (int aa = 0; aa < 16; ++aa) \ + for (int bb = 0; bb < 16; ++bb) + """ + + + + +def check_error(refsol, sol): + if not DO_CHECK: + return + + if len(sol.shape) == 2: # shape is a tuple, so test its length: matrices use the Frobenius norm + norm_order = "fro" + else: + norm_order = 2 + + rel_err = la.norm(refsol-sol, norm_order)/la.norm(refsol, norm_order) + if rel_err > 1e-5 or np.isinf(rel_err) or np.isnan(rel_err): + if 1: + import matplotlib.pyplot as pt + pt.imshow(refsol-sol) + pt.colorbar() + pt.show() + elif 0: + print "---------------------------" + print "ACTUAL" + print "---------------------------" + np.set_printoptions(threshold=1000000, linewidth=200) + print sol[:16,:16] + print "---------------------------" + print "CORRECT" + print "---------------------------" + print refsol[:16,:16] + raise RuntimeError("check failed, rel err=%g" % rel_err) + + + + +def get_suitable_size(ctx): + dev, = ctx.devices + if dev.type == cl.device_type.CPU: + return 160 + else: + return 1600 + + + + +def test_axpy(ctx_factory): + dtype = np.float32 + ctx = ctx_factory() + order = "C" + queue = cl.CommandQueue(ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + n = 20*1024**2 + + knl = lp.make_kernel(ctx.devices[0], + "[n] -> {[i]: 0<=i {[i,j,k]: 0<=i,j,k {[i,j]: 0<=i,j {[i,j,k]: 0<=i,j,k 1: + exec(sys.argv[1]) + else: + from py.test.cmdline import main + main([__file__]) -- GitLab