diff --git a/doc/reference.rst b/doc/reference.rst index 0f03ab58e21cb3d4fd6fc75384285276c76bce6e..164a44b5eaa58f65490842e78f45c9ea8ab972fa 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -458,9 +458,11 @@ following always works:: .. autofunction:: show_dependency_graph -Flags ------ +Options +------- + +.. autoclass:: Options -.. autoclass:: Flags +.. autofunction:: set_options .. vim: tw=75:spell diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 5927b3311937f53f5ff33d20e0342b65ff700cb9..194c3f80edb57950b779326d6fddbf72afb69f1e 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -153,11 +153,12 @@ for good measure. >>> assert (out.get() == (2*x_vec_dev).get()).all() We can have loopy print the OpenCL kernel it generated -by passing :attr:`loopy.Flags.write_cl`. +by passing :attr:`loopy.Options.write_cl`. .. doctest:: - >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=x_vec_dev) <BLANKLINE> #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) @@ -177,7 +178,7 @@ call (the first being the :class:`pyopencl.Event` associated with the execution of the kernel). (If the ordering of the output tuple is not clear, it can be specified or turned into a :class:`dict`. See the *kernel_data* argument of :func:`loopy.make_kernel` and -:attr:`loopy.Flags.return_dict`.) +:attr:`loopy.Options.return_dict`.) For convenience, loopy kernels also directly accept :mod:`numpy` arrays: @@ -190,7 +191,7 @@ Notice how both *out* nor *a* are :mod:`numpy` arrays, but neither needed to be transferred to or from the device. Checking for numpy arrays and transferring them if needed comes at a potential performance cost. If you would like to make sure that you avoid this cost, pass -:attr:`loopy.Flags.no_numpy`. +:attr:`loopy.Options.no_numpy`. Further notice how *n*, while technically being an argument, did not need to be passed, as loopy is able to find *n* from the shape of the input @@ -198,11 +199,12 @@ argument *a*. For efficiency, loopy generates Python code that handles kernel invocation. If you are suspecting that this code is causing you an issue, you can -inspect that code, too, using :attr:`loopy.Flags.write_wrapper`: +inspect that code, too, using :attr:`loopy.Options.write_wrapper`: .. doctest:: - >>> evt, (out,) = knl(queue, a=x_vec_host, flags="write_wrapper") + >>> knl = lp.set_options(knl, write_wrapper=True, write_cl=False) + >>> evt, (out,) = knl(queue, a=x_vec_host) from __future__ import division ... def invoke_loopy_kernel_loopy_kernel(cl_kernel, queue, allocator=None, wait_for=None, out_host=None, a=None, n=None, out=None): @@ -341,7 +343,8 @@ Let us take a look at the generated code for the above kernel: .. doctest:: - >>> evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=a_mat_dev) <BLANKLINE> #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) @@ -392,7 +395,8 @@ Now the intended code is generated and our test passes. .. doctest:: - >>> evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=a_mat_dev) <BLANKLINE> #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) @@ -431,8 +435,9 @@ zero-fill kernel? ... """) + >>> knl = lp.set_options(knl, "write_cl") >>> with IncludeWarningsInDoctest(): - ... evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl") + ... evt, (out,) = knl(queue, a=a_mat_dev) <BLANKLINE> ... for (int i = 0; i <= (-1 + n); ++i) @@ -460,7 +465,7 @@ ambiguous. .. doctest:: >>> with IncludeWarningsInDoctest(): - ... evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl") + ... evt, (out,) = knl(queue, a=a_mat_dev) <BLANKLINE> ... for (int j = 0; j <= (-1 + n); ++j) @@ -512,7 +517,8 @@ Consider this example: ... "{ [i]: 0<=i<n }", ... "a[i] = 0", assumptions="n>=0") >>> knl = lp.split_iname(knl, "i", 16) - >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=x_vec_dev) <BLANKLINE> ... for (int i_outer = 0; i_outer <= (-1 + ((15 + n) / 16)); ++i_outer) @@ -539,7 +545,7 @@ relation to loop nesting. For example, it's perfectly possible to request .. doctest:: >>> knl = lp.set_loop_priority(knl, "i_inner,i_outer") - >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl") + >>> evt, (out,) = knl(queue, a=x_vec_dev) <BLANKLINE> ... for (int i_inner = 0; i_inner <= 15; ++i_inner) @@ -564,7 +570,8 @@ commonly called 'loop tiling': >>> knl = lp.split_iname(knl, "i", 16) >>> knl = lp.split_iname(knl, "j", 16) >>> knl = lp.set_loop_priority(knl, "i_outer,j_outer,i_inner") - >>> evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=a_mat_dev) <BLANKLINE> ... for (int i_outer = 0; i_outer <= (-1 + ((15 + n) / 16)); ++i_outer) @@ -602,7 +609,8 @@ loop's tag to ``"unr"``: >>> orig_knl = knl >>> knl = lp.split_iname(knl, "i", 4) >>> knl = lp.tag_inames(knl, dict(i_inner="unr")) - >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=x_vec_dev) <BLANKLINE> ... for (int i_outer = 0; i_outer <= (-1 + ((3 + n) / 4)); ++i_outer) @@ -675,7 +683,8 @@ Let's try this out on our vector fill kernel by creating workgroups of size ... "a[i] = 0", assumptions="n>=0") >>> knl = lp.split_iname(knl, "i", 128, ... outer_tag="g.0", inner_tag="l.0") - >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=x_vec_dev) <BLANKLINE> ... __kernel void __attribute__ ((reqd_work_group_size(128, 1, 1))) loopy_kernel(__global float *restrict a, int const n) @@ -696,7 +705,7 @@ those for us: >>> glob, loc = knl.get_grid_sizes() >>> print glob - (Aff("[n] -> { [([(127 + n)/128])] }"),) + (Aff("[n] -> { [(floor((127 + n)/128))] }"),) >>> print loc (Aff("[n] -> { [(128)] }"),) @@ -720,7 +729,8 @@ assumption: >>> orig_knl = knl >>> knl = lp.split_iname(knl, "i", 4) >>> knl = lp.tag_inames(knl, dict(i_inner="unr")) - >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=x_vec_dev) <BLANKLINE> ... for (int i_outer = 0; i_outer <= (-1 + ((3 + n) / 4)); ++i_outer) @@ -745,7 +755,8 @@ to :func:`split_iname`: >>> knl = orig_knl >>> knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="unr") - >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=x_vec_dev) <BLANKLINE> for (int i_outer = 0; i_outer <= (-1 + ((3 + n) / 4)); ++i_outer) { @@ -878,7 +889,8 @@ When we ask to see the code, the issue becomes apparent: .. doctest:: - >>> evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl") + >>> knl = lp.set_options(knl, "write_cl") + >>> evt, (out,) = knl(queue, a=a_mat_dev) <BLANKLINE> #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) diff --git a/examples/rank-one.py b/examples/rank-one.py index c362b64cba17d0154ef881401938612670ae1932..726c44dea927c7dd9f7305fe59b12a27bc820269 100644 --- a/examples/rank-one.py +++ b/examples/rank-one.py @@ -14,7 +14,7 @@ knl = lp.make_kernel(queue.device, a = np.arange(200, dtype=np.float32) b = np.arange(200, dtype=np.float32) -evt, (c,) = knl(queue, a=a, b=b, flags="write_cl") +evt, (c,) = knl(queue, a=a, b=b, options="write_cl") # SETUPEND orig_knl = knl @@ -26,7 +26,7 @@ knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.1") # SPLITEND -evt, (c,) = knl(queue, a=a, b=b, flags="write_cl") +evt, (c,) = knl(queue, a=a, b=b, options="write_cl") split_knl = knl @@ -35,7 +35,7 @@ knl = lp.add_prefetch(knl, "a") knl = lp.add_prefetch(knl, "b") # PREFETCH1END -evt, (c,) = knl(queue, a=a, b=b, flags="write_cl") +evt, (c,) = knl(queue, a=a, b=b, options="write_cl") knl = split_knl @@ -44,7 +44,7 @@ knl = lp.add_prefetch(knl, "a", ["i_inner"]) knl = lp.add_prefetch(knl, "b", ["j_inner"]) # PREFETCH2END -evt, (c,) = knl(queue, a=a, b=b, flags="write_cl") +evt, (c,) = knl(queue, a=a, b=b, options="write_cl") knl = orig_knl @@ -68,4 +68,4 @@ knl = lp.split_iname(knl, "a_dim_0", 16, outer_tag="l.1", inner_tag="l.0") # PREFETCH3END -evt, (c,) = knl(queue, a=a, b=b, flags="write_cl") +evt, (c,) = knl(queue, a=a, b=b, options="write_cl") diff --git a/loopy/__init__.py b/loopy/__init__.py index 0389fb8644872adde8ca30b3c17078ff7ec134be..406dadc73ccacd97761a9bd8c15bdd883019ecb6 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -63,7 +63,7 @@ from loopy.preprocess import (preprocess_kernel, realize_reduction, from loopy.schedule import generate_loop_schedules from loopy.codegen import generate_code from loopy.compiled import CompiledKernel -from loopy.flags import Flags +from loopy.options import Options from loopy.auto_test import auto_test_vs_ref __all__ = [ @@ -101,7 +101,7 @@ __all__ = [ "auto_test_vs_ref", - "Flags", + "Options", "make_kernel", @@ -1259,6 +1259,41 @@ def fix_parameters(kernel, **value_dict): # }}} +# {{{ set_options + +def set_options(kernel, *args, **kwargs): + """Return a new kernel with the options given as keyword arguments, or from + a string representation passed in as the first (and only) positional + argument. + + See also :class:`Options`. + """ + + if args and kwargs: + raise TypeError("cannot pass both positional and keyword arguments") + + new_opt = kernel.options.copy() + + if kwargs: + for key, val in kwargs.iteritems(): + if not hasattr(new_opt, key): + raise ValueError("unknown option '%s'" % key) + + setattr(new_opt, key, val) + else: + if len(args) != 1: + raise TypeError("exactly one positional argument is required if " + "no keyword args are given") + arg, = args + + from loopy.options import make_options + new_opt.update(make_options(arg)) + + return kernel.copy(options=new_opt) + +# }}} + + # {{{ library registration def register_preamble_generators(kernel, preamble_generators): diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 03f7ac0a3e6132e028622ee680c17cccec4ec12e..239a69d8794167785d27f731707393a0360f6ca1 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -335,8 +335,7 @@ def _enumerate_cl_devices_for_ref_test(): def auto_test_vs_ref( ref_knl, ctx, test_knl, op_count=[], op_label=[], parameters={}, print_ref_code=False, print_code=True, warmup_rounds=2, - iflags=None, dump_binary=False, - options=[], + dump_binary=False, fills_entire_output=True, do_check=True, check_result=None ): """Compare results of `ref_knl` to the kernels generated by @@ -400,7 +399,7 @@ def auto_test_vs_ref( ref_sched_kernel = knl break - ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel, options=options) + ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel) if print_ref_code: print 75*"-" print "Reference Code:" @@ -488,8 +487,7 @@ def auto_test_vs_ref( from loopy.preprocess import infer_unknown_types kernel = infer_unknown_types(kernel, expect_completion=True) - compiled = CompiledKernel(ctx, kernel, options=options, - iflags=iflags) + compiled = CompiledKernel(ctx, kernel) if args is None: cl_kernel_info = compiled.cl_kernel_info(frozenset()) diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py index a996f7d1461adb8cd628116932a9dcbd6786e307..b5ff4aab46429b913d004e7384cd2678dd5d2756 100644 --- a/loopy/codegen/expression.py +++ b/loopy/codegen/expression.py @@ -368,7 +368,7 @@ class LoopyCCodeMapper(RecursiveMapper): def map_variable(self, expr, enclosing_prec, type_context): if expr.name in self.var_subst_map: - if self.kernel.flags.annotate_inames: + if self.kernel.options.annotate_inames: return " /* %s */ %s" % ( expr.name, self.rec(self.var_subst_map[expr.name], diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 793808960d152dcdc5d29f7552e5654906a39e7c..e10348a561a3ef101dd3fbed923947bd88f53cfa 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -98,7 +98,7 @@ def generate_expr_instruction_code(kernel, insn, codegen_state): ccm(expr, prec=None, type_context=dtype_to_type_context(target_dtype), needed_dtype=target_dtype)) - if kernel.flags.trace_assignments or kernel.flags.trace_assignment_values: + if kernel.options.trace_assignments or kernel.options.trace_assignment_values: from cgen import Statement as S gs, ls = kernel.get_grid_sizes() @@ -122,7 +122,7 @@ def generate_expr_instruction_code(kernel, insn, codegen_state): ccm(i, prec=None, type_context="i") for i in assignee_indices) - if kernel.flags.trace_assignment_values: + if kernel.options.trace_assignment_values: if target_dtype.kind == "i": printf_format += " = %d" printf_args.append(lhs_code) @@ -144,7 +144,7 @@ def generate_expr_instruction_code(kernel, insn, codegen_state): printf_format, printf_args_str)) from cgen import Block - if kernel.flags.trace_assignment_values: + if kernel.options.trace_assignment_values: result = Block([result, printf_insn]) else: # print first, execute later -> helps find segfaults diff --git a/loopy/compiled.py b/loopy/compiled.py index 2fdcd2f3d1685330d93535667c0d25ef052c54f5..47bcac8b87fcf508821b40aea3149acf651a03a1 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -131,7 +131,7 @@ def python_dtype_str(dtype): # {{{ integer arg finding from shapes -def generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, flags): +def generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, options): # a mapping from integer argument names to a list of tuples # (arg_name, expression), where expression is a # unary function of kernel.arg_dict[arg_name] @@ -199,7 +199,7 @@ def generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, flags): # {{{ integer arg finding from offsets -def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags): +def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, options): gen("# {{{ find integer arguments from offsets") gen("") @@ -215,7 +215,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags) gen("%s = 0" % arg.name) gen("else:") with Indentation(gen): - if not flags.no_numpy: + if not options.no_numpy: gen("_lpy_offset = getattr(%s, \"offset\", 0)" % impl_array_name) else: @@ -223,7 +223,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags) base_arg = kernel.impl_arg_to_arg[impl_array_name] - if not flags.skip_arg_checks: + if not options.skip_arg_checks: gen("%s, _lpy_remdr = divmod(_lpy_offset, %d)" % (arg.name, base_arg.dtype.itemsize)) @@ -235,7 +235,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags) gen("%s = _lpy_offset // %d)" % (arg.name, base_arg.dtype.itemsize)) - if not flags.skip_arg_checks: + if not options.skip_arg_checks: gen("del _lpy_offset") gen("# }}}") @@ -246,7 +246,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags) # {{{ integer arg finding from strides -def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags): +def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, options): gen("# {{{ find integer arguments from strides") gen("") @@ -256,7 +256,7 @@ def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags) gen("if %s is None:" % arg.name) with Indentation(gen): - if not flags.skip_arg_checks: + if not options.skip_arg_checks: gen("if %s is None:" % impl_array_name) with Indentation(gen): gen("raise RuntimeError(\"required stride '%s' for " @@ -266,7 +266,7 @@ def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags) base_arg = kernel.impl_arg_to_arg[impl_array_name] - if not flags.skip_arg_checks: + if not options.skip_arg_checks: gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" % (arg.name, impl_array_name, stride_impl_axis, base_arg.dtype.itemsize)) @@ -290,7 +290,7 @@ def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags) # {{{ value arg setup -def generate_value_arg_setup(gen, kernel, impl_arg_info, flags): +def generate_value_arg_setup(gen, kernel, impl_arg_info, options): import loopy as lp from loopy.kernel.array import ArrayBase @@ -302,7 +302,7 @@ def generate_value_arg_setup(gen, kernel, impl_arg_info, flags): gen("# {{{ process %s" % arg.name) gen("") - if not flags.skip_arg_checks: + if not options.skip_arg_checks: gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"input argument '%s' must " @@ -329,7 +329,7 @@ def generate_value_arg_setup(gen, kernel, impl_arg_info, flags): # {{{ array arg setup -def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): +def generate_array_arg_setup(gen, kernel, impl_arg_info, options): import loopy as lp from loopy.kernel.array import ArrayBase @@ -339,7 +339,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): gen("# {{{ set up array arguments") gen("") - if not flags.no_numpy: + if not options.no_numpy: gen("_lpy_encountered_numpy = False") gen("_lpy_encountered_dev = False") gen("") @@ -356,7 +356,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): if not issubclass(arg.arg_class, ArrayBase): continue - if not flags.no_numpy: + if not options.no_numpy: gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name) with Indentation(gen): gen("# synchronous, nothing to worry about") @@ -370,21 +370,21 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): gen("") - if not flags.skip_arg_checks and not is_written: + if not options.skip_arg_checks and not is_written: gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"input argument '%s' must " "be supplied\")" % arg.name) gen("") - if is_written and arg.arg_class is lp.ImageArg and not flags.skip_arg_checks: + if is_written and arg.arg_class is lp.ImageArg and not options.skip_arg_checks: gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"written image '%s' must " "be supplied\")" % arg.name) gen("") - if is_written and arg.shape is None and not flags.skip_arg_checks: + if is_written and arg.shape is None and not options.skip_arg_checks: gen("if %s is None:" % arg.name) with Indentation(gen): gen("raise RuntimeError(\"written argument '%s' has " @@ -413,7 +413,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): gen("_lpy_strides_%d = %s" % (i, strify( itemsize*arg.unvec_strides[i]))) - if not flags.skip_arg_checks: + if not options.skip_arg_checks: for i in xrange(num_axes): gen("assert _lpy_strides_%d > 0, " "\"'%s' has negative stride in axis %d\"" @@ -440,7 +440,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): strides=strify(sym_strides), dtype=python_dtype_str(arg.dtype))) - if not flags.skip_arg_checks: + if not options.skip_arg_checks: for i in xrange(num_axes): gen("del _lpy_shape_%d" % i) gen("del _lpy_strides_%d" % i) @@ -455,7 +455,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): # {{{ argument checking if arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \ - and not flags.skip_arg_checks: + and not options.skip_arg_checks: if possibly_made_by_loopy: gen("if not _lpy_made_by_loopy:") else: @@ -501,7 +501,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): # }}} - if possibly_made_by_loopy and not flags.skip_arg_checks: + if possibly_made_by_loopy and not options.skip_arg_checks: gen("del _lpy_made_by_loopy") gen("") @@ -520,10 +520,10 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags): # }}} -def generate_invoker(kernel, impl_arg_info, flags): +def generate_invoker(kernel, impl_arg_info, options): system_args = [ "cl_kernel", "queue", "allocator=None", "wait_for=None", - # ignored if flags.no_numpy + # ignored if options.no_numpy "out_host=None" ] @@ -545,12 +545,12 @@ def generate_invoker(kernel, impl_arg_info, flags): gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)") gen("") - generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, flags) - generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags) - generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags) + generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, options) + generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, options) + generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, options) - generate_value_arg_setup(gen, kernel, impl_arg_info, flags) - generate_array_arg_setup(gen, kernel, impl_arg_info, flags) + generate_value_arg_setup(gen, kernel, impl_arg_info, options) + generate_array_arg_setup(gen, kernel, impl_arg_info, options) # {{{ generate invocation @@ -575,7 +575,7 @@ def generate_invoker(kernel, impl_arg_info, flags): # {{{ output - if not flags.no_numpy: + if not options.no_numpy: gen("if out_host is None and (_lpy_encountered_numpy " "and not _lpy_encountered_dev):") with Indentation(gen): @@ -591,7 +591,7 @@ def generate_invoker(kernel, impl_arg_info, flags): gen("") - if flags.return_dict: + if options.return_dict: gen("return _lpy_evt, {%s}" % ", ".join("\"%s\": %s" % (arg.name, arg.name) for arg in impl_arg_info @@ -608,15 +608,15 @@ def generate_invoker(kernel, impl_arg_info, flags): # }}} - if flags.write_wrapper: + if options.write_wrapper: output = gen.get() - if flags.highlight_wrapper: + if options.highlight_wrapper: output = get_highlighted_python_code(output) - if flags.write_wrapper is True: + if options.write_wrapper is True: print output else: - with open(flags.write_wrapper, "w") as outf: + with open(options.write_wrapper, "w") as outf: outf.write(output) return gen.get_function() @@ -632,39 +632,17 @@ class _CLKernelInfo(Record): class CompiledKernel: - def __init__(self, context, kernel, options=[], codegen_kwargs={}, - flags=None, iflags=None): + def __init__(self, context, kernel, codegen_kwargs={}): """ :arg kernel: may be a loopy.LoopKernel, a generator returning kernels (a warning will be issued if more than one is returned). If the kernel has not yet been loop-scheduled, that is done, too, with no specific arguments. - :arg iflags: An :class:`loopy.Flags` instance, or a dictionary - of arguments with which a :class:`loopy.Flags` instance - can be initialized. """ self.context = context self.codegen_kwargs = codegen_kwargs - self.options = list(options) - - if flags is not None and iflags is not None: - raise TypeError("cannot specify flags and iflags at the same time") - - if iflags is not None: - from warnings import warn - warn("The 'iflags' argument is deprecated", DeprecationWarning, - stacklevel=2) - - flags = iflags - - from loopy.flags import make_flags - my_flags = kernel.flags.copy() - my_flags.update(make_flags(flags)) - - self.flags = my_flags - - self.kernel = kernel.copy(flags=my_flags) + self.kernel = kernel self.packing_controller = SeparateArrayPackingController(kernel) @@ -710,24 +688,24 @@ class CompiledKernel: from loopy.codegen import generate_code code, impl_arg_info = generate_code(kernel, **self.codegen_kwargs) - if self.flags.write_cl: + if self.kernel.options.write_cl: output = code - if self.flags.highlight_cl: + if self.kernel.options.highlight_cl: output = get_highlighted_cl_code(output) - if self.flags.write_cl is True: + if self.kernel.options.write_cl is True: print output else: - with open(self.flags.write_cl, "w") as outf: + with open(self.kernel.options.write_cl, "w") as outf: outf.write(output) - if self.flags.edit_cl: + if self.kernel.options.edit_cl: from pytools import invoke_editor code = invoke_editor(code, "code.cl") cl_program = cl.Program(self.context, code) cl_kernel = getattr( - cl_program.build(options=self.options), + cl_program.build(options=kernel.options.cl_build_options), kernel.name) return _CLKernelInfo( @@ -735,7 +713,7 @@ class CompiledKernel: cl_kernel=cl_kernel, impl_arg_info=impl_arg_info, invoker=generate_invoker( - kernel, impl_arg_info, self.flags)) + kernel, impl_arg_info, self.kernel.options)) # {{{ debugging aids @@ -783,7 +761,7 @@ class CompiledKernel: are written as part of the kernel). The order is given by the order of kernel arguments. If this order is unspecified (such as when kernel arguments are inferred automatically), - enable :attr:`loopy.Flags.return_dict` to make *output* a + enable :attr:`loopy.Options.return_dict` to make *output* a :class:`dict` instead, with keys of argument names and values of the returned arrays. """ diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index b4c773418da0e12efdb158bb92e0b4562849b09e..e51a7b3d814f56f62b9ea636712efabcc6c72d5d 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -134,9 +134,9 @@ class LoopKernel(Record): .. attribute:: cache_manager .. attribute:: isl_context - .. attribute:: flags + .. attribute:: options - An instance of :class:`loopy.Flags` + An instance of :class:`loopy.Options` """ # {{{ constructor @@ -165,7 +165,7 @@ class LoopKernel(Record): cache_manager=None, index_dtype=np.int32, isl_context=None, - flags=None, + options=None, # When kernels get intersected in slab decomposition, # their grid sizes shouldn't change. This provides @@ -269,7 +269,7 @@ class LoopKernel(Record): symbol_manglers=symbol_manglers, index_dtype=index_dtype, isl_context=isl_context, - flags=flags) + options=options) # }}} @@ -959,17 +959,12 @@ class LoopKernel(Record): # {{{ direct execution @memoize_method - def get_compiled_kernel(self, ctx, options, flags): + def get_compiled_kernel(self, ctx): from loopy.compiled import CompiledKernel - return CompiledKernel(ctx, self, options=options, flags=flags) + return CompiledKernel(ctx, self) def __call__(self, queue, **kwargs): - flags = kwargs.pop("flags", None) - options = kwargs.pop("options", ()) - - assert isinstance(options, tuple) - - return self.get_compiled_kernel(queue.context, options, flags)( + return self.get_compiled_kernel(queue.context)( queue, **kwargs) # }}} diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index dc9329dc0110d4b54db4ad50cf72bcdd0478b056..1d5c3d6680adde98fbb494479b229745c4ea8209 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1000,7 +1000,7 @@ def make_kernel(device, domains, instructions, kernel_data=["..."], **kwargs): length 16. :arg silenced_warnings: a list (or semicolon-separated string) or warnings to silence - :arg flags: an instance of :class:`loopy.Flags` or an equivalent + :arg options: an instance of :class:`loopy.Options` or an equivalent string representation """ @@ -1008,10 +1008,20 @@ def make_kernel(device, domains, instructions, kernel_data=["..."], **kwargs): default_order = kwargs.pop("default_order", "C") default_offset = kwargs.pop("default_offset", 0) silenced_warnings = kwargs.pop("silenced_warnings", []) + options = kwargs.pop("options", None) flags = kwargs.pop("flags", None) - from loopy.flags import make_flags - flags = make_flags(flags) + if flags is not None: + if options is not None: + raise TypeError("may not pass both 'options' and 'flags'") + + from warnings import warn + warn("'flags' is deprecated. Use 'options' instead", + DeprecationWarning, stacklevel=2) + options = flags + + from loopy.options import make_options + options = make_options(options) if isinstance(silenced_warnings, str): silenced_warnings = silenced_warnings.split(";") @@ -1094,7 +1104,7 @@ def make_kernel(device, domains, instructions, kernel_data=["..."], **kwargs): knl = LoopKernel(device, domains, instructions, kernel_args, temporary_variables=temporary_variables, silenced_warnings=silenced_warnings, - flags=flags, + options=options, **kwargs) check_for_nonexistent_iname_deps(knl) diff --git a/loopy/flags.py b/loopy/options.py similarity index 80% rename from loopy/flags.py rename to loopy/options.py index 1ccaf7b3314cb76358c3d261ccc0d3e2916ff182..9e656ad63f52e4d613e63f2f71f7e5ccea434454 100644 --- a/loopy/flags.py +++ b/loopy/options.py @@ -27,12 +27,12 @@ from pytools import Record import re -class Flags(Record): +class Options(Record): """ - Unless otherwise specified, these flags are Boolean-valued + Unless otherwise specified, these options are Boolean-valued (i.e. on/off). - .. rubric:: Code-generation flags + .. rubric:: Code-generation options .. attribute:: annotate_inames @@ -51,7 +51,7 @@ class Flags(Record): Like :attr:`trace_assignments`, but also trace the assigned values. - .. rubric:: Invocation-related flags + .. rubric:: Invocation-related options .. attribute:: skip_arg_checks @@ -100,11 +100,19 @@ class Flags(Record): :envvar:`EDITOR`) on the generated kernel code, allowing for tweaks before the code is passed on to the OpenCL implementation for compilation. + + .. attribute:: cl_build_options + + Options to pass to the OpenCL compiler when building the kernel. + A list of strings. """ def __init__( - # All of these should default to False for the string-based - # interface of make_flags (below) to make sense. + # All Boolean flags in here should default to False for the + # string-based interface of make_options (below) to make sense. + + # All defaults are further required to be False when cast to bool + # for the update() functionality to work. self, @@ -115,7 +123,7 @@ class Flags(Record): skip_arg_checks=False, no_numpy=False, return_dict=False, write_wrapper=False, highlight_wrapper=False, write_cl=False, highlight_cl=False, - edit_cl=False + edit_cl=False, cl_build_options=[], ): Record.__init__( self, @@ -128,7 +136,7 @@ class Flags(Record): return_dict=return_dict, write_wrapper=write_wrapper, highlight_wrapper=highlight_wrapper, write_cl=write_cl, highlight_cl=highlight_cl, - edit_cl=edit_cl, + edit_cl=edit_cl, cl_build_options=cl_build_options, ) def update(self, other): @@ -139,12 +147,12 @@ class Flags(Record): KEY_VAL_RE = re.compile("^([a-zA-Z0-9]+)=(.*)$") -def make_flags(flags_arg): - if flags_arg is None: - return Flags() - elif isinstance(flags_arg, str): - iflags_args = {} - for key_val in flags_arg.split(","): +def make_options(options_arg): + if options_arg is None: + return Options() + elif isinstance(options_arg, str): + ioptions_args = {} + for key_val in options_arg.split(","): kv_match = KEY_VAL_RE.match(key_val) if kv_match is not None: key = kv_match.group(1) @@ -154,10 +162,10 @@ def make_flags(flags_arg): except ValueError: pass - iflags_args[key] = val + ioptions_args[key] = val else: - iflags_args[key_val] = True + ioptions_args[key_val] = True - return Flags(**iflags_args) - elif not isinstance(flags_arg, Flags): - return Flags(**flags_arg) + return Options(**ioptions_args) + elif not isinstance(options_arg, Options): + return Options(**options_arg)