diff --git a/doc/reference.rst b/doc/reference.rst
index 0f03ab58e21cb3d4fd6fc75384285276c76bce6e..164a44b5eaa58f65490842e78f45c9ea8ab972fa 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -458,9 +458,11 @@ following always works::
 
 .. autofunction:: show_dependency_graph
 
-Flags
------
+Options
+-------
+
+.. autoclass:: Options
 
-.. autoclass:: Flags
+.. autofunction:: set_options
 
 .. vim: tw=75:spell
diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index 5927b3311937f53f5ff33d20e0342b65ff700cb9..194c3f80edb57950b779326d6fddbf72afb69f1e 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -153,11 +153,12 @@ for good measure.
     >>> assert (out.get() == (2*x_vec_dev).get()).all()
 
 We can have loopy print the OpenCL kernel it generated
-by passing :attr:`loopy.Flags.write_cl`.
+by setting :attr:`loopy.Options.write_cl` (see :func:`loopy.set_options`).
 
 .. doctest::
 
-    >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=x_vec_dev)
     <BLANKLINE>
     #define lid(N) ((int) get_local_id(N))
     #define gid(N) ((int) get_group_id(N))
@@ -177,7 +178,7 @@ call (the first being the :class:`pyopencl.Event` associated with the
 execution of the kernel). (If the ordering of the output tuple is not
 clear, it can be specified or turned into a :class:`dict`. See the
 *kernel_data* argument of :func:`loopy.make_kernel` and
-:attr:`loopy.Flags.return_dict`.)
+:attr:`loopy.Options.return_dict`.)
 
 For convenience, loopy kernels also directly accept :mod:`numpy` arrays:
 
@@ -190,7 +191,7 @@ Notice how both *out* nor *a* are :mod:`numpy` arrays, but neither needed
 to be transferred to or from the device.  Checking for numpy arrays and
 transferring them if needed comes at a potential performance cost.  If you
 would like to make sure that you avoid this cost, pass
-:attr:`loopy.Flags.no_numpy`.
+:attr:`loopy.Options.no_numpy`.
 
 Further notice how *n*, while technically being an argument, did not need
 to be passed, as loopy is able to find *n* from the shape of the input
@@ -198,11 +199,12 @@ argument *a*.
 
 For efficiency, loopy generates Python code that handles kernel invocation.
 If you are suspecting that this code is causing you an issue, you can
-inspect that code, too, using :attr:`loopy.Flags.write_wrapper`:
+inspect that code, too, using :attr:`loopy.Options.write_wrapper`:
 
 .. doctest::
 
-    >>> evt, (out,) = knl(queue, a=x_vec_host, flags="write_wrapper")
+    >>> knl = lp.set_options(knl, write_wrapper=True, write_cl=False)
+    >>> evt, (out,) = knl(queue, a=x_vec_host)
     from __future__ import division
     ...
     def invoke_loopy_kernel_loopy_kernel(cl_kernel, queue, allocator=None, wait_for=None, out_host=None, a=None, n=None, out=None):
@@ -341,7 +343,8 @@ Let us take a look at the generated code for the above kernel:
 
 .. doctest::
 
-    >>> evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=a_mat_dev)
     <BLANKLINE>
     #define lid(N) ((int) get_local_id(N))
     #define gid(N) ((int) get_group_id(N))
@@ -392,7 +395,8 @@ Now the intended code is generated and our test passes.
 
 .. doctest::
 
-    >>> evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=a_mat_dev)
     <BLANKLINE>
     #define lid(N) ((int) get_local_id(N))
     #define gid(N) ((int) get_group_id(N))
@@ -431,8 +435,9 @@ zero-fill kernel?
     ...     """)
 
 
+    >>> knl = lp.set_options(knl, "write_cl")
     >>> with IncludeWarningsInDoctest():
-    ...     evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl")
+    ...     evt, (out,) = knl(queue, a=a_mat_dev)
     <BLANKLINE>
     ...
       for (int i = 0; i <= (-1 + n); ++i)
@@ -460,7 +465,7 @@ ambiguous.
 .. doctest::
 
     >>> with IncludeWarningsInDoctest():
-    ...     evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl")
+    ...     evt, (out,) = knl(queue, a=a_mat_dev)
     <BLANKLINE>
     ...
       for (int j = 0; j <= (-1 + n); ++j)
@@ -512,7 +517,8 @@ Consider this example:
     ...     "{ [i]: 0<=i<n }",
     ...     "a[i] = 0", assumptions="n>=0")
     >>> knl = lp.split_iname(knl, "i", 16)
-    >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=x_vec_dev)
     <BLANKLINE>
     ...
       for (int i_outer = 0; i_outer <= (-1 + ((15 + n) / 16)); ++i_outer)
@@ -539,7 +545,7 @@ relation to loop nesting. For example, it's perfectly possible to request
 .. doctest::
 
     >>> knl = lp.set_loop_priority(knl, "i_inner,i_outer")
-    >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl")
+    >>> evt, (out,) = knl(queue, a=x_vec_dev)
     <BLANKLINE>
     ...
       for (int i_inner = 0; i_inner <= 15; ++i_inner)
@@ -564,7 +570,8 @@ commonly called 'loop tiling':
     >>> knl = lp.split_iname(knl, "i", 16)
     >>> knl = lp.split_iname(knl, "j", 16)
     >>> knl = lp.set_loop_priority(knl, "i_outer,j_outer,i_inner")
-    >>> evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=a_mat_dev)
     <BLANKLINE>
     ...
       for (int i_outer = 0; i_outer <= (-1 + ((15 + n) / 16)); ++i_outer)
@@ -602,7 +609,8 @@ loop's tag to ``"unr"``:
     >>> orig_knl = knl
     >>> knl = lp.split_iname(knl, "i", 4)
     >>> knl = lp.tag_inames(knl, dict(i_inner="unr"))
-    >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=x_vec_dev)
     <BLANKLINE>
     ...
       for (int i_outer = 0; i_outer <= (-1 + ((3 + n) / 4)); ++i_outer)
@@ -675,7 +683,8 @@ Let's try this out on our vector fill kernel by creating workgroups of size
     ...     "a[i] = 0", assumptions="n>=0")
     >>> knl = lp.split_iname(knl, "i", 128,
     ...         outer_tag="g.0", inner_tag="l.0")
-    >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=x_vec_dev)
     <BLANKLINE>
     ...
     __kernel void __attribute__ ((reqd_work_group_size(128, 1, 1))) loopy_kernel(__global float *restrict a, int const n)
@@ -696,7 +705,7 @@ those for us:
 
     >>> glob, loc = knl.get_grid_sizes()
     >>> print glob
-    (Aff("[n] -> { [([(127 + n)/128])] }"),)
+    (Aff("[n] -> { [(floor((127 + n)/128))] }"),)
     >>> print loc
     (Aff("[n] -> { [(128)] }"),)
 
@@ -720,7 +729,8 @@ assumption:
     >>> orig_knl = knl
     >>> knl = lp.split_iname(knl, "i", 4)
     >>> knl = lp.tag_inames(knl, dict(i_inner="unr"))
-    >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=x_vec_dev)
     <BLANKLINE>
     ...
       for (int i_outer = 0; i_outer <= (-1 + ((3 + n) / 4)); ++i_outer)
@@ -745,7 +755,8 @@ to :func:`split_iname`:
 
     >>> knl = orig_knl
     >>> knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="unr")
-    >>> evt, (out,) = knl(queue, a=x_vec_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=x_vec_dev)
     <BLANKLINE>
       for (int i_outer = 0; i_outer <= (-1 + ((3 + n) / 4)); ++i_outer)
       {
@@ -878,7 +889,8 @@ When we ask to see the code, the issue becomes apparent:
 
 .. doctest::
 
-    >>> evt, (out,) = knl(queue, a=a_mat_dev, flags="write_cl")
+    >>> knl = lp.set_options(knl, "write_cl")
+    >>> evt, (out,) = knl(queue, a=a_mat_dev)
     <BLANKLINE>
     #define lid(N) ((int) get_local_id(N))
     #define gid(N) ((int) get_group_id(N))
diff --git a/examples/rank-one.py b/examples/rank-one.py
index c362b64cba17d0154ef881401938612670ae1932..726c44dea927c7dd9f7305fe59b12a27bc820269 100644
--- a/examples/rank-one.py
+++ b/examples/rank-one.py
@@ -14,7 +14,7 @@ knl = lp.make_kernel(queue.device,
 a = np.arange(200, dtype=np.float32)
 b = np.arange(200, dtype=np.float32)
 
-evt, (c,) = knl(queue, a=a, b=b, flags="write_cl")
+evt, (c,) = lp.set_options(knl, "write_cl")(queue, a=a, b=b)
 # SETUPEND
 
 orig_knl = knl
@@ -26,7 +26,7 @@ knl = lp.split_iname(knl, "j", 16,
         outer_tag="g.1", inner_tag="l.1")
 # SPLITEND
 
-evt, (c,) = knl(queue, a=a, b=b, flags="write_cl")
+evt, (c,) = lp.set_options(knl, "write_cl")(queue, a=a, b=b)
 
 split_knl = knl
 
@@ -35,7 +35,7 @@ knl = lp.add_prefetch(knl, "a")
 knl = lp.add_prefetch(knl, "b")
 # PREFETCH1END
 
-evt, (c,) = knl(queue, a=a, b=b, flags="write_cl")
+evt, (c,) = lp.set_options(knl, "write_cl")(queue, a=a, b=b)
 
 knl = split_knl
 
@@ -44,7 +44,7 @@ knl = lp.add_prefetch(knl, "a", ["i_inner"])
 knl = lp.add_prefetch(knl, "b", ["j_inner"])
 # PREFETCH2END
 
-evt, (c,) = knl(queue, a=a, b=b, flags="write_cl")
+evt, (c,) = lp.set_options(knl, "write_cl")(queue, a=a, b=b)
 
 knl = orig_knl
 
@@ -68,4 +68,4 @@ knl = lp.split_iname(knl, "a_dim_0", 16,
         outer_tag="l.1", inner_tag="l.0")
 # PREFETCH3END
 
-evt, (c,) = knl(queue, a=a, b=b, flags="write_cl")
+evt, (c,) = lp.set_options(knl, "write_cl")(queue, a=a, b=b)
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 0389fb8644872adde8ca30b3c17078ff7ec134be..406dadc73ccacd97761a9bd8c15bdd883019ecb6 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -63,7 +63,7 @@ from loopy.preprocess import (preprocess_kernel, realize_reduction,
 from loopy.schedule import generate_loop_schedules
 from loopy.codegen import generate_code
 from loopy.compiled import CompiledKernel
-from loopy.flags import Flags
+from loopy.options import Options
 from loopy.auto_test import auto_test_vs_ref
 
 __all__ = [
@@ -101,7 +101,7 @@ __all__ = [
 
         "auto_test_vs_ref",
 
-        "Flags",
+        "Options",
 
         "make_kernel",
 
@@ -1259,6 +1259,41 @@ def fix_parameters(kernel, **value_dict):
 # }}}
 
 
+# {{{ set_options
+
+def set_options(kernel, *args, **kwargs):
+    """Return a copy of *kernel* with updated :class:`Options`.
+
+    The new settings come either from keyword arguments (one per option)
+    or from exactly one positional argument (typically a string such as
+    ``"write_cl"``); mixing both forms raises :exc:`TypeError`.
+    """
+
+    if args and kwargs:
+        raise TypeError("cannot pass both positional and keyword arguments")
+
+    updated = kernel.options.copy()
+
+    if not kwargs:
+        if len(args) != 1:
+            raise TypeError("exactly one positional argument is required if "
+                    "no keyword args are given")
+
+        from loopy.options import make_options
+        updated.update(make_options(args[0]))
+
+    # no-op for the positional form, where kwargs is empty
+    for name, value in kwargs.iteritems():
+        # NOTE(review): hasattr() also accepts method names such as "copy"
+        if not hasattr(updated, name):
+            raise ValueError("unknown option '%s'" % name)
+        setattr(updated, name, value)
+
+    return kernel.copy(options=updated)
+
+# }}}
+
+
 # {{{ library registration
 
 def register_preamble_generators(kernel, preamble_generators):
diff --git a/loopy/auto_test.py b/loopy/auto_test.py
index 03f7ac0a3e6132e028622ee680c17cccec4ec12e..239a69d8794167785d27f731707393a0360f6ca1 100644
--- a/loopy/auto_test.py
+++ b/loopy/auto_test.py
@@ -335,8 +335,7 @@ def _enumerate_cl_devices_for_ref_test():
 def auto_test_vs_ref(
         ref_knl, ctx, test_knl, op_count=[], op_label=[], parameters={},
         print_ref_code=False, print_code=True, warmup_rounds=2,
-        iflags=None, dump_binary=False,
-        options=[],
+        dump_binary=False,
         fills_entire_output=True, do_check=True, check_result=None
         ):
     """Compare results of `ref_knl` to the kernels generated by
@@ -400,7 +399,7 @@ def auto_test_vs_ref(
             ref_sched_kernel = knl
             break
 
-        ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel, options=options)
+        ref_compiled = CompiledKernel(ref_ctx, ref_sched_kernel)
         if print_ref_code:
             print 75*"-"
             print "Reference Code:"
@@ -488,8 +487,7 @@ def auto_test_vs_ref(
         from loopy.preprocess import infer_unknown_types
         kernel = infer_unknown_types(kernel, expect_completion=True)
 
-        compiled = CompiledKernel(ctx, kernel, options=options,
-                iflags=iflags)
+        compiled = CompiledKernel(ctx, kernel)
 
         if args is None:
             cl_kernel_info = compiled.cl_kernel_info(frozenset())
diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py
index a996f7d1461adb8cd628116932a9dcbd6786e307..b5ff4aab46429b913d004e7384cd2678dd5d2756 100644
--- a/loopy/codegen/expression.py
+++ b/loopy/codegen/expression.py
@@ -368,7 +368,7 @@ class LoopyCCodeMapper(RecursiveMapper):
 
     def map_variable(self, expr, enclosing_prec, type_context):
         if expr.name in self.var_subst_map:
-            if self.kernel.flags.annotate_inames:
+            if self.kernel.options.annotate_inames:
                 return " /* %s */ %s" % (
                         expr.name,
                         self.rec(self.var_subst_map[expr.name],
diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py
index 793808960d152dcdc5d29f7552e5654906a39e7c..e10348a561a3ef101dd3fbed923947bd88f53cfa 100644
--- a/loopy/codegen/instruction.py
+++ b/loopy/codegen/instruction.py
@@ -98,7 +98,7 @@ def generate_expr_instruction_code(kernel, insn, codegen_state):
             ccm(expr, prec=None, type_context=dtype_to_type_context(target_dtype),
                 needed_dtype=target_dtype))
 
-    if kernel.flags.trace_assignments or kernel.flags.trace_assignment_values:
+    if kernel.options.trace_assignments or kernel.options.trace_assignment_values:
         from cgen import Statement as S
 
         gs, ls = kernel.get_grid_sizes()
@@ -122,7 +122,7 @@ def generate_expr_instruction_code(kernel, insn, codegen_state):
                     ccm(i, prec=None, type_context="i")
                     for i in assignee_indices)
 
-        if kernel.flags.trace_assignment_values:
+        if kernel.options.trace_assignment_values:
             if target_dtype.kind == "i":
                 printf_format += " = %d"
                 printf_args.append(lhs_code)
@@ -144,7 +144,7 @@ def generate_expr_instruction_code(kernel, insn, codegen_state):
                     printf_format, printf_args_str))
 
         from cgen import Block
-        if kernel.flags.trace_assignment_values:
+        if kernel.options.trace_assignment_values:
             result = Block([result, printf_insn])
         else:
             # print first, execute later -> helps find segfaults
diff --git a/loopy/compiled.py b/loopy/compiled.py
index 2fdcd2f3d1685330d93535667c0d25ef052c54f5..47bcac8b87fcf508821b40aea3149acf651a03a1 100644
--- a/loopy/compiled.py
+++ b/loopy/compiled.py
@@ -131,7 +131,7 @@ def python_dtype_str(dtype):
 
 # {{{ integer arg finding from shapes
 
-def generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, flags):
+def generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, options):
     # a mapping from integer argument names to a list of tuples
     # (arg_name, expression), where expression is a
     # unary function of kernel.arg_dict[arg_name]
@@ -199,7 +199,7 @@ def generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, flags):
 
 # {{{ integer arg finding from offsets
 
-def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags):
+def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, options):
     gen("# {{{ find integer arguments from offsets")
     gen("")
 
@@ -215,7 +215,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags)
                     gen("%s = 0" % arg.name)
                 gen("else:")
                 with Indentation(gen):
-                    if not flags.no_numpy:
+                    if not options.no_numpy:
                         gen("_lpy_offset = getattr(%s, \"offset\", 0)"
                                 % impl_array_name)
                     else:
@@ -223,7 +223,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags)
 
                     base_arg = kernel.impl_arg_to_arg[impl_array_name]
 
-                    if not flags.skip_arg_checks:
+                    if not options.skip_arg_checks:
                         gen("%s, _lpy_remdr = divmod(_lpy_offset, %d)"
                                 % (arg.name, base_arg.dtype.itemsize))
 
@@ -235,7 +235,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags)
                         gen("%s = _lpy_offset // %d)"
                                 % (arg.name, base_arg.dtype.itemsize))
 
-                    if not flags.skip_arg_checks:
+                    if not options.skip_arg_checks:
                         gen("del _lpy_offset")
 
     gen("# }}}")
@@ -246,7 +246,7 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags)
 
 # {{{ integer arg finding from strides
 
-def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags):
+def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, options):
     gen("# {{{ find integer arguments from strides")
     gen("")
 
@@ -256,7 +256,7 @@ def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags)
 
             gen("if %s is None:" % arg.name)
             with Indentation(gen):
-                if not flags.skip_arg_checks:
+                if not options.skip_arg_checks:
                     gen("if %s is None:" % impl_array_name)
                     with Indentation(gen):
                         gen("raise RuntimeError(\"required stride '%s' for "
@@ -266,7 +266,7 @@ def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags)
 
                     base_arg = kernel.impl_arg_to_arg[impl_array_name]
 
-                    if not flags.skip_arg_checks:
+                    if not options.skip_arg_checks:
                         gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)"
                                 % (arg.name, impl_array_name, stride_impl_axis,
                                     base_arg.dtype.itemsize))
@@ -290,7 +290,7 @@ def generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags)
 
 # {{{ value arg setup
 
-def generate_value_arg_setup(gen, kernel, impl_arg_info, flags):
+def generate_value_arg_setup(gen, kernel, impl_arg_info, options):
     import loopy as lp
     from loopy.kernel.array import ArrayBase
 
@@ -302,7 +302,7 @@ def generate_value_arg_setup(gen, kernel, impl_arg_info, flags):
         gen("# {{{ process %s" % arg.name)
         gen("")
 
-        if not flags.skip_arg_checks:
+        if not options.skip_arg_checks:
             gen("if %s is None:" % arg.name)
             with Indentation(gen):
                 gen("raise RuntimeError(\"input argument '%s' must "
@@ -329,7 +329,7 @@ def generate_value_arg_setup(gen, kernel, impl_arg_info, flags):
 
 # {{{ array arg setup
 
-def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
+def generate_array_arg_setup(gen, kernel, impl_arg_info, options):
     import loopy as lp
 
     from loopy.kernel.array import ArrayBase
@@ -339,7 +339,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
     gen("# {{{ set up array arguments")
     gen("")
 
-    if not flags.no_numpy:
+    if not options.no_numpy:
         gen("_lpy_encountered_numpy = False")
         gen("_lpy_encountered_dev = False")
         gen("")
@@ -356,7 +356,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
         if not issubclass(arg.arg_class, ArrayBase):
             continue
 
-        if not flags.no_numpy:
+        if not options.no_numpy:
             gen("if isinstance(%s, _lpy_np.ndarray):" % arg.name)
             with Indentation(gen):
                 gen("# synchronous, nothing to worry about")
@@ -370,21 +370,21 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
 
             gen("")
 
-        if not flags.skip_arg_checks and not is_written:
+        if not options.skip_arg_checks and not is_written:
             gen("if %s is None:" % arg.name)
             with Indentation(gen):
                 gen("raise RuntimeError(\"input argument '%s' must "
                         "be supplied\")" % arg.name)
                 gen("")
 
-        if is_written and arg.arg_class is lp.ImageArg and not flags.skip_arg_checks:
+        if is_written and arg.arg_class is lp.ImageArg and not options.skip_arg_checks:
             gen("if %s is None:" % arg.name)
             with Indentation(gen):
                 gen("raise RuntimeError(\"written image '%s' must "
                         "be supplied\")" % arg.name)
                 gen("")
 
-        if is_written and arg.shape is None and not flags.skip_arg_checks:
+        if is_written and arg.shape is None and not options.skip_arg_checks:
             gen("if %s is None:" % arg.name)
             with Indentation(gen):
                 gen("raise RuntimeError(\"written argument '%s' has "
@@ -413,7 +413,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
                     gen("_lpy_strides_%d = %s" % (i, strify(
                         itemsize*arg.unvec_strides[i])))
 
-                if not flags.skip_arg_checks:
+                if not options.skip_arg_checks:
                     for i in xrange(num_axes):
                         gen("assert _lpy_strides_%d > 0, "
                                 "\"'%s' has negative stride in axis %d\""
@@ -440,7 +440,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
                             strides=strify(sym_strides),
                             dtype=python_dtype_str(arg.dtype)))
 
-                if not flags.skip_arg_checks:
+                if not options.skip_arg_checks:
                     for i in xrange(num_axes):
                         gen("del _lpy_shape_%d" % i)
                         gen("del _lpy_strides_%d" % i)
@@ -455,7 +455,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
         # {{{ argument checking
 
         if arg.arg_class in [lp.GlobalArg, lp.ConstantArg] \
-                and not flags.skip_arg_checks:
+                and not options.skip_arg_checks:
             if possibly_made_by_loopy:
                 gen("if not _lpy_made_by_loopy:")
             else:
@@ -501,7 +501,7 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
 
         # }}}
 
-        if possibly_made_by_loopy and not flags.skip_arg_checks:
+        if possibly_made_by_loopy and not options.skip_arg_checks:
             gen("del _lpy_made_by_loopy")
             gen("")
 
@@ -520,10 +520,10 @@ def generate_array_arg_setup(gen, kernel, impl_arg_info, flags):
 # }}}
 
 
-def generate_invoker(kernel, impl_arg_info, flags):
+def generate_invoker(kernel, impl_arg_info, options):
     system_args = [
             "cl_kernel", "queue", "allocator=None", "wait_for=None",
-            # ignored if flags.no_numpy
+            # ignored if options.no_numpy
             "out_host=None"
             ]
 
@@ -545,12 +545,12 @@ def generate_invoker(kernel, impl_arg_info, flags):
         gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
     gen("")
 
-    generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, flags)
-    generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, flags)
-    generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, flags)
+    generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, options)
+    generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, options)
+    generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, options)
 
-    generate_value_arg_setup(gen, kernel, impl_arg_info, flags)
-    generate_array_arg_setup(gen, kernel, impl_arg_info, flags)
+    generate_value_arg_setup(gen, kernel, impl_arg_info, options)
+    generate_array_arg_setup(gen, kernel, impl_arg_info, options)
 
     # {{{ generate invocation
 
@@ -575,7 +575,7 @@ def generate_invoker(kernel, impl_arg_info, flags):
 
     # {{{ output
 
-    if not flags.no_numpy:
+    if not options.no_numpy:
         gen("if out_host is None and (_lpy_encountered_numpy "
                 "and not _lpy_encountered_dev):")
         with Indentation(gen):
@@ -591,7 +591,7 @@ def generate_invoker(kernel, impl_arg_info, flags):
 
         gen("")
 
-    if flags.return_dict:
+    if options.return_dict:
         gen("return _lpy_evt, {%s}"
                 % ", ".join("\"%s\": %s" % (arg.name, arg.name)
                     for arg in impl_arg_info
@@ -608,15 +608,15 @@ def generate_invoker(kernel, impl_arg_info, flags):
 
     # }}}
 
-    if flags.write_wrapper:
+    if options.write_wrapper:
         output = gen.get()
-        if flags.highlight_wrapper:
+        if options.highlight_wrapper:
             output = get_highlighted_python_code(output)
 
-        if flags.write_wrapper is True:
+        if options.write_wrapper is True:
             print output
         else:
-            with open(flags.write_wrapper, "w") as outf:
+            with open(options.write_wrapper, "w") as outf:
                 outf.write(output)
 
     return gen.get_function()
@@ -632,39 +632,17 @@ class _CLKernelInfo(Record):
 
 
 class CompiledKernel:
-    def __init__(self, context, kernel, options=[], codegen_kwargs={},
-            flags=None, iflags=None):
+    def __init__(self, context, kernel, codegen_kwargs={}):
         """
         :arg kernel: may be a loopy.LoopKernel, a generator returning kernels
             (a warning will be issued if more than one is returned). If the
             kernel has not yet been loop-scheduled, that is done, too, with no
             specific arguments.
-        :arg iflags: An :class:`loopy.Flags` instance, or a dictionary
-            of arguments with which a :class:`loopy.Flags` instance
-            can be initialized.
         """
 
         self.context = context
         self.codegen_kwargs = codegen_kwargs
-        self.options = list(options)
-
-        if flags is not None and iflags is not None:
-            raise TypeError("cannot specify flags and iflags at the same time")
-
-        if iflags is not None:
-            from warnings import warn
-            warn("The 'iflags' argument is deprecated", DeprecationWarning,
-                    stacklevel=2)
-
-            flags = iflags
-
-        from loopy.flags import make_flags
-        my_flags = kernel.flags.copy()
-        my_flags.update(make_flags(flags))
-
-        self.flags = my_flags
-
-        self.kernel = kernel.copy(flags=my_flags)
+        self.kernel = kernel
 
         self.packing_controller = SeparateArrayPackingController(kernel)
 
@@ -710,24 +688,24 @@ class CompiledKernel:
         from loopy.codegen import generate_code
         code, impl_arg_info = generate_code(kernel, **self.codegen_kwargs)
 
-        if self.flags.write_cl:
+        if self.kernel.options.write_cl:
             output = code
-            if self.flags.highlight_cl:
+            if self.kernel.options.highlight_cl:
                 output = get_highlighted_cl_code(output)
 
-            if self.flags.write_cl is True:
+            if self.kernel.options.write_cl is True:
                 print output
             else:
-                with open(self.flags.write_cl, "w") as outf:
+                with open(self.kernel.options.write_cl, "w") as outf:
                     outf.write(output)
 
-        if self.flags.edit_cl:
+        if self.kernel.options.edit_cl:
             from pytools import invoke_editor
             code = invoke_editor(code, "code.cl")
 
         cl_program = cl.Program(self.context, code)
         cl_kernel = getattr(
-                cl_program.build(options=self.options),
+                cl_program.build(options=kernel.options.cl_build_options),
                 kernel.name)
 
         return _CLKernelInfo(
@@ -735,7 +713,7 @@ class CompiledKernel:
                 cl_kernel=cl_kernel,
                 impl_arg_info=impl_arg_info,
                 invoker=generate_invoker(
-                    kernel, impl_arg_info, self.flags))
+                    kernel, impl_arg_info, self.kernel.options))
 
     # {{{ debugging aids
 
@@ -783,7 +761,7 @@ class CompiledKernel:
             are written as part of the kernel). The order is given
             by the order of kernel arguments. If this order is unspecified
             (such as when kernel arguments are inferred automatically),
-            enable :attr:`loopy.Flags.return_dict` to make *output* a
+            enable :attr:`loopy.Options.return_dict` to make *output* a
             :class:`dict` instead, with keys of argument names and values
             of the returned arrays.
         """
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index b4c773418da0e12efdb158bb92e0b4562849b09e..e51a7b3d814f56f62b9ea636712efabcc6c72d5d 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -134,9 +134,9 @@ class LoopKernel(Record):
 
     .. attribute:: cache_manager
     .. attribute:: isl_context
-    .. attribute:: flags
+    .. attribute:: options
 
-        An instance of :class:`loopy.Flags`
+        An instance of :class:`loopy.Options`
     """
 
     # {{{ constructor
@@ -165,7 +165,7 @@ class LoopKernel(Record):
             cache_manager=None,
             index_dtype=np.int32,
             isl_context=None,
-            flags=None,
+            options=None,
 
             # When kernels get intersected in slab decomposition,
             # their grid sizes shouldn't change. This provides
@@ -269,7 +269,7 @@ class LoopKernel(Record):
                 symbol_manglers=symbol_manglers,
                 index_dtype=index_dtype,
                 isl_context=isl_context,
-                flags=flags)
+                options=options)
 
     # }}}
 
@@ -959,17 +959,12 @@ class LoopKernel(Record):
     # {{{ direct execution
 
     @memoize_method
-    def get_compiled_kernel(self, ctx, options, flags):
+    def get_compiled_kernel(self, ctx):
         from loopy.compiled import CompiledKernel
-        return CompiledKernel(ctx, self, options=options, flags=flags)
+        return CompiledKernel(ctx, self)
 
     def __call__(self, queue, **kwargs):
-        flags = kwargs.pop("flags", None)
-        options = kwargs.pop("options", ())
-
-        assert isinstance(options, tuple)
-
-        return self.get_compiled_kernel(queue.context, options, flags)(
+        return self.get_compiled_kernel(queue.context)(
                 queue, **kwargs)
 
     # }}}
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index dc9329dc0110d4b54db4ad50cf72bcdd0478b056..1d5c3d6680adde98fbb494479b229745c4ea8209 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -1000,7 +1000,7 @@ def make_kernel(device, domains, instructions, kernel_data=["..."], **kwargs):
         length 16.
     :arg silenced_warnings: a list (or semicolon-separated string) or warnings
         to silence
-    :arg flags: an instance of :class:`loopy.Flags` or an equivalent
+    :arg options: an instance of :class:`loopy.Options` or an equivalent
         string representation
     """
 
@@ -1008,10 +1008,20 @@ def make_kernel(device, domains, instructions, kernel_data=["..."], **kwargs):
     default_order = kwargs.pop("default_order", "C")
     default_offset = kwargs.pop("default_offset", 0)
     silenced_warnings = kwargs.pop("silenced_warnings", [])
+    options = kwargs.pop("options", None)
     flags = kwargs.pop("flags", None)
 
-    from loopy.flags import make_flags
-    flags = make_flags(flags)
+    if flags is not None:
+        if options is not None:
+            raise TypeError("may not pass both 'options' and 'flags'")
+
+        from warnings import warn
+        warn("'flags' is deprecated. Use 'options' instead",
+                DeprecationWarning, stacklevel=2)
+        options = flags
+
+    from loopy.options import make_options
+    options = make_options(options)
 
     if isinstance(silenced_warnings, str):
         silenced_warnings = silenced_warnings.split(";")
@@ -1094,7 +1104,7 @@ def make_kernel(device, domains, instructions, kernel_data=["..."], **kwargs):
     knl = LoopKernel(device, domains, instructions, kernel_args,
             temporary_variables=temporary_variables,
             silenced_warnings=silenced_warnings,
-            flags=flags,
+            options=options,
             **kwargs)
 
     check_for_nonexistent_iname_deps(knl)
diff --git a/loopy/flags.py b/loopy/options.py
similarity index 80%
rename from loopy/flags.py
rename to loopy/options.py
index 1ccaf7b3314cb76358c3d261ccc0d3e2916ff182..9e656ad63f52e4d613e63f2f71f7e5ccea434454 100644
--- a/loopy/flags.py
+++ b/loopy/options.py
@@ -27,12 +27,12 @@ from pytools import Record
 import re
 
 
-class Flags(Record):
+class Options(Record):
     """
-    Unless otherwise specified, these flags are Boolean-valued
+    Unless otherwise specified, these options are Boolean-valued
     (i.e. on/off).
 
-    .. rubric:: Code-generation flags
+    .. rubric:: Code-generation options
 
     .. attribute:: annotate_inames
 
@@ -51,7 +51,7 @@ class Flags(Record):
         Like :attr:`trace_assignments`, but also trace the
         assigned values.
 
-    .. rubric:: Invocation-related flags
+    .. rubric:: Invocation-related options
 
     .. attribute:: skip_arg_checks
 
@@ -100,11 +100,19 @@ class Flags(Record):
         :envvar:`EDITOR`) on the generated kernel code,
         allowing for tweaks before the code is passed on to
         the OpenCL implementation for compilation.
+
+    .. attribute:: cl_build_options
+
+        Options to pass to the OpenCL compiler when building the kernel.
+        A list of strings.
     """
 
     def __init__(
-            # All of these should default to False for the string-based
-            # interface of make_flags (below) to make sense.
+            # All Boolean options in here should default to False for the
+            # string-based interface of make_options (below) to make sense.
+
+            # Every default must additionally be falsy (bool(default) is
+            # False) for the update() functionality to work.
 
             self,
 
@@ -115,7 +123,7 @@ class Flags(Record):
             skip_arg_checks=False, no_numpy=False, return_dict=False,
             write_wrapper=False, highlight_wrapper=False,
             write_cl=False, highlight_cl=False,
-            edit_cl=False
+            edit_cl=False, cl_build_options=[],
             ):
         Record.__init__(
                 self,
@@ -128,7 +136,7 @@ class Flags(Record):
                 return_dict=return_dict,
                 write_wrapper=write_wrapper, highlight_wrapper=highlight_wrapper,
                 write_cl=write_cl, highlight_cl=highlight_cl,
-                edit_cl=edit_cl,
+                edit_cl=edit_cl, cl_build_options=cl_build_options,
                 )
 
     def update(self, other):
@@ -139,12 +147,12 @@ class Flags(Record):
 KEY_VAL_RE = re.compile("^([a-zA-Z0-9]+)=(.*)$")
 
 
-def make_flags(flags_arg):
-    if flags_arg is None:
-        return Flags()
-    elif isinstance(flags_arg, str):
-        iflags_args = {}
-        for key_val in flags_arg.split(","):
+def make_options(options_arg):
+    if options_arg is None:
+        return Options()
+    elif isinstance(options_arg, str):
+        ioptions_args = {}
+        for key_val in options_arg.split(","):
             kv_match = KEY_VAL_RE.match(key_val)
             if kv_match is not None:
                 key = kv_match.group(1)
@@ -154,10 +162,10 @@ def make_flags(flags_arg):
                 except ValueError:
                     pass
 
-                iflags_args[key] = val
+                ioptions_args[key] = val
             else:
-                iflags_args[key_val] = True
+                ioptions_args[key_val] = True
 
-        return Flags(**iflags_args)
-    elif not isinstance(flags_arg, Flags):
-        return Flags(**flags_arg)
+        return Options(**ioptions_args)
+    elif not isinstance(options_arg, Options):
+        return Options(**options_arg)