diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index 1bdf70c29bf8ed8bbf42b1fc5edfdeb411f64aaa..9d525605b7a41c39400cd4d4beb7e64e38d766f2 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -227,9 +227,9 @@ inspect that code, too, using :attr:`loopy.Options.write_wrapper`:
     <BLANKLINE>
         if n is None:
             if a is not None:
-                n = int(a.shape[0])
+                n = a.shape[0]
             elif out is not None:
-                n = int(out.shape[0])
+                n = out.shape[0]
     <BLANKLINE>
         # }}}
     ...
diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index d117daf5109324a88b7c3a6c08bf2d9961ae3ae4..6eef793c7cf8294e7f8fad11a0757d59898d7ac7 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -456,7 +456,10 @@ def generate_code_v2(kernel):
             allow_complex=allow_complex,
             var_name_generator=kernel.get_var_name_generator(),
             is_generating_device_code=False,
-            gen_program_name=kernel.name,
+            gen_program_name=(
+                kernel.target.host_program_name_prefix
+                + kernel.name
+                + kernel.target.host_program_name_suffix),
             schedule_index_end=len(kernel.schedule))
 
     from loopy.codegen.result import generate_host_or_device_program
diff --git a/loopy/compiled.py b/loopy/compiled.py
index 3ef3ee27dd77daff398303a1715882199b8ec6d4..55feff66a83cbca5dacfb6717e7cb94fc69ed51d 100644
--- a/loopy/compiled.py
+++ b/loopy/compiled.py
@@ -192,7 +192,7 @@ def generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info)
             for arg_name, value_expr in sources:
                 gen("%s %s is not None:" % (if_stmt, arg_name))
                 with Indentation(gen):
-                    gen("%s = int(%s)"
+                    gen("%s = %s"
                             % (iarg_name, StringifyMapper()(value_expr)))
 
                 if_stmt = "elif"
@@ -226,10 +226,10 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info
                 gen("else:")
                 with Indentation(gen):
                     if not options.no_numpy:
-                        gen("_lpy_offset = int(getattr(%s, \"offset\", 0))"
+                        gen("_lpy_offset = getattr(%s, \"offset\", 0)"
                                 % impl_array_name)
                     else:
-                        gen("_lpy_offset = int(%s.offset)" % impl_array_name)
+                        gen("_lpy_offset = %s.offset" % impl_array_name)
 
                     base_arg = kernel.impl_arg_to_arg[impl_array_name]
 
@@ -287,9 +287,8 @@ def generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info
                                 "not divisible by its dtype itemsize\""
                                 % (stride_impl_axis, impl_array_name))
                         gen("del _lpy_remdr")
-                        gen("%s = int(%s)" % (arg.name, arg.name))
                     else:
-                        gen("%s = int(_lpy_offset // %d)"
+                        gen("%s = _lpy_offset // %d"
                                 % (arg.name, base_arg.dtype.itemsize))
 
     gen("# }}}")
@@ -542,8 +541,10 @@ def generate_arg_setup(gen, kernel, implemented_data_info, options):
 # }}}
 
 
-def generate_invoker(kernel, implemented_data_info, host_code):
+def generate_invoker(kernel, codegen_result):
     options = kernel.options
+    implemented_data_info = codegen_result.implemented_data_info
+    host_code = codegen_result.host_code()
 
     system_args = [
             "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None",
@@ -580,7 +581,7 @@ def generate_invoker(kernel, implemented_data_info, host_code):
 
     gen("_lpy_evt = {kernel_name}({args})"
             .format(
-                kernel_name=kernel.name,
+                kernel_name=codegen_result.host_program.name,
                 args=", ".join(
                     ["_lpy_cl_kernels", "queue"]
                     + args
@@ -754,11 +755,7 @@ class CompiledKernel:
                 kernel=kernel,
                 cl_kernels=cl_kernels,
                 implemented_data_info=codegen_result.implemented_data_info,
-                invoker=generate_invoker(
-                    kernel,
-                    codegen_result.implemented_data_info,
-                    codegen_result.host_code(),
-                    ))
+                invoker=generate_invoker(kernel, codegen_result))
 
     # {{{ debugging aids
 
diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py
index b20967f67dac4482ddd3a0a148e35f6e9d744be8..3ec3a50b11f72a2975ac4366d495326bfcb69b37 100644
--- a/loopy/target/__init__.py
+++ b/loopy/target/__init__.py
@@ -81,7 +81,9 @@ class TargetBase(object):
 
     # }}}
 
+    host_program_name_prefix = ""
     host_program_name_suffix = "_outer"
+    device_program_name_prefix = ""
     device_program_name_suffix = ""
 
     def split_kernel_at_global_barriers(self):
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index d9f420405ad4c0905dd8c47554bb2cf1f24bd87f..493cef0634ea455f6a48c66e4cfbcb0da9b46572 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -29,6 +29,7 @@ import six
 import numpy as np  # noqa
 from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder
 from loopy.diagnostic import LoopyError
+from cgen import Pointer
 
 from pytools import memoize_method
 
@@ -176,6 +177,12 @@ class CTarget(TargetBase):
     # }}}
 
 
+class _ConstRestrictPointer(Pointer):
+    def get_decl_pair(self):  # same pair shape as the wrapped declarator's
+        base_tp, inner_decl = self.subdecl.get_decl_pair()
+        return base_tp, "*const restrict %s" % inner_decl
+
+
 class CASTBuilder(ASTBuilderBase):
     # {{{ library
 
@@ -238,12 +245,7 @@ class CASTBuilder(ASTBuilderBase):
         base_storage_to_scope = {}
         base_storage_to_align_bytes = {}
 
-        from cgen import ArrayOf, Pointer, Initializer, AlignedAttribute, Value, Line
-
-        class ConstRestrictPointer(Pointer):
-            def get_decl_pair(self):
-                sub_tp, sub_decl = self.subdecl.get_decl_pair()
-                return sub_tp, ("*const restrict %s" % sub_decl)
+        from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line
 
         for tv in sorted(
                 six.itervalues(kernel.temporary_variables),
@@ -286,8 +288,8 @@ class CASTBuilder(ASTBuilderBase):
                     # not use them to shovel data from one representation to the
                     # other. That counts, right?
 
-                    cast_decl = ConstRestrictPointer(cast_decl)
-                    temp_var_decl = ConstRestrictPointer(temp_var_decl)
+                    cast_decl = _ConstRestrictPointer(cast_decl)
+                    temp_var_decl = _ConstRestrictPointer(temp_var_decl)
 
                     cast_tp, cast_d = cast_decl.get_decl_pair()
                     temp_var_decl = Initializer(
diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index 7939dc764114387a2d1f2a10bf361fd7d06c13ba..779abc02e7d3aa86149128a686d45c4aada8e2b0 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -269,6 +269,9 @@ class PyOpenCLTarget(OpenCLTarget):
     warnings) and support for complex numbers.
     """
 
+    host_program_name_prefix = "_lpy_host_"
+    host_program_name_suffix = ""
+
     def __init__(self, device=None, pyopencl_module_name="_lpy_cl"):
         # This ensures the dtype registry is populated.
         import pyopencl.tools  # noqa
@@ -446,9 +449,14 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info):
                 Raise('RuntimeError("input argument \'{name}\' '
                         'must be supplied")'.format(name=idi.name))))
 
-        if sys.version_info < (2, 7) and idi.dtype.is_integral():
-            gen(Comment("cast to long to avoid trouble with struct packing"))
-            gen(Assign(idi.name, "long(%s)" % idi.name))
+        if idi.dtype.is_integral():
+            gen(Comment("cast to Python int to avoid trouble with struct packing or Boost.Python"))
+            if sys.version_info < (3,):
+                py_type = "long"
+            else:
+                py_type = "int"
+
+            gen(Assign(idi.name, "%s(%s)" % (py_type, idi.name)))
             gen(Line())
 
         if idi.dtype.is_composite():
diff --git a/loopy/version.py b/loopy/version.py
index 627329e2c54a62ed2af6358f1d7e94fd7e9624c5..ce1cf30894964e17a8831588d7aaac91b9cabb6a 100644
--- a/loopy/version.py
+++ b/loopy/version.py
@@ -32,4 +32,4 @@ except ImportError:
 else:
     _islpy_version = islpy.version.VERSION_TEXT
 
-DATA_MODEL_VERSION = "v29-islpy%s" % _islpy_version
+DATA_MODEL_VERSION = "v32-islpy%s" % _islpy_version