diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 1bdf70c29bf8ed8bbf42b1fc5edfdeb411f64aaa..9d525605b7a41c39400cd4d4beb7e64e38d766f2 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -227,9 +227,9 @@ inspect that code, too, using :attr:`loopy.Options.write_wrapper`: <BLANKLINE> if n is None: if a is not None: - n = int(a.shape[0]) + n = a.shape[0] elif out is not None: - n = int(out.shape[0]) + n = out.shape[0] <BLANKLINE> # }}} ... diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index d117daf5109324a88b7c3a6c08bf2d9961ae3ae4..6eef793c7cf8294e7f8fad11a0757d59898d7ac7 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -456,7 +456,10 @@ def generate_code_v2(kernel): allow_complex=allow_complex, var_name_generator=kernel.get_var_name_generator(), is_generating_device_code=False, - gen_program_name=kernel.name, + gen_program_name=( + kernel.target.host_program_name_prefix + + kernel.name + + kernel.target.host_program_name_suffix), schedule_index_end=len(kernel.schedule)) from loopy.codegen.result import generate_host_or_device_program diff --git a/loopy/compiled.py b/loopy/compiled.py index 3ef3ee27dd77daff398303a1715882199b8ec6d4..55feff66a83cbca5dacfb6717e7cb94fc69ed51d 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -192,7 +192,7 @@ def generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info) for arg_name, value_expr in sources: gen("%s %s is not None:" % (if_stmt, arg_name)) with Indentation(gen): - gen("%s = int(%s)" + gen("%s = %s" % (iarg_name, StringifyMapper()(value_expr))) if_stmt = "elif" @@ -226,10 +226,10 @@ def generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info gen("else:") with Indentation(gen): if not options.no_numpy: - gen("_lpy_offset = int(getattr(%s, \"offset\", 0))" + gen("_lpy_offset = getattr(%s, \"offset\", 0)" % impl_array_name) else: - gen("_lpy_offset = int(%s.offset)" % impl_array_name) + gen("_lpy_offset = %s.offset" % impl_array_name) base_arg = kernel.impl_arg_to_arg[impl_array_name] @@ -287,9 +287,8 @@ def generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info "not divisible by its dtype itemsize\"" % (stride_impl_axis, impl_array_name)) gen("del _lpy_remdr") - gen("%s = int(%s)" % (arg.name, arg.name)) else: - gen("%s = int(_lpy_offset // %d)" + gen("%s = _lpy_offset // %d" % (arg.name, base_arg.dtype.itemsize)) gen("# }}}") @@ -542,8 +541,10 @@ def generate_arg_setup(gen, kernel, implemented_data_info, options): # }}} -def generate_invoker(kernel, implemented_data_info, host_code): +def generate_invoker(kernel, codegen_result): options = kernel.options + implemented_data_info = codegen_result.implemented_data_info + host_code = codegen_result.host_code() system_args = [ "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None", @@ -580,7 +581,7 @@ def generate_invoker(kernel, implemented_data_info, host_code): gen("_lpy_evt = {kernel_name}({args})" .format( - kernel_name=kernel.name, + kernel_name=codegen_result.host_program.name, args=", ".join( ["_lpy_cl_kernels", "queue"] + args @@ -754,11 +755,7 @@ class CompiledKernel: kernel=kernel, cl_kernels=cl_kernels, implemented_data_info=codegen_result.implemented_data_info, - invoker=generate_invoker( - kernel, - codegen_result.implemented_data_info, - codegen_result.host_code(), - )) + invoker=generate_invoker(kernel, codegen_result)) # {{{ debugging aids diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index b20967f67dac4482ddd3a0a148e35f6e9d744be8..3ec3a50b11f72a2975ac4366d495326bfcb69b37 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -81,7 +81,9 @@ class TargetBase(object): # }}} + host_program_name_prefix = "" host_program_name_suffix = "_outer" + device_program_name_prefix = "" device_program_name_suffix = "" def split_kernel_at_global_barriers(self): diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index d9f420405ad4c0905dd8c47554bb2cf1f24bd87f..493cef0634ea455f6a48c66e4cfbcb0da9b46572 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -29,6 +29,7 @@ import six import numpy as np # noqa from loopy.target import TargetBase, ASTBuilderBase, DummyHostASTBuilder from loopy.diagnostic import LoopyError +from cgen import Pointer from pytools import memoize_method @@ -176,6 +177,12 @@ class CTarget(TargetBase): # }}} +class _ConstRestrictPointer(Pointer): + def get_decl_pair(self): + sub_tp, sub_decl = self.subdecl.get_decl_pair() + return sub_tp, ("*const restrict %s" % sub_decl) + + class CASTBuilder(ASTBuilderBase): # {{{ library @@ -238,12 +245,7 @@ class CASTBuilder(ASTBuilderBase): base_storage_to_scope = {} base_storage_to_align_bytes = {} - from cgen import ArrayOf, Pointer, Initializer, AlignedAttribute, Value, Line - - class ConstRestrictPointer(Pointer): - def get_decl_pair(self): - sub_tp, sub_decl = self.subdecl.get_decl_pair() - return sub_tp, ("*const restrict %s" % sub_decl) + from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line for tv in sorted( six.itervalues(kernel.temporary_variables), @@ -286,8 +288,8 @@ class CASTBuilder(ASTBuilderBase): # not use them to shovel data from one representation to the # other. That counts, right? - cast_decl = ConstRestrictPointer(cast_decl) - temp_var_decl = ConstRestrictPointer(temp_var_decl) + cast_decl = _ConstRestrictPointer(cast_decl) + temp_var_decl = _ConstRestrictPointer(temp_var_decl) cast_tp, cast_d = cast_decl.get_decl_pair() temp_var_decl = Initializer( diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 7939dc764114387a2d1f2a10bf361fd7d06c13ba..779abc02e7d3aa86149128a686d45c4aada8e2b0 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -269,6 +269,9 @@ class PyOpenCLTarget(OpenCLTarget): warnings) and support for complex numbers. """ + host_program_name_prefix = "_lpy_host_" + host_program_name_suffix = "" + def __init__(self, device=None, pyopencl_module_name="_lpy_cl"): # This ensures the dtype registry is populated. import pyopencl.tools # noqa @@ -446,9 +449,14 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): Raise('RuntimeError("input argument \'{name}\' ' 'must be supplied")'.format(name=idi.name)))) - if sys.version_info < (2, 7) and idi.dtype.is_integral(): - gen(Comment("cast to long to avoid trouble with struct packing")) - gen(Assign(idi.name, "long(%s)" % idi.name)) + if idi.dtype.is_integral(): + gen(Comment("cast to Python int to avoid trouble with struct packing or Boost.Python")) + if sys.version_info < (3,): + py_type = "long" + else: + py_type = "int" + + gen(Assign(idi.name, "%s(%s)" % (py_type, idi.name))) gen(Line()) if idi.dtype.is_composite(): diff --git a/loopy/version.py b/loopy/version.py index 627329e2c54a62ed2af6358f1d7e94fd7e9624c5..ce1cf30894964e17a8831588d7aaac91b9cabb6a 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v29-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v32-islpy%s" % _islpy_version