diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 6a7a977a1fdb6a7e4726da17391dfab0f3b09616..25082f88a10a7e3276c1ac73251633ee9ac93e29 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1208,7 +1208,7 @@ happens when the instruction schedule is generated. To see the schedule, we should call :func:`loopy.get_one_scheduled_kernel`: >>> prog = lp.preprocess_kernel(prog) - >>> knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.program_callables_info) + >>> knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.callables_table) >>> prog = prog.with_root_kernel(knl) >>> print(prog) --------------------------------------------------------------------------- @@ -1240,7 +1240,7 @@ that :func:`loopy.get_one_scheduled_kernel` needs to be called one more time to put those instructions into the schedule. >>> prog = lp.save_and_reload_temporaries(prog) - >>> knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.program_callables_info) # Schedule added instructions + >>> knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.callables_table) # Schedule added instructions >>> prog = prog.with_root_kernel(knl) >>> print(prog) --------------------------------------------------------------------------- diff --git a/examples/python/global_barrier_removal.py b/examples/python/global_barrier_removal.py index cc4926feeeb3815da1c66a2548bf3235df8f1fcc..884fb0bd19c48fe15bd12898e4b684990c986ff4 100644 --- a/examples/python/global_barrier_removal.py +++ b/examples/python/global_barrier_removal.py @@ -24,7 +24,7 @@ from loopy.preprocess import preprocess_kernel knl = preprocess_kernel(knl) from loopy.schedule import get_one_scheduled_kernel -knl = get_one_scheduled_kernel(knl.root_kernel, knl.program_callables_info) +knl = get_one_scheduled_kernel(knl.root_kernel, knl.callables_table) # map schedule onto host or device print(knl) diff --git a/loopy/check.py b/loopy/check.py index bfcd7aa2632a719dc5be23e393a43cc21207c14c..64cf80a4e74c38ebac0b29cd0dc460c030baa130 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -206,7 +206,7 @@ def check_multiple_tags_allowed(kernel): "tags: {1}".format(iname, tags)) -def check_for_double_use_of_hw_axes(kernel, program_callables_info): +def check_for_double_use_of_hw_axes(kernel, callables_table): from loopy.kernel.data import UniqueTag from loopy.kernel.instruction import CallInstruction from loopy.kernel.function_interface import CallableKernel @@ -224,7 +224,7 @@ def check_for_double_use_of_hw_axes(kernel, program_callables_info): # check usage of iname tags in the callee kernel if isinstance(insn, CallInstruction): - in_knl_callable = program_callables_info[ + in_knl_callable = callables_table[ insn.expression.function.name] if isinstance(in_knl_callable, CallableKernel): # check for collision in iname_tag keys in the instruction @@ -712,13 +712,13 @@ def check_variable_access_ordered(kernel): # }}} -def pre_schedule_checks(kernel, program_callables_info): +def pre_schedule_checks(kernel, callables_table): try: logger.debug("%s: pre-schedule check: start" % kernel.name) check_for_duplicate_insn_ids(kernel) check_for_orphaned_user_hardware_axes(kernel) - check_for_double_use_of_hw_axes(kernel, program_callables_info) + check_for_double_use_of_hw_axes(kernel, callables_table) check_insn_attributes(kernel) check_loop_priority_inames_known(kernel) check_multiple_tags_allowed(kernel) @@ -746,7 +746,7 @@ def pre_schedule_checks(kernel, program_callables_info): # {{{ check for unused hw axes -def _check_for_unused_hw_axes_in_kernel_chunk(kernel, program_callables_info, +def _check_for_unused_hw_axes_in_kernel_chunk(kernel, callables_table, sched_index=None): from loopy.schedule import (CallKernel, RunInstruction, Barrier, EnterLoop, LeaveLoop, ReturnFromKernel, @@ -763,7 +763,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, program_callables_info, _, past_end_i = gather_schedule_block(kernel.schedule, sched_index) group_size, local_size = kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at(kernel.schedule, sched_index), - program_callables_info) + callables_table) group_axes = set(ax for ax, length in enumerate(group_size)) local_axes = set(ax for ax, length in enumerate(local_size)) @@ -781,7 +781,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, program_callables_info, sched_item = kernel.schedule[i] if isinstance(sched_item, CallKernel): i = _check_for_unused_hw_axes_in_kernel_chunk(kernel, - program_callables_info, i) + callables_table, i) elif isinstance(sched_item, RunInstruction): insn = kernel.id_to_insn[sched_item.insn_id] @@ -832,10 +832,10 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, program_callables_info, return past_end_i -def check_for_unused_hw_axes_in_insns(kernel, program_callables_info): +def check_for_unused_hw_axes_in_insns(kernel, callables_table): if kernel.schedule: _check_for_unused_hw_axes_in_kernel_chunk(kernel, - program_callables_info) + callables_table) # }}} @@ -989,15 +989,15 @@ def check_that_shapes_and_strides_are_arguments(kernel): # }}} -def pre_codegen_checks(kernel, program_callables_info): +def pre_codegen_checks(kernel, callables_table): try: logger.debug("pre-codegen check %s: start" % kernel.name) - check_for_unused_hw_axes_in_insns(kernel, program_callables_info) + check_for_unused_hw_axes_in_insns(kernel, callables_table) check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel) check_that_temporaries_are_defined_in_subkernels_where_used(kernel) check_that_all_insns_are_scheduled(kernel) - kernel.target.pre_codegen_check(kernel, program_callables_info) + kernel.target.pre_codegen_check(kernel, callables_table) check_that_shapes_and_strides_are_arguments(kernel) logger.debug("pre-codegen check %s: done" % kernel.name) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index d0b19a1eb3cf3ad835f41ccf276c719563a86c77..250e7215a83148f8731f91c4a6b155745c37ce0c 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -192,16 +192,16 @@ class CodeGenerationState(object): .. attribute:: schedule_index_end - .. attribute:: program_callables_info + .. attribute:: callables_table - An instance of :class:`loopy.ProgramCallablesInfo`. + An instance of :class:`loopy.CallablesTable`. """ def __init__(self, kernel, implemented_data_info, implemented_domain, implemented_predicates, seen_dtypes, seen_functions, seen_atomic_dtypes, var_subst_map, allow_complex, - program_callables_info, + callables_table, vectorization_info=None, var_name_generator=None, is_generating_device_code=None, gen_program_name=None, @@ -215,7 +215,7 @@ class CodeGenerationState(object): self.seen_atomic_dtypes = seen_atomic_dtypes self.var_subst_map = var_subst_map.copy() self.allow_complex = allow_complex - self.program_callables_info = program_callables_info + self.callables_table = callables_table self.vectorization_info = vectorization_info self.var_name_generator = var_name_generator self.is_generating_device_code = is_generating_device_code @@ -263,7 +263,7 @@ class CodeGenerationState(object): seen_atomic_dtypes=self.seen_atomic_dtypes, var_subst_map=var_subst_map or self.var_subst_map, allow_complex=self.allow_complex, - program_callables_info=self.program_callables_info, + callables_table=self.callables_table, vectorization_info=vectorization_info, var_name_generator=self.var_name_generator, is_generating_device_code=is_generating_device_code, @@ -385,19 +385,19 @@ class PreambleInfo(ImmutableRecord): # {{{ main code generation entrypoint -def generate_code_for_a_single_kernel(kernel, program_callables_info): +def generate_code_for_a_single_kernel(kernel, callables_table): """ :returns: a :class:`CodeGenerationResult` :param kernel: An instance of :class:`loopy.LoopKernel`. - :param program_callables_info: An instance of - :class:`loopy.ProgramCallablesInfo`. + :param callables_table: An instance of + :class:`loopy.CallablesTable`. """ from loopy.kernel import KernelState if kernel.schedule is None: from loopy.schedule import get_one_scheduled_kernel - kernel = get_one_scheduled_kernel(kernel, program_callables_info) + kernel = get_one_scheduled_kernel(kernel, callables_table) if kernel.state != KernelState.SCHEDULED: raise LoopyError("cannot generate code for a kernel that has not been " @@ -419,7 +419,7 @@ def generate_code_for_a_single_kernel(kernel, program_callables_info): # }}} from loopy.check import pre_codegen_checks - pre_codegen_checks(kernel, program_callables_info) + pre_codegen_checks(kernel, callables_table) logger.info("%s: generate code: start" % kernel.name) @@ -479,7 +479,7 @@ def generate_code_for_a_single_kernel(kernel, program_callables_info): + kernel.name + kernel.target.host_program_name_suffix), schedule_index_end=len(kernel.schedule), - program_callables_info=program_callables_info) + callables_table=callables_table) from loopy.codegen.result import generate_host_or_device_program @@ -556,17 +556,17 @@ def generate_code_v2(program): codegen_results = {} - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): codegen_results[func_id] = ( generate_code_for_a_single_kernel(in_knl_callable.subkernel, - program.program_callables_info)) + program.callables_table)) device_preambles = set() for cgr in codegen_results.values(): device_preambles.update(cgr.device_preambles) - for in_knl_callable in program.program_callables_info.values(): + for in_knl_callable in program.callables_table.values(): for preamble in in_knl_callable.generate_preambles(program.target): device_preambles.update([preamble]) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 90bdbda31410e1d61b2a3e2f504a8dfcb74c23ed..81a672a14e9da6294ceb593ddb379fafa15bb134 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -116,7 +116,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at(kernel.schedule, sched_index), - codegen_state.program_callables_info) + codegen_state.callables_table) return merge_codegen_results(codegen_state, [ codegen_result, diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 39cf20c7db3c36d7ad098f6b494d5a4c0020b97e..c282de79bf1d62b07c6177f5f3fd0472e59e1775 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -248,7 +248,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, return next_func(codegen_state) global_size, local_size = kernel.get_grid_sizes_for_insn_ids( - insn_ids_for_block, codegen_state.program_callables_info) + insn_ids_for_block, codegen_state.callables_table) hw_inames_left = hw_inames_left[:] iname = hw_inames_left.pop() diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 410f13322cc702e82ee2b8fd620b677782b898c4..70079d31874922e87a25f62ed889ee2b6e9e2602 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -1036,7 +1036,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): constants_only=True))) @memoize_method - def get_grid_sizes_for_insn_ids(self, insn_ids, program_callables_info, + def get_grid_sizes_for_insn_ids(self, insn_ids, callables_table, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that could accommodate execution of all instructions whose IDs are given @@ -1048,7 +1048,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): if self.overridden_get_grid_sizes_for_insn_ids: return self.overridden_get_grid_sizes_for_insn_ids( insn_ids, - program_callables_info, + callables_table, ignore_auto=ignore_auto) all_inames_by_insns = set() @@ -1135,7 +1135,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): @memoize_method def get_grid_sizes_for_insn_ids_as_exprs(self, insn_ids, - program_callables_info, ignore_auto=False): + callables_table, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that could accommodate execution of all instructions whose IDs are given in *insn_ids*. @@ -1146,7 +1146,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): """ grid_size, group_size = self.get_grid_sizes_for_insn_ids( - insn_ids, program_callables_info, ignore_auto) + insn_ids, callables_table, ignore_auto) def tup_to_exprs(tup): from loopy.symbolic import pw_aff_to_expr @@ -1154,7 +1154,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): return tup_to_exprs(grid_size), tup_to_exprs(group_size) - def get_grid_size_upper_bounds(self, program_callables_info, ignore_auto=False): + def get_grid_size_upper_bounds(self, callables_table, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that could accommodate execution of *all* instructions in the kernel. @@ -1162,10 +1162,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): """ return self.get_grid_sizes_for_insn_ids( frozenset(insn.id for insn in self.instructions), - program_callables_info, + callables_table, ignore_auto=ignore_auto) - def get_grid_size_upper_bounds_as_exprs(self, program_callables_info, + def get_grid_size_upper_bounds_as_exprs(self, callables_table, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that could accommodate execution of *all* instructions in the kernel. @@ -1175,7 +1175,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): return self.get_grid_sizes_for_insn_ids_as_exprs( frozenset(insn.id for insn in self.instructions), - program_callables_info, + callables_table, ignore_auto=ignore_auto) # }}} diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 268bdaa1c12a5d87891bbcd9748dccdb5e9fc235..362fbcefcf32ef31db54683ac42921cf242e032b 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -157,7 +157,7 @@ class GridOverrideForCalleeKernel(ImmutableRecord): self.local_size = local_size self.global_size = global_size - def __call__(self, insn_ids, program_callables_info, ignore_auto=True): + def __call__(self, insn_ids, callables_table, ignore_auto=True): return self.local_size, self.global_size # }}} @@ -214,7 +214,7 @@ class InKernelCallable(ImmutableRecord): update_persistent_hash = LoopKernel.update_persistent_hash - def with_types(self, arg_id_to_dtype, caller_kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, caller_kernel, callables_table): """ :arg arg_id_to_type: a mapping from argument identifiers (integers for positional arguments, names for keyword @@ -234,7 +234,7 @@ class InKernelCallable(ImmutableRecord): raise NotImplementedError() - def with_descrs(self, arg_id_to_descr, program_callables_info): + def with_descrs(self, arg_id_to_descr, callables_table): """ :arg arg_id_to_descr: a mapping from argument identifiers (integers for positional arguments, names for keyword @@ -363,16 +363,16 @@ class ScalarCallable(InKernelCallable): return (self.arg_id_to_dtype, self.arg_id_to_descr, self.name_in_target) - def with_types(self, arg_id_to_dtype, caller_kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, caller_kernel, callables_table): raise LoopyError("No type inference information present for " "the function %s." % (self.name)) - def with_descrs(self, arg_id_to_descr, program_callables_info): + def with_descrs(self, arg_id_to_descr, callables_table): arg_id_to_descr[-1] = ValueArgDescriptor() return ( self.copy(arg_id_to_descr=arg_id_to_descr), - program_callables_info) + callables_table) def with_hw_axes_sizes(self, global_size, local_size): return self.copy() @@ -564,7 +564,7 @@ class ManglerCallable(ScalarCallable): return (self.name, self.function_mangler, self.arg_id_to_dtype, self.arg_id_to_descr, self.name_in_target) - def with_types(self, arg_id_to_dtype, kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, kernel, callables_table): if self.arg_id_to_dtype is not None: # specializing an already specialized function. for arg_id, dtype in arg_id_to_dtype.items(): @@ -588,7 +588,7 @@ class ManglerCallable(ScalarCallable): return ( self.copy(name_in_target=mangle_result.target_name, arg_id_to_dtype=new_arg_id_to_dtype), - program_callables_info) + callables_table) else: # The function mangler does not agree with the arg id to dtypes # provided. Indicating that is illegal. diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 3f4defc565e676ab87b7261f370996e4dd04c8d1..006ac6ba365b02ef4c3f0fa205f47a41e1e24791 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -755,7 +755,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): # }}} -def assign_automatic_axes(kernel, program_callables_info, axis=0, local_size=None): +def assign_automatic_axes(kernel, callables_table, axis=0, local_size=None): logger.debug("%s: assign automatic axes" % kernel.name) # TODO: do the tag removal rigorously, might be easier after switching # to set() from tuple() @@ -769,7 +769,7 @@ def assign_automatic_axes(kernel, program_callables_info, axis=0, local_size=Non if local_size is None: _, local_size = kernel.get_grid_size_upper_bounds_as_exprs( - program_callables_info, ignore_auto=True) + callables_table, ignore_auto=True) # {{{ axis assignment helper function @@ -797,7 +797,7 @@ def assign_automatic_axes(kernel, program_callables_info, axis=0, local_size=Non return assign_automatic_axes( kernel.copy(iname_to_tags=new_iname_to_tags), - program_callables_info, + callables_table, axis=recursion_axis) if axis is None: @@ -849,7 +849,7 @@ def assign_automatic_axes(kernel, program_callables_info, axis=0, local_size=Non iname, inner_length=local_size[axis], outer_tag=None, inner_tag=new_tag, do_tagged_check=False), - program_callables_info=program_callables_info, + callables_table=callables_table, axis=recursion_axis, local_size=local_size) if not kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase): @@ -871,7 +871,7 @@ def assign_automatic_axes(kernel, program_callables_info, axis=0, local_size=Non del new_iname_to_tags[iname] return assign_automatic_axes(kernel.copy(iname_to_tags=new_iname_to_tags), - program_callables_info, axis=recursion_axis, local_size=local_size) + callables_table, axis=recursion_axis, local_size=local_size) # }}} @@ -940,7 +940,7 @@ def assign_automatic_axes(kernel, program_callables_info, axis=0, local_size=Non return kernel else: return assign_automatic_axes(kernel, - program_callables_info=program_callables_info, axis=axis+1, + callables_table=callables_table, axis=axis+1, local_size=local_size) # }}} diff --git a/loopy/library/function.py b/loopy/library/function.py index f3fb5f8cd41b79425f9e04e9bbd173c6539fa8b1..f225b62f9f77b889c7137d69ff7e3944268641fa 100644 --- a/loopy/library/function.py +++ b/loopy/library/function.py @@ -26,33 +26,33 @@ from loopy.kernel.function_interface import ScalarCallable class MakeTupleCallable(ScalarCallable): - def with_types(self, arg_id_to_dtype, kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, kernel, callables_table): new_arg_id_to_dtype = arg_id_to_dtype.copy() for i in range(len(arg_id_to_dtype)): if i in arg_id_to_dtype and arg_id_to_dtype[i] is not None: new_arg_id_to_dtype[-i-1] = new_arg_id_to_dtype[i] return (self.copy(arg_id_to_dtype=new_arg_id_to_dtype, - name_in_target="loopy_make_tuple"), program_callables_info) + name_in_target="loopy_make_tuple"), callables_table) - def with_descrs(self, arg_id_to_descr, program_callables_info): + def with_descrs(self, arg_id_to_descr, callables_table): from loopy.kernel.function_interface import ValueArgDescriptor new_arg_id_to_descr = dict(((id, ValueArgDescriptor()), (-id-1, ValueArgDescriptor())) for id in arg_id_to_descr.keys()) return ( self.copy(arg_id_to_descr=new_arg_id_to_descr), - program_callables_info) + callables_table) class IndexOfCallable(ScalarCallable): - def with_types(self, arg_id_to_dtype, kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, kernel, callables_table): new_arg_id_to_dtype = dict((i, dtype) for i, dtype in arg_id_to_dtype.items() if dtype is not None) new_arg_id_to_dtype[-1] = kernel.index_dtype return (self.copy(arg_id_to_dtype=new_arg_id_to_dtype), - program_callables_info) + callables_table) def loopy_specific_callable_func_id_to_knl_callable_mappers(target, identifier): diff --git a/loopy/library/random123.py b/loopy/library/random123.py index 397e985b41aec4477cab9d767242b9569d3cb27d..e59a892bb4c7b3bd7222bf61b29e0ade92195240 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -169,14 +169,14 @@ class Random123Callable(ScalarCallable): Records information about for the random123 functions. """ - def with_types(self, arg_id_to_dtype, kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, kernel, callables_table): if 0 not in arg_id_to_dtype or 1 not in arg_id_to_dtype or ( arg_id_to_dtype[0] is None or arg_id_to_dtype[1] is None): # the types provided aren't mature enough to specialize the # callable return (self.copy(), - program_callables_info) + callables_table) name = self.name target = kernel.target @@ -195,7 +195,7 @@ class Random123Callable(ScalarCallable): return ( self.copy(arg_id_to_dtype=new_arg_id_to_dtype, name_in_target=fn+"_gen"), - program_callables_info) + callables_table) elif name == fn + "_f32": new_arg_id_to_dtype = {-1: target.vector_dtype(NumpyType(np.float32), @@ -203,7 +203,7 @@ class Random123Callable(ScalarCallable): -2: ctr_dtype, 0: ctr_dtype, 1: key_dtype} return self.copy(arg_id_to_dtype=new_arg_id_to_dtype, - name_in_target=name), program_callables_info + name_in_target=name), callables_table elif name == fn + "_f64": new_arg_id_to_dtype = {-1: target.vector_dtype(NumpyType(np.float64), @@ -211,10 +211,10 @@ class Random123Callable(ScalarCallable): -2: ctr_dtype, 0: ctr_dtype, 1: key_dtype} return self.copy(arg_id_to_dtype=new_arg_id_to_dtype, - name_in_target=name), program_callables_info + name_in_target=name), callables_table return (self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) def generate_preambles(self, target): rng_variant = FUNC_NAMES_TO_RNG[self.name] diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 70df864d456de4212138e2bd4e0dbd1f61cd4fcb..7c32d0bed9c233cb30ca2130170e374a5cb54ea2 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -424,7 +424,7 @@ def parse_reduction_op(name): # {{{ reduction specific callables class ReductionCallable(ScalarCallable): - def with_types(self, arg_id_to_dtype, kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, kernel, callables_table): scalar_dtype = arg_id_to_dtype[0] index_dtype = arg_id_to_dtype[1] result_dtypes = self.name.reduction_op.result_dtypes(kernel, scalar_dtype, @@ -436,15 +436,15 @@ class ReductionCallable(ScalarCallable): index_dtype) + "_op" return self.copy(arg_id_to_dtype=new_arg_id_to_dtype, - name_in_target=name_in_target), program_callables_info + name_in_target=name_in_target), callables_table - def with_descr(self, arg_id_to_descr, program_callables_info): + def with_descr(self, arg_id_to_descr, callables_table): from loopy.library.kernel.function_interface import ValueArgDescriptor new_arg_id_to_descr = arg_id_to_descr.copy() new_arg_id_to_descr[-1] = ValueArgDescriptor() return ( self.copy(arg_id_to_descr=arg_id_to_descr), - program_callables_info) + callables_table) def generate_preambles(self, target): if isinstance(self.name, ArgExtOp): diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 1042c857da1b309a41198a2957703b92676d4025..85b0c6d4893b7e3f6c86d484de521e10ce65848c 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -890,7 +890,7 @@ def _insert_subdomain_into_domain_tree(kernel, domains, subdomain): # }}} -def realize_reduction_for_single_kernel(kernel, program_callables_info, +def realize_reduction_for_single_kernel(kernel, callables_table, insn_id_filter=None, unknown_types_ok=True, automagic_scans_ok=False, force_scan=False, force_outer_iname_for_scan=None): """Rewrites reductions into their imperative form. With *insn_id_filter* @@ -1012,7 +1012,7 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, # {{{ sequential - def map_reduction_seq(expr, rec, program_callables_info, nresults, arg_dtypes, + def map_reduction_seq(expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes): outer_insn_inames = temp_kernel.insn_inames(insn) @@ -1130,7 +1130,7 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, v[iname].lt_set(v[0] + ubound)).get_basic_sets() return bs - def map_reduction_local(expr, rec, program_callables_info, nresults, arg_dtypes, + def map_reduction_local(expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes): red_iname, = expr.inames @@ -1370,7 +1370,7 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, # {{{ sequential scan - def map_scan_seq(expr, rec, program_callables_info, nresults, arg_dtypes, + def map_scan_seq(expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes, sweep_iname, scan_iname, sweep_min_value, scan_min_value, stride): outer_insn_inames = temp_kernel.insn_inames(insn) @@ -1459,7 +1459,7 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, # {{{ local-parallel scan - def map_scan_local(expr, rec, program_callables_info, nresults, arg_dtypes, + def map_scan_local(expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes, sweep_iname, scan_iname, sweep_min_value, scan_min_value, stride): @@ -1468,7 +1468,7 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, assert scan_size > 0 if scan_size == 1: - return map_reduction_seq(expr, rec, program_callables_info, + return map_reduction_seq(expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes) outer_insn_inames = temp_kernel.insn_inames(insn) @@ -1668,15 +1668,15 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, # {{{ seq/par dispatch - def map_reduction(expr, rec, program_callables_info, nresults=1): + def map_reduction(expr, rec, callables_table, nresults=1): # Only expand one level of reduction at a time, going from outermost to # innermost. Otherwise we get the (iname + insn) dependencies wrong. from loopy.type_inference import ( infer_arg_and_reduction_dtypes_for_reduction_expression) - arg_dtypes, reduction_dtypes, program_callables_info = ( + arg_dtypes, reduction_dtypes, callables_table = ( infer_arg_and_reduction_dtypes_for_reduction_expression( - temp_kernel, expr, program_callables_info, unknown_types_ok)) + temp_kernel, expr, callables_table, unknown_types_ok)) outer_insn_inames = temp_kernel.insn_inames(insn) bad_inames = frozenset(expr.inames) & outer_insn_inames @@ -1785,7 +1785,7 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, for tag in temp_kernel.iname_tags(sweep_iname)))) elif parallel: return map_scan_local( - expr, rec, program_callables_info, nresults, + expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes, sweep_iname, scan_param.scan_iname, scan_param.sweep_lower_bound, @@ -1793,7 +1793,7 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, scan_param.stride) elif sequential: return map_scan_seq( - expr, rec, program_callables_info, nresults, + expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes, sweep_iname, scan_param.scan_iname, scan_param.sweep_lower_bound, @@ -1814,12 +1814,12 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, if n_sequential: assert n_local_par == 0 - return map_reduction_seq(expr, rec, program_callables_info, + return map_reduction_seq(expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes) else: assert n_local_par > 0 return map_reduction_local( - expr, rec, program_callables_info, nresults, arg_dtypes, + expr, rec, callables_table, nresults, arg_dtypes, reduction_dtypes) # }}} @@ -1854,12 +1854,12 @@ def realize_reduction_for_single_kernel(kernel, program_callables_info, from loopy.symbolic import Reduction if isinstance(insn.expression, Reduction) and nresults > 1: new_expressions = cb_mapper(insn.expression, - program_callables_info=program_callables_info, + callables_table=callables_table, nresults=nresults) else: new_expressions = ( cb_mapper(insn.expression, - program_callables_info=program_callables_info),) + callables_table=callables_table),) if generated_insns: # An expansion happened, so insert the generated stuff plus @@ -1952,10 +1952,10 @@ def realize_reduction(program, *args, **kwargs): assert isinstance(program, Program) new_resolved_functions = {} - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): new_subkernel = realize_reduction_for_single_kernel( - in_knl_callable.subkernel, program.program_callables_info, + in_knl_callable.subkernel, program.callables_table, *args, **kwargs) in_knl_callable = in_knl_callable.copy( subkernel=new_subkernel) @@ -1968,9 +1968,9 @@ def realize_reduction(program, *args, **kwargs): new_resolved_functions[func_id] = in_knl_callable - new_program_callables_info = program.program_callables_info.copy( + new_callables_table = program.callables_table.copy( resolved_functions=new_resolved_functions) - return program.copy(program_callables_info=new_program_callables_info) + return program.copy(callables_table=new_callables_table) # }}} @@ -2153,11 +2153,11 @@ class ArgDescrInferenceMapper(RuleAwareIdentityMapper): """ def __init__(self, rule_mapping_context, caller_kernel, - program_callables_info): + callables_table): super(ArgDescrInferenceMapper, self).__init__( rule_mapping_context) self.caller_kernel = caller_kernel - self.program_callables_info = program_callables_info + self.callables_table = callables_table def map_call(self, expr, expn_state, **kwargs): from pymbolic.primitives import Call, CallWithKwargs @@ -2193,12 +2193,12 @@ class ArgDescrInferenceMapper(RuleAwareIdentityMapper): combined_arg_id_to_descr.update(assignee_id_to_descr) # specializing the function according to the parameter description - in_knl_callable = self.program_callables_info[expr.function.name] - new_in_knl_callable, self.program_callables_info = ( + in_knl_callable = self.callables_table[expr.function.name] + new_in_knl_callable, self.callables_table = ( in_knl_callable.with_descrs( - combined_arg_id_to_descr, self.program_callables_info)) - self.program_callables_info, new_func_id = ( - self.program_callables_info.with_callable( + combined_arg_id_to_descr, self.callables_table)) + self.callables_table, new_func_id = ( + self.callables_table.with_callable( expr.function.function, new_in_knl_callable)) @@ -2242,7 +2242,7 @@ class ArgDescrInferenceMapper(RuleAwareIdentityMapper): return kernel.copy(instructions=new_insns) -def traverse_to_infer_arg_descr(kernel, program_callables_info): +def traverse_to_infer_arg_descr(kernel, callables_table): """ Returns a copy of *kernel* with the argument shapes and strides matching for scoped functions in the *kernel*. Refer @@ -2258,12 +2258,12 @@ def traverse_to_infer_arg_descr(kernel, program_callables_info): kernel.substitutions, kernel.get_var_name_generator()) arg_descr_inf_mapper = ArgDescrInferenceMapper(rule_mapping_context, - kernel, program_callables_info) + kernel, callables_table) descr_inferred_kernel = rule_mapping_context.finish_kernel( arg_descr_inf_mapper.map_kernel(kernel)) - return descr_inferred_kernel, arg_descr_inf_mapper.program_callables_info + return descr_inferred_kernel, arg_descr_inf_mapper.callables_table def infer_arg_descr(program): @@ -2272,23 +2272,23 @@ def infer_arg_descr(program): :attr:`loopy.InKernelCallable.arg_id_to_descr` inferred for all the callables. """ - root_kernel_callable = program.program_callables_info[program.name] - old_callables_count = program.program_callables_info.callables_count - program_callables_info = ( - program.program_callables_info.with_edit_callables_mode()) + root_kernel_callable = program.callables_table[program.name] + old_callables_count = program.callables_table.callables_count + callables_table = ( + program.callables_table.with_edit_callables_mode()) root_kernel = program.root_kernel - new_root_kernel, program_callables_info = traverse_to_infer_arg_descr( - root_kernel, program_callables_info) + new_root_kernel, callables_table = traverse_to_infer_arg_descr( + root_kernel, callables_table) new_root_kernel_callable = root_kernel_callable.copy( subkernel=new_root_kernel) - program_callables_info, _ = program_callables_info.with_callable(program.name, + callables_table, _ = callables_table.with_callable(program.name, new_root_kernel_callable) - program_callables_info = program_callables_info.with_exit_edit_callables_mode( + callables_table = callables_table.with_exit_edit_callables_mode( old_callables_count) - return program.copy(program_callables_info=program_callables_info) + return program.copy(callables_table=callables_table) # }}} @@ -2298,7 +2298,7 @@ preprocess_cache = WriteOncePersistentDict( key_builder=LoopyKeyBuilder()) -def preprocess_single_kernel(kernel, program_callables_info, device=None): +def preprocess_single_kernel(kernel, callables_table, device=None): from loopy.kernel import KernelState if kernel.state >= KernelState.PREPROCESSED: return kernel @@ -2356,7 +2356,7 @@ def preprocess_single_kernel(kernel, program_callables_info, device=None): # because it manipulates the depends_on field, which could prevent # defaults from being applied. kernel = realize_reduction_for_single_kernel(kernel, - program_callables_info, unknown_types_ok=False) + callables_table, unknown_types_ok=False) # Ordering restriction: # add_axes_to_temporaries_for_ilp because reduction accumulators @@ -2420,7 +2420,7 @@ def infer_hw_axes_sizes(program): resolved_function_with_hw_axes_sizes_inferred = {} for func_id, in_knl_callable in ( - program.program_callables_info.items()): + program.callables_table.items()): if func_id == program.name: resolved_function_with_hw_axes_sizes_inferred[func_id] = ( in_knl_callable) @@ -2428,11 +2428,11 @@ def infer_hw_axes_sizes(program): resolved_function_with_hw_axes_sizes_inferred[func_id] = ( in_knl_callable.with_hw_axes_sizes(local_size, global_size)) - new_program_callables_info = ( - program.program_callables_info.copy( + new_callables_table = ( + program.callables_table.copy( resolved_functions=resolved_function_with_hw_axes_sizes_inferred)) - return program.copy(program_callables_info=new_program_callables_info) + return program.copy(callables_table=new_callables_table) # }}} @@ -2451,16 +2451,16 @@ def preprocess_program(program, device=None): # Callable editing restrictions: # - # - should not edit program_callables_info in :meth:`preprocess_single_kernel` + # - should not edit callables_table in :meth:`preprocess_single_kernel` # as we are iterating over it.[1] # # [1] https://docs.python.org/3/library/stdtypes.html#dictionary-view-objects new_resolved_functions = {} - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): new_subkernel = preprocess_single_kernel( - in_knl_callable.subkernel, program.program_callables_info, + in_knl_callable.subkernel, program.callables_table, device) in_knl_callable = in_knl_callable.copy( subkernel=new_subkernel) @@ -2472,9 +2472,9 @@ def preprocess_program(program, device=None): new_resolved_functions[func_id] = in_knl_callable - new_program_callables_info = program.program_callables_info.copy( + new_callables_table = program.callables_table.copy( resolved_functions=new_resolved_functions) - program = program.copy(program_callables_info=new_program_callables_info) + program = program.copy(callables_table=new_callables_table) # }}} diff --git a/loopy/program.py b/loopy/program.py index 7479ee04354a253425bda35f5e93f5c47ab50432..f7c399c1e5ef20fbb07ace640cc9d024e28ab72a 100644 --- a/loopy/program.py +++ b/loopy/program.py @@ -47,7 +47,7 @@ __doc__ = """ .. currentmodule:: loopy .. autoclass:: Program -.. autoclass:: ProgramCallablesInfo +.. autoclass:: CallablesTable .. autofunction:: make_program_from_kernel .. autofunction:: iterate_over_kernels_if_given_program @@ -73,11 +73,11 @@ class ResolvedFunctionMarker(RuleAwareIdentityMapper): :arg function_ids: A container with instances of :class:`str` indicating the function identifiers to look for while scoping functions. """ - def __init__(self, rule_mapping_context, kernel, program_callables_info, + def __init__(self, rule_mapping_context, kernel, callables_table, function_id_to_in_knl_callable_mappers): super(ResolvedFunctionMarker, self).__init__(rule_mapping_context) self.kernel = kernel - self.program_callables_info = program_callables_info + self.callables_table = callables_table self.function_id_to_in_knl_callable_mappers = ( function_id_to_in_knl_callable_mappers) @@ -123,8 +123,8 @@ class ResolvedFunctionMarker(RuleAwareIdentityMapper): # associate the newly created ResolvedFunction with the # resolved in-kernel callable - self.program_callables_info, new_func_id = ( - self.program_callables_info.with_added_callable( + self.callables_table, new_func_id = ( + self.callables_table.with_added_callable( expr.function, in_knl_callable)) return type(expr)( ResolvedFunction(new_func_id), @@ -144,8 +144,8 @@ class ResolvedFunctionMarker(RuleAwareIdentityMapper): expr.operation.get_scalar_callables()): in_knl_callable = self.find_in_knl_callable_from_identifier(func_id) assert in_knl_callable is not None - self.program_callables_info, _ = ( - self.program_callables_info.with_added_callable(func_id, + self.callables_table, _ = ( + self.callables_table.with_added_callable(func_id, in_knl_callable)) return super(ResolvedFunctionMarker, self).map_reduction(expr, expn_state) @@ -162,37 +162,37 @@ def _default_func_id_to_kernel_callable_mappers(target): ))) -def initialize_program_callables_info_from_kernel(kernel): +def initialize_callables_table_from_kernel(kernel): """ - Returns an instance of :class:`loopy.ProgramCallablesInfo`, by resolving + Returns an instance of :class:`loopy.CallablesTable`, by resolving the functions based on :mod:`loopy`'s default function resolvers. """ # collect the default function resolvers func_id_to_kernel_callable_mappers = ( _default_func_id_to_kernel_callable_mappers(kernel.target)) - program_callables_info = ProgramCallablesInfo({}) + callables_table = CallablesTable({}) from loopy.symbolic import SubstitutionRuleMappingContext rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) resolved_function_marker = ResolvedFunctionMarker( - rule_mapping_context, kernel, program_callables_info, + rule_mapping_context, kernel, callables_table, func_id_to_kernel_callable_mappers) # mark the functions as "Resolved" in the expression nodes. kernel_with_functions_resolved = rule_mapping_context.finish_kernel( resolved_function_marker.map_kernel(kernel)) - # collect the update program_callables_info - program_callables_info = resolved_function_marker.program_callables_info + # collect the update callables_table + callables_table = resolved_function_marker.callables_table callable_kernel = CallableKernel(kernel_with_functions_resolved) - # add the callable kernel to the program_callables_info - program_callables_info, _ = program_callables_info.with_added_callable( + # add the callable kernel to the callables_table + callables_table, _ = callables_table.with_added_callable( Variable(kernel.name), callable_kernel) - return program_callables_info + return callables_table # {{{ program definition @@ -206,9 +206,9 @@ class Program(ImmutableRecord): An instance of :class:`str`, also the name of the top-most level :class:`loopy.LoopKernel`. - .. attribute:: program_callables_info + .. attribute:: callables_table - An instance of :class:`loopy.program.ProgramCallablesInfo`. + An instance of :class:`loopy.program.CallablesTable`. .. attribute:: target @@ -232,16 +232,16 @@ class Program(ImmutableRecord): """ def __init__(self, name, - program_callables_info, + callables_table, target, func_id_to_in_knl_callable_mappers): - assert isinstance(program_callables_info, ProgramCallablesInfo) + assert isinstance(callables_table, CallablesTable) - assert name in program_callables_info + assert name in callables_table super(Program, self).__init__( name=name, - program_callables_info=program_callables_info, + callables_table=callables_table, target=target, func_id_to_in_knl_callable_mappers=( func_id_to_in_knl_callable_mappers)) @@ -250,7 +250,7 @@ class Program(ImmutableRecord): hash_fields = ( "name", - "program_callables_info", + "callables_table", "target",) update_persistent_hash = LoopKernel.update_persistent_hash @@ -262,7 +262,7 @@ class Program(ImmutableRecord): new_self = super(Program, self).copy(**kwargs) new_resolved_functions = {} for func_id, in_knl_callable in ( - new_self.program_callables_info.items()): + new_self.callables_table.items()): if isinstance(in_knl_callable, CallableKernel): subkernel = in_knl_callable.subkernel new_resolved_functions[func_id] = in_knl_callable.copy( @@ -270,11 +270,11 @@ class Program(ImmutableRecord): else: new_resolved_functions[func_id] = in_knl_callable - program_callables_info = new_self.program_callables_info.copy( + callables_table = new_self.callables_table.copy( resolved_functions=new_resolved_functions) return super(Program, new_self).copy( - program_callables_info=program_callables_info) + callables_table=callables_table) else: return super(Program, self).copy(**kwargs) @@ -285,7 +285,7 @@ class Program(ImmutableRecord): *global_size* and *local_size* are :class:`islpy.PwAff` objects. """ return self.root_kernel.get_grid_size_upper_bounds( - self.program_callables_info, + self.callables_table, ignore_auto=ignore_auto) def get_grid_size_upper_bounds_as_exprs(self, ignore_auto=False): @@ -295,7 +295,7 @@ class Program(ImmutableRecord): *global_size* and *local_size* are :mod:`pymbolic` expressions """ return self.root_kernel.get_grid_size_upper_bounds_as_exprs( - self.program_callables_info, + self.callables_table, ignore_auto=ignore_auto) # {{{ implementation arguments @@ -338,7 +338,7 @@ class Program(ImmutableRecord): Syntactic sugar. """ - return self.program_callables_info[self.name].subkernel + return self.callables_table[self.name].subkernel @property def arg_dict(self): @@ -367,14 +367,14 @@ class Program(ImmutableRecord): Returns a copy of *self* with the topmost level kernel as *root_kernel*. """ - new_in_knl_callable = self.program_callables_info[ + new_in_knl_callable = self.callables_table[ self.name].copy(subkernel=root_kernel) new_resolved_functions = ( - self.program_callables_info.resolved_functions.copy()) + self.callables_table.resolved_functions.copy()) new_resolved_functions[self.name] = new_in_knl_callable return self.copy( - program_callables_info=self.program_callables_info.copy( + callables_table=self.callables_table.copy( resolved_functions=new_resolved_functions)) def __call__(self, *args, **kwargs): @@ -462,14 +462,14 @@ def rename_resolved_functions_in_a_single_kernel(kernel, class CallablesCountingMapper(CombineMapper): """ Returns an instance of :class:`collections.Counter` with the count of - callables registered in *program_callables_info*. + callables registered in *callables_table*. - .. attribute:: program_callables_info + .. attribute:: callables_table - An instance of :class:`loopy.program.ProgramCallablesInfo`. + An instance of :class:`loopy.program.CallablesTable`. """ - def __init__(self, program_callables_info): - self.program_callables_info = program_callables_info + def __init__(self, callables_table): + self.callables_table = callables_table def combine(self, values): return sum(values, Counter()) @@ -483,7 +483,7 @@ class CallablesCountingMapper(CombineMapper): kw_parameters = {} if isinstance(expr.function, (ResolvedFunction)): - in_knl_callable = self.program_callables_info[expr.function.name] + in_knl_callable = self.callables_table[expr.function.name] if isinstance(in_knl_callable, ScalarCallable): return (Counter([expr.function.name]) + self.combine((self.rec(child) for child in expr.parameters @@ -495,7 +495,7 @@ class CallablesCountingMapper(CombineMapper): callables_count_in_subkernel = ( count_callables_in_kernel( in_knl_callable.subkernel, - self.program_callables_info)) + self.callables_table)) return (Counter([expr.function.name]) + self.combine((self.rec(child) for child in expr.parameters @@ -525,16 +525,16 @@ class CallablesCountingMapper(CombineMapper): @memoize_method -def count_callables_in_kernel(kernel, program_callables_info): +def count_callables_in_kernel(kernel, callables_table): """ Returns an instance of :class:`collections.Counter` representing the number of callables in the *kernel* that are registered in - *program_callables_info*. + *callables_table*. """ assert isinstance(kernel, LoopKernel) callables_count = Counter() callables_counting_mapper = CallablesCountingMapper( - program_callables_info) + callables_table) subst_expander = SubstitutionRuleExpander(kernel.substitutions) for insn in kernel.instructions: @@ -555,7 +555,7 @@ def count_callables_in_kernel(kernel, program_callables_info): # {{{ program callables info -class ProgramCallablesInfo(ImmutableRecord): +class CallablesTable(ImmutableRecord): # FIXME: is CallablesTable a better name?(similar to symbol table in # compilers.) """ @@ -594,7 +594,7 @@ class ProgramCallablesInfo(ImmutableRecord): history = dict((func_id, frozenset([func_id])) for func_id in resolved_functions) - super(ProgramCallablesInfo, self).__init__( + super(CallablesTable, self).__init__( resolved_functions=resolved_functions, history=history, is_being_edited=is_being_edited) @@ -618,7 +618,7 @@ class ProgramCallablesInfo(ImmutableRecord): def callables_count(self): """ Returns an instance of :class:`collection.Counter` representing the number - of times the callables is called in program_callables_info. + of times the callables is called in callables_table. """ # should raise an error if there are more than one root kernels(which is # illegal) @@ -648,24 +648,24 @@ class ProgramCallablesInfo(ImmutableRecord): .. note:: - Always checks whether the - :attr:``loopy.ProgramCallablesInfo.resolved_functions` has + :attr:``loopy.CallablesTable.resolved_functions` has *in_kernel_callable*, does not introduce copies. - The difference between - :meth:`loopy.ProgramCallablesInfo.with_added_callable` - and :meth:`ProgramCallablesInfo.with_callable` being that + :meth:`loopy.CallablesTable.with_added_callable` + and :meth:`CallablesTable.with_callable` being that the former has no support for renaming the callable back i.e. ``with_callable`` supports renaming from ``sin_0`` to ``sin``, if possible, through the member method - ``loopy.ProgramCallablesInfo.with_exit_edit_callables_mode`` + ``loopy.CallablesTable.with_exit_edit_callables_mode`` This subtle difference makes -- - - :meth:`loopy.ProgramCallablesInfo.with_added_callable` suitable + - :meth:`loopy.CallablesTable.with_added_callable` suitable for usage while resolving the functions first time, where no renaming is needed. - - :meth:`loopy.ProgramCallablesInfo.with_callable` suitable for + - :meth:`loopy.CallablesTable.with_callable` suitable for implementing edits in callables during inference-walks. """ @@ -745,7 +745,7 @@ class ProgramCallablesInfo(ImmutableRecord): def with_callable(self, function, in_kernel_callable): """ Returns an instance of :class:`tuple` ``(new_self, new_function)``. - Also refer -- :meth:`loopy.ProgramCallablesInfo.with_added_callable` + Also refer -- :meth:`loopy.CallablesTable.with_added_callable` :arg function: An instance of :class:`pymbolic.primitives.Variable` or @@ -929,12 +929,12 @@ def make_program_from_kernel(kernel): """ # get the program callables info - program_callables_info = initialize_program_callables_info_from_kernel(kernel) + callables_table = initialize_callables_table_from_kernel(kernel) # get the program from program callables info program = Program( name=kernel.name, - program_callables_info=program_callables_info, + callables_table=callables_table, func_id_to_in_knl_callable_mappers=( _default_func_id_to_kernel_callable_mappers(kernel.target)), target=kernel.target) @@ -953,7 +953,7 @@ def iterate_over_kernels_if_given_program(transform_for_single_kernel): if isinstance(program_or_kernel, Program): program = program_or_kernel new_resolved_functions = {} - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): new_subkernel = transform_for_single_kernel( in_knl_callable.subkernel, *args, **kwargs) @@ -968,9 +968,9 @@ def iterate_over_kernels_if_given_program(transform_for_single_kernel): new_resolved_functions[func_id] = in_knl_callable - new_program_callables_info = program.program_callables_info.copy( + new_callables_table = program.callables_table.copy( resolved_functions=new_resolved_functions) - return program.copy(program_callables_info=new_program_callables_info) + return program.copy(callables_table=new_callables_table) else: assert isinstance(program_or_kernel, LoopKernel) kernel = program_or_kernel diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 201bcc2562754a30d20d3903ecf01d0d8984b11e..2b3f7a3b986b62aae325a2bb1e75859411be2350 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1832,7 +1832,7 @@ class MinRecursionLimitForScheduling(MinRecursionLimit): # {{{ main scheduling entrypoint -def generate_loop_schedules(kernel, program_callables_info, debug_args={}): +def generate_loop_schedules(kernel, callables_table, debug_args={}): """ .. warning:: @@ -1846,18 +1846,18 @@ def generate_loop_schedules(kernel, program_callables_info, debug_args={}): with MinRecursionLimitForScheduling(kernel): for sched in generate_loop_schedules_inner(kernel, - program_callables_info, debug_args=debug_args): + callables_table, debug_args=debug_args): yield sched -def generate_loop_schedules_inner(kernel, program_callables_info, debug_args={}): +def generate_loop_schedules_inner(kernel, callables_table, debug_args={}): from loopy.kernel import KernelState if kernel.state not in (KernelState.PREPROCESSED, KernelState.SCHEDULED): raise LoopyError("cannot schedule a kernel that has not been " "preprocessed") from loopy.check import pre_schedule_checks - pre_schedule_checks(kernel, program_callables_info) + pre_schedule_checks(kernel, callables_table) schedule_count = 0 @@ -1971,7 +1971,7 @@ def generate_loop_schedules_inner(kernel, program_callables_info, debug_args={}) kernel, gen_sched) gsize, lsize = ( - kernel.get_grid_size_upper_bounds(program_callables_info)) + kernel.get_grid_size_upper_bounds(callables_table)) if (gsize or lsize): if not kernel.options.disable_global_barriers: @@ -2028,7 +2028,7 @@ schedule_cache = WriteOncePersistentDict( key_builder=LoopyKeyBuilder()) -def _get_one_scheduled_kernel_inner(kernel, program_callables_info): +def _get_one_scheduled_kernel_inner(kernel, callables_table): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the @@ -2038,10 +2038,10 @@ def _get_one_scheduled_kernel_inner(kernel, program_callables_info): # # See https://gitlab.tiker.net/inducer/sumpy/issues/31 for context. - return next(iter(generate_loop_schedules(kernel, program_callables_info))) + return next(iter(generate_loop_schedules(kernel, callables_table))) -def get_one_scheduled_kernel(kernel, program_callables_info): +def get_one_scheduled_kernel(kernel, callables_table): from loopy import CACHING_ENABLED sched_cache_key = kernel @@ -2060,7 +2060,7 @@ def get_one_scheduled_kernel(kernel, program_callables_info): with ProcessLogger(logger, "%s: schedule" % kernel.name): with MinRecursionLimitForScheduling(kernel): result = _get_one_scheduled_kernel_inner(kernel, - program_callables_info) + callables_table) if CACHING_ENABLED and not from_cache: schedule_cache.store_if_not_present(sched_cache_key, result) diff --git a/loopy/statistics.py b/loopy/statistics.py index 5dddd49e0549f3648140b4b0374900a3d8ff0e93..d65387d16fe820455c4ab04e4e2f4e2152f62ab8 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -648,11 +648,11 @@ class MemAccess(Record): # {{{ counter base class CounterBase(CombineMapper): - def __init__(self, knl, program_callables_info): + def __init__(self, knl, callables_table): self.knl = knl - self.program_callables_info = program_callables_info + self.callables_table = callables_table from loopy.type_inference import TypeInferenceMapper - self.type_inf = TypeInferenceMapper(knl, program_callables_info) + self.type_inf = TypeInferenceMapper(knl, callables_table) def combine(self, values): return sum(values) @@ -707,11 +707,11 @@ class CounterBase(CombineMapper): # {{{ ExpressionOpCounter class ExpressionOpCounter(CounterBase): - def __init__(self, knl, program_callables_info): + def __init__(self, knl, callables_table): self.knl = knl - self.program_callables_info = program_callables_info + self.callables_table = callables_table from loopy.type_inference import TypeInferenceMapper - self.type_inf = TypeInferenceMapper(knl, program_callables_info) + self.type_inf = TypeInferenceMapper(knl, callables_table) def combine(self, values): return sum(values) @@ -725,7 +725,7 @@ class ExpressionOpCounter(CounterBase): def map_call(self, expr): from loopy.symbolic import ResolvedFunction if isinstance(expr.function, ResolvedFunction): - function_identifier = self.program_callables_info[ + function_identifier = self.callables_table[ expr.function.name].name else: function_identifier = expr.function.name @@ -1111,7 +1111,7 @@ def count(kernel, set, space=None): from loopy.program import Program if isinstance(kernel, Program): if len([in_knl_callable for in_knl_callable in - kernel.program_callables_info.values() if isinstance(in_knl_callable, + kernel.callables_table.values() if isinstance(in_knl_callable, CallableKernel)]) != 1: raise NotImplementedError("Currently only supported for program with " "only one CallableKernel.") @@ -1216,10 +1216,10 @@ def count(kernel, set, space=None): return add_assumptions_guard(kernel, count) -def get_unused_hw_axes_factor(knl, program_callables_info, insn, +def get_unused_hw_axes_factor(knl, callables_table, insn, disregard_local_axes, space=None): # FIXME: Multi-kernel support - gsize, lsize = knl.get_grid_size_upper_bounds(program_callables_info) + gsize, lsize = knl.get_grid_size_upper_bounds(callables_table) g_used = set() l_used = set() @@ -1257,7 +1257,7 @@ def get_unused_hw_axes_factor(knl, program_callables_info, insn, return add_assumptions_guard(knl, result) -def count_insn_runs(knl, program_callables_info, insn, count_redundant_work, +def count_insn_runs(knl, callables_table, insn, count_redundant_work, disregard_local_axes=False): insn_inames = knl.insn_inames(insn) @@ -1278,7 +1278,7 @@ def count_insn_runs(knl, program_callables_info, insn, count_redundant_work, c = count(knl, domain, space=space) if count_redundant_work: - unused_fac = get_unused_hw_axes_factor(knl, program_callables_info, + unused_fac = get_unused_hw_axes_factor(knl, callables_table, insn, disregard_local_axes=disregard_local_axes, space=space) return c * unused_fac else: @@ -1286,7 +1286,7 @@ def count_insn_runs(knl, program_callables_info, insn, count_redundant_work, @memoize_method -def _get_insn_count(knl, program_callables_info, insn_id, subgroup_size, +def _get_insn_count(knl, callables_table, insn_id, subgroup_size, count_redundant_work, count_granularity=CountGranularity.WORKITEM): insn = knl.id_to_insn[insn_id] @@ -1299,12 +1299,12 @@ def _get_insn_count(knl, program_callables_info, insn_id, subgroup_size, if count_granularity == CountGranularity.WORKITEM: return count_insn_runs( - knl, program_callables_info, insn, + knl, callables_table, insn, count_redundant_work=count_redundant_work, disregard_local_axes=False) ct_disregard_local = count_insn_runs( - knl, program_callables_info, insn, disregard_local_axes=True, + knl, callables_table, insn, disregard_local_axes=True, count_redundant_work=count_redundant_work) if count_granularity == CountGranularity.WORKGROUP: @@ -1312,7 +1312,7 @@ def _get_insn_count(knl, program_callables_info, insn_id, subgroup_size, elif count_granularity == CountGranularity.SUBGROUP: # get the group size from loopy.symbolic import aff_to_expr - _, local_size = knl.get_grid_size_upper_bounds(program_callables_info) + _, local_size = knl.get_grid_size_upper_bounds(callables_table) workgroup_size = 1 if local_size: for size in local_size: @@ -1344,7 +1344,7 @@ def _get_insn_count(knl, program_callables_info, insn_id, subgroup_size, # {{{ get_op_map -def get_op_map_for_single_kernel(knl, program_callables_info, +def get_op_map_for_single_kernel(knl, callables_table, numpy_types=True, count_redundant_work=False, subgroup_size=None): @@ -1355,7 +1355,7 @@ def get_op_map_for_single_kernel(knl, program_callables_info, subgroup_size = _process_subgroup_size(knl, subgroup_size) op_map = ToCountMap() - op_counter = ExpressionOpCounter(knl, program_callables_info) + op_counter = ExpressionOpCounter(knl, callables_table) from loopy.kernel.instruction import ( CallInstruction, CInstruction, Assignment, @@ -1368,7 +1368,7 @@ def get_op_map_for_single_kernel(knl, program_callables_info, op_map = ( op_map + ToCountMap({key: val}) - * _get_insn_count(knl, program_callables_info, insn.id, + * _get_insn_count(knl, callables_table, insn.id, subgroup_size, count_redundant_work, key.count_granularity)) @@ -1458,13 +1458,13 @@ def get_op_map(program, numpy_types=True, count_redundant_work=False, op_map = ToCountMap() callables_count = ( - program.program_callables_info.callables_count) + program.callables_table.callables_count) - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): knl = in_knl_callable.subkernel knl_op_map = get_op_map_for_single_kernel(knl, - program.program_callables_info, numpy_types, + program.callables_table, numpy_types, count_redundant_work, subgroup_size) for i in range(callables_count[func_id]): @@ -1535,7 +1535,7 @@ def _process_subgroup_size(knl, subgroup_size_requested): # {{{ get_mem_access_map -def get_mem_access_map_for_single_kernel(knl, program_callables_info, +def get_mem_access_map_for_single_kernel(knl, callables_table, numpy_types=True, count_redundant_work=False, subgroup_size=None): if not knl.options.ignore_boostable_into: @@ -1545,8 +1545,8 @@ def get_mem_access_map_for_single_kernel(knl, program_callables_info, subgroup_size = _process_subgroup_size(knl, subgroup_size) access_map = ToCountMap() - access_counter_g = GlobalMemAccessCounter(knl, program_callables_info) - access_counter_l = LocalMemAccessCounter(knl, program_callables_info) + access_counter_g = GlobalMemAccessCounter(knl, callables_table) + access_counter_l = LocalMemAccessCounter(knl, callables_table) from loopy.kernel.instruction import ( CallInstruction, CInstruction, Assignment, @@ -1569,7 +1569,7 @@ def get_mem_access_map_for_single_kernel(knl, program_callables_info, access_map = ( access_map + ToCountMap({key: val}) - * _get_insn_count(knl, program_callables_info, insn.id, + * _get_insn_count(knl, callables_table, insn.id, subgroup_size, count_redundant_work, key.count_granularity)) @@ -1578,7 +1578,7 @@ def get_mem_access_map_for_single_kernel(knl, program_callables_info, access_map = ( access_map + ToCountMap({key: val}) - * _get_insn_count(knl, program_callables_info, insn.id, + * _get_insn_count(knl, callables_table, insn.id, subgroup_size, count_redundant_work, key.count_granularity)) @@ -1700,13 +1700,13 @@ def get_mem_access_map(program, numpy_types=True, count_redundant_work=False, access_map = ToCountMap() - callables_count = program.program_callables_info.callables_count + callables_count = program.callables_table.callables_count - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): knl = in_knl_callable.subkernel knl_access_map = get_mem_access_map_for_single_kernel(knl, - program.program_callables_info, numpy_types, + program.callables_table, numpy_types, count_redundant_work, subgroup_size) # FIXME: didn't see any easy way to multiply @@ -1726,7 +1726,7 @@ def get_mem_access_map(program, numpy_types=True, count_redundant_work=False, # {{{ get_synchronization_map -def get_synchronization_map_for_single_kernel(knl, program_callables_info, +def get_synchronization_map_for_single_kernel(knl, callables_table, subgroup_size=None): """Count the number of synchronization events each work-item encounters in @@ -1772,7 +1772,7 @@ def get_synchronization_map_for_single_kernel(knl, program_callables_info, from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, CallKernel, ReturnFromKernel, RunInstruction) from operator import mul - knl = lp.get_one_scheduled_kernel(knl, program_callables_info) + knl = lp.get_one_scheduled_kernel(knl, callables_table) iname_list = [] result = ToCountMap() @@ -1824,13 +1824,13 @@ def get_synchronization_map(program, subgroup_size=None): program = preprocess_program(program) sync_map = ToCountMap() - callables_count = program.program_callables_info.callables_count + callables_count = program.callables_table.callables_count - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): knl = in_knl_callable.subkernel knl_sync_map = get_synchronization_map_for_single_kernel(knl, - program.program_callables_info, subgroup_size) + program.callables_table, subgroup_size) # FIXME: didn't see any easy way to multiply for i in range(callables_count[func_id]): @@ -1887,7 +1887,7 @@ def gather_access_footprints_for_single_kernel(kernel, ignore_uncountable=False) def gather_access_footprints(program, ignore_uncountable=False): # FIMXE: works only for one callable kernel till now. if len([in_knl_callable for in_knl_callable in - program.program_callables_info.values() if isinstance(in_knl_callable, + program.callables_table.values() if isinstance(in_knl_callable, CallableKernel)]) != 1: raise NotImplementedError("Currently only supported for program with " "only one CallableKernel.") @@ -1900,9 +1900,9 @@ def gather_access_footprints(program, ignore_uncountable=False): write_footprints = [] read_footprints = [] - callables_count = program.program_callables_info.callables_count + callables_count = program.callables_table.callables_count - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): knl = in_knl_callable.subkernel knl_write_footprints, knl_read_footprints = ( diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 92ee2dc51bd07f46e4113208d67b329ed82798cd..f27ee4e96f11f686250bddf57ec87422c717373e 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -80,7 +80,7 @@ class TargetBase(object): def preprocess(self, kernel): return kernel - def pre_codegen_check(self, kernel, program_callables_info): + def pre_codegen_check(self, kernel, callables_table): pass # }}} diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 418ce0256babf90724de413ae9e5102dc80e8bd5..9b5aaf8e9ab5ca4d4619b01e1d588ca2c093948d 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -362,7 +362,7 @@ class CMathCallable(ScalarCallable): C-Target. """ - def with_types(self, arg_id_to_dtype, caller_kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, caller_kernel, callables_table): name = self.name if name in ["abs", "min", "max"]: @@ -381,7 +381,7 @@ class CMathCallable(ScalarCallable): # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = arg_id_to_dtype[0] dtype = dtype.numpy_dtype @@ -409,7 +409,7 @@ class CMathCallable(ScalarCallable): self.copy(name_in_target=name, arg_id_to_dtype={0: NumpyType(dtype), -1: NumpyType(dtype)}), - program_callables_info) + callables_table) # binary functions if name in ["fmax", "fmin"]: @@ -424,7 +424,7 @@ class CMathCallable(ScalarCallable): # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = np.find_common_type( [], [dtype.numpy_dtype for id, dtype in arg_id_to_dtype.items() @@ -449,11 +449,11 @@ class CMathCallable(ScalarCallable): return ( self.copy(name_in_target=name, arg_id_to_dtype={-1: dtype, 0: dtype, 1: dtype}), - program_callables_info) + callables_table) return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) def scope_c_math_functions(target, identifier): @@ -893,7 +893,7 @@ class CASTBuilder(ASTBuilderBase): ecm = codegen_state.expression_to_code_mapper func_id = insn.expression.function.name - in_knl_callable = codegen_state.program_callables_info[func_id] + in_knl_callable = codegen_state.callables_table[func_id] if isinstance(in_knl_callable, ScalarCallable) and ( in_knl_callable.name_in_target == 'loopy_make_tuple'): diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 65a8c2028d6ec4d58e9c73bdb6d7bf486b90cf32..289877d9ad69d1302de9e3da91643b8a7d614c35 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -55,7 +55,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): if type_inf_mapper is None: type_inf_mapper = TypeInferenceMapper(self.kernel, - self.codegen_state.program_callables_info) + self.codegen_state.callables_table) self.type_inf_mapper = type_inf_mapper self.allow_complex = codegen_state.allow_complex @@ -389,7 +389,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): # {{{ implement indexof, indexof_vec identifier_name = ( - self.codegen_state.program_callables_info[expr.function.name].name) + self.codegen_state.callables_table[expr.function.name].name) if identifier_name in ["indexof", "indexof_vec"]: if len(expr.parameters) != 1: raise LoopyError("%s takes exactly one argument" % identifier_name) @@ -432,11 +432,11 @@ class ExpressionToCExpressionMapper(IdentityMapper): # }}} from loopy.kernel.function_interface import ManglerCallable - if isinstance(self.codegen_state.program_callables_info[expr.function.name], + if isinstance(self.codegen_state.callables_table[expr.function.name], ManglerCallable): from loopy.codegen import SeenFunction in_knl_callable = ( - self.codegen_state.program_callables_info[ + self.codegen_state.callables_table[ expr.function.name]) mangle_result = in_knl_callable.mangle_result(self.kernel) self.codegen_state.seen_functions.add( @@ -445,7 +445,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): mangle_result.arg_dtypes)) return ( - self.codegen_state.program_callables_info[ + self.codegen_state.callables_table[ expr.function.name].emit_call( expression_to_code_mapper=self, expression=expr, diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index e6abf73fd0a4b721f7f80fc17b4677283e7c8e7a..32b810eb33170177798ced4bec910a214b95152e 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -123,7 +123,7 @@ _CUDA_SPECIFIC_FUNCTIONS = { class CudaCallable(ScalarCallable): def cuda_with_types(self, arg_id_to_dtype, caller_kernel, - program_callables_info): + callables_table): name = self.name @@ -138,7 +138,7 @@ class CudaCallable(ScalarCallable): # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = arg_id_to_dtype[0] scalar_dtype, offset, field_name = dtype.numpy_dtype.fields["x"] @@ -146,7 +146,7 @@ class CudaCallable(ScalarCallable): self.copy(name_in_target=name, arg_id_to_dtype={-1: NumpyType(scalar_dtype), 0: dtype, 1: dtype}), - program_callables_info) + callables_table) if name in _CUDA_SPECIFIC_FUNCTIONS: num_args = _CUDA_SPECIFIC_FUNCTIONS[name] @@ -161,7 +161,7 @@ class CudaCallable(ScalarCallable): # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = np.find_common_type( [], [dtype.numpy_dtype for id, dtype in @@ -177,11 +177,11 @@ class CudaCallable(ScalarCallable): return ( self.copy(name_in_target=name, arg_id_to_dtype=updated_arg_id_to_dtype), - program_callables_info) + callables_table) return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) def scope_cuda_functions(target, identifier): @@ -303,7 +303,7 @@ class CUDACASTBuilder(CASTBuilder): codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at( codegen_state.kernel.schedule, schedule_index), - codegen_state.program_callables_info) + codegen_state.callables_table) from loopy.symbolic import get_dependencies if not get_dependencies(local_grid_size): diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 43963ddb2f47fb80475b7060b7929cf21d34142e..c067bc4b901ef3922236ac3dc1a8245aaf28c8aa 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -763,7 +763,7 @@ class KernelExecutorBase(object): from loopy.schedule import get_one_scheduled_kernel program = program.with_root_kernel( get_one_scheduled_kernel(program.root_kernel, - program.program_callables_info)) + program.callables_table)) return program diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index f8c42ad69f6767930999e68d2f969952ef6e740b..94a81a65a15bcc0da7530b51d5943d4b94bf74ac 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -172,9 +172,9 @@ class ISPCTarget(CTarget): host_program_name_suffix = "" device_program_name_suffix = "_inner" - def pre_codegen_check(self, kernel, program_callables_info): + def pre_codegen_check(self, kernel, callables_table): gsize, lsize = kernel.get_grid_size_upper_bounds_as_exprs( - program_callables_info) + callables_table) if len(lsize) > 1: for i, ls_i in enumerate(lsize[1:]): if ls_i != 1: diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index d8c195de296ac595224d6be96507e49534ac28c2..ea29665ac2702acb41579fd1fe2fe8edf14ea21a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -172,7 +172,7 @@ class OpenCLCallable(ScalarCallable): :class:`loopy.target.c.CMathCallable`. """ - def with_types(self, arg_id_to_dtype, caller_kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, caller_kernel, callables_table): name = self.name if name in ["max", "min"]: @@ -182,7 +182,7 @@ class OpenCLCallable(ScalarCallable): if 0 not in arg_id_to_dtype or 1 not in arg_id_to_dtype: return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = np.find_common_type( [], [dtype.numpy_dtype for id, dtype in arg_id_to_dtype.items() @@ -195,7 +195,7 @@ class OpenCLCallable(ScalarCallable): return ( self.copy(name_in_target=name, arg_id_to_dtype={-1: dtype, 0: dtype, 1: dtype}), - program_callables_info) + callables_table) else: # Unsupported type. raise LoopyError("%s function not supported for the types %s" % @@ -212,14 +212,14 @@ class OpenCLCallable(ScalarCallable): # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = arg_id_to_dtype[0] scalar_dtype, offset, field_name = dtype.numpy_dtype.fields["s0"] return ( self.copy(name_in_target=name, arg_id_to_dtype={-1: NumpyType(scalar_dtype), 0: dtype, 1: dtype}), - program_callables_info) + callables_table) if name in _CL_SIMPLE_MULTI_ARG_FUNCTIONS: num_args = _CL_SIMPLE_MULTI_ARG_FUNCTIONS[name] @@ -234,7 +234,7 @@ class OpenCLCallable(ScalarCallable): # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = np.find_common_type( [], [dtype.numpy_dtype for id, dtype in @@ -250,7 +250,7 @@ class OpenCLCallable(ScalarCallable): return ( self.copy(name_in_target=name, arg_id_to_dtype=updated_arg_id_to_dtype), - program_callables_info) + callables_table) if name in VECTOR_LITERAL_FUNCS: base_tp_name, dtype, count = VECTOR_LITERAL_FUNCS[name] @@ -266,7 +266,7 @@ class OpenCLCallable(ScalarCallable): # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) updated_arg_id_to_dtype = dict((id, NumpyType(dtype)) for id in range(count)) @@ -276,13 +276,13 @@ class OpenCLCallable(ScalarCallable): return ( self.copy(name_in_target="(%s%d) " % (base_tp_name, count), arg_id_to_dtype=updated_arg_id_to_dtype), - program_callables_info) + callables_table) # does not satisfy any of the conditions needed for specialization. # hence just returning a copy of the callable. return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) def scope_opencl_functions(target, identifier): @@ -479,7 +479,7 @@ class OpenCLCASTBuilder(CASTBuilder): _, local_sizes = codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( get_insn_ids_for_block_at( codegen_state.kernel.schedule, schedule_index), - codegen_state.program_callables_info) + codegen_state.callables_table) from loopy.symbolic import get_dependencies if not get_dependencies(local_sizes): diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 435a5e791f821392aea51a3d750a6c00d3586fc1..d98b6cdd618fa17fe30a66232a7eb76262bd2d39 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -134,7 +134,7 @@ def adjust_local_temp_var_storage(kernel, device): # {{{ check sizes against device properties -def check_sizes(kernel, program_callables_info, device): +def check_sizes(kernel, callables_table, device): import loopy as lp from loopy.diagnostic import LoopyAdvisory, LoopyError @@ -152,7 +152,7 @@ def check_sizes(kernel, program_callables_info, device): parameters[arg.name] = arg.approximately glens, llens = ( - kernel.get_grid_size_upper_bounds_as_exprs(program_callables_info)) + kernel.get_grid_size_upper_bounds_as_exprs(callables_table)) if (max(len(glens), len(llens)) > device.max_work_item_dimensions): @@ -207,7 +207,7 @@ class PyOpenCLCallable(ScalarCallable): Records information about the callables which are not covered by :class:`loopy.target.opencl.OpenCLCallable` """ - def with_types(self, arg_id_to_dtype, caller_kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, caller_kernel, callables_table): name = self.name @@ -221,7 +221,7 @@ class PyOpenCLCallable(ScalarCallable): # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = arg_id_to_dtype[0] @@ -238,7 +238,7 @@ class PyOpenCLCallable(ScalarCallable): self.copy(name_in_target="%s_%s" % (tpname, name), arg_id_to_dtype={0: dtype, -1: NumpyType( np.dtype(dtype.numpy_dtype.type(0).real))}), - program_callables_info) + callables_table) if name in ["sqrt", "exp", "log", "sin", "cos", "tan", @@ -256,7 +256,7 @@ class PyOpenCLCallable(ScalarCallable): return ( self.copy(name_in_target="%s_%s" % (tpname, name), arg_id_to_dtype={0: dtype, -1: dtype}), - program_callables_info) + callables_table) else: # function calls for floating parameters. numpy_dtype = dtype.numpy_dtype @@ -267,11 +267,11 @@ class PyOpenCLCallable(ScalarCallable): return ( self.copy(name_in_target=name, arg_id_to_dtype={0: dtype, -1: dtype}), - program_callables_info) + callables_table) return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) def pyopencl_function_id_to_in_knl_callable_mapper(target, identifier): @@ -397,8 +397,8 @@ class PyOpenCLTarget(OpenCLTarget): kernel = adjust_local_temp_var_storage(kernel, self.device) return kernel - def pre_codegen_check(self, kernel, program_callables_info): - check_sizes(kernel, program_callables_info, self.device) + def pre_codegen_check(self, kernel, callables_table): + check_sizes(kernel, callables_table, self.device) def get_host_ast_builder(self): return PyOpenCLPythonASTBuilder(self) diff --git a/loopy/target/python.py b/loopy/target/python.py index 2e6712ec1425004f599c00f09f83f0c58111bab3..1f83112ff8fd9f32f2e48f3c76a3de0abaad92fd 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -45,7 +45,7 @@ class ExpressionToPythonMapper(StringifyMapper): if type_inf_mapper is None: type_inf_mapper = TypeInferenceMapper(self.kernel, - self.codegen_state.program_callables_info) + self.codegen_state.callables_table) self.type_inf_mapper = type_inf_mapper def handle_unsupported_expression(self, victim, enclosing_prec): @@ -85,7 +85,7 @@ class ExpressionToPythonMapper(StringifyMapper): def map_call(self, expr, enclosing_prec): from pymbolic.mapper.stringifier import PREC_NONE - identifier_name = self.codegen_state.program_callables_info[ + identifier_name = self.codegen_state.callables_table[ expr.function.name].name if identifier_name in ["indexof", "indexof_vec"]: @@ -93,7 +93,7 @@ class ExpressionToPythonMapper(StringifyMapper): "indexof, indexof_vec not yet supported in Python") from loopy.kernel.function_interface import ManglerCallable - in_knl_callable = self.codegen_state.program_callables_info[ + in_knl_callable = self.codegen_state.callables_table[ expr.function.name] if isinstance(in_knl_callable, ManglerCallable): from loopy.codegen import SeenFunction diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index 57c4397f998a74222bc482127076dee809de2bac..2519b6a14765400c9aaba249c1010d07d820355e 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -133,7 +133,7 @@ buffer_array_cache = WriteOncePersistentDict( # Adding an argument? also add something to the cache_key below. -def buffer_array_for_single_kernel(kernel, program_callables_info, var_name, +def buffer_array_for_single_kernel(kernel, callables_table, var_name, buffer_inames, init_expression=None, store_expression=None, within=None, default_tag="l.auto", temporary_scope=None, temporary_is_local=None, fetch_bounding_box=False): @@ -534,7 +534,7 @@ def buffer_array_for_single_kernel(kernel, program_callables_info, var_name, kernel = tag_inames(kernel, new_iname_to_tag) from loopy.kernel.tools import assign_automatic_axes - kernel = assign_automatic_axes(kernel, program_callables_info) + kernel = assign_automatic_axes(kernel, callables_table) if CACHING_ENABLED: from loopy.preprocess import prepare_for_caching @@ -548,10 +548,10 @@ def buffer_array(program, *args, **kwargs): assert isinstance(program, Program) new_resolved_functions = {} - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): new_subkernel = buffer_array_for_single_kernel( - in_knl_callable.subkernel, program.program_callables_info, + in_knl_callable.subkernel, program.callables_table, *args, **kwargs) in_knl_callable = in_knl_callable.copy( subkernel=new_subkernel) @@ -564,8 +564,8 @@ def buffer_array(program, *args, **kwargs): new_resolved_functions[func_id] = in_knl_callable - new_program_callables_info = program.program_callables_info.copy( + new_callables_table = program.callables_table.copy( resolved_functions=new_resolved_functions) - return program.copy(program_callables_info=new_program_callables_info) + return program.copy(callables_table=new_callables_table) # vim: foldmethod=marker diff --git a/loopy/transform/callable.py b/loopy/transform/callable.py index 90f530953c76581d7f35cd5bf5fed9bb6789f5e9..0013de1d5c855ed90488a39846712fb3808869a9 100644 --- a/loopy/transform/callable.py +++ b/loopy/transform/callable.py @@ -46,11 +46,11 @@ def _resolved_callables_from_function_lookup(program, ``(target, identifier)`` that returns either an instance of :class:`loopy.InKernelCallable` or *None*. """ - program_callables_info = program.program_callables_info + callables_table = program.callables_table callable_knls = dict( (func_id, in_knl_callable) for func_id, in_knl_callable in - program_callables_info.items() if isinstance(in_knl_callable, + callables_table.items() if isinstance(in_knl_callable, CallableKernel)) edited_callable_knls = {} @@ -62,28 +62,28 @@ def _resolved_callables_from_function_lookup(program, kernel.substitutions, kernel.get_var_name_generator()) resolved_function_marker = ResolvedFunctionMarker( - rule_mapping_context, kernel, program_callables_info, + rule_mapping_context, kernel, callables_table, [func_id_to_in_kernel_callable_mapper]) new_subkernel = rule_mapping_context.finish_kernel( resolved_function_marker.map_kernel(kernel)) - program_callables_info = resolved_function_marker.program_callables_info + callables_table = resolved_function_marker.callables_table edited_callable_knls[func_id] = in_knl_callable.copy( subkernel=new_subkernel) new_resolved_functions = {} - for func_id, in_knl_callable in program_callables_info.items(): + for func_id, in_knl_callable in callables_table.items(): if func_id in edited_callable_knls: new_resolved_functions[func_id] = edited_callable_knls[func_id] else: new_resolved_functions[func_id] = in_knl_callable - program_callables_info = program_callables_info.copy( + callables_table = callables_table.copy( resolved_functions=new_resolved_functions) - return program.copy(program_callables_info=program_callables_info) + return program.copy(callables_table=callables_table) def register_function_id_to_in_knl_callable_mapper(program, diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 5f4f2f2a77b927e4a4352077ed94492249ef75a0..888bedc1de4ba1993d505a2c660390d41e286a99 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -143,7 +143,7 @@ class _not_provided: # noqa: N801 pass -def add_prefetch_for_single_kernel(kernel, program_callables_info, var_name, +def add_prefetch_for_single_kernel(kernel, callables_table, var_name, sweep_inames=[], dim_arg_names=None, # "None" is a valid value here, distinct from the default. @@ -334,7 +334,7 @@ def add_prefetch_for_single_kernel(kernel, program_callables_info, var_name, # warning message. from loopy.transform.precompute import precompute_for_single_kernel - new_kernel = precompute_for_single_kernel(kernel, program_callables_info, + new_kernel = precompute_for_single_kernel(kernel, callables_table, subst_use, sweep_inames, precompute_inames=dim_arg_names, default_tag=default_tag, dtype=arg.dtype, fetch_bounding_box=fetch_bounding_box, @@ -373,10 +373,10 @@ def add_prefetch(program, *args, **kwargs): assert isinstance(program, Program) new_resolved_functions = {} - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): new_subkernel = add_prefetch_for_single_kernel( - in_knl_callable.subkernel, program.program_callables_info, + in_knl_callable.subkernel, program.callables_table, *args, **kwargs) in_knl_callable = in_knl_callable.copy( subkernel=new_subkernel) @@ -389,9 +389,9 @@ def add_prefetch(program, *args, **kwargs): new_resolved_functions[func_id] = in_knl_callable - new_program_callables_info = program.program_callables_info.copy( + new_callables_table = program.callables_table.copy( resolved_functions=new_resolved_functions) - return program.copy(program_callables_info=new_program_callables_info) + return program.copy(callables_table=new_callables_table) # }}} diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index 44e69ecfb0254ca1dcd7d0eae570f2139a64c83d..9b83f242bde7923a3932a00b42f442954cf9a7db 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -420,23 +420,23 @@ def fuse_kernels(programs, suffixes=None, data_flow=None): """ # all the resolved functions in programs must be registered in - # main_program_callables_info + # main_callables_table main_prog_callables_info = ( - programs[0].program_callables_info) + programs[0].callables_table) old_root_kernel_callable = ( - programs[0].program_callables_info[programs[0].name]) + programs[0].callables_table[programs[0].name]) kernels = [programs[0].root_kernel] # removing the callable collisions that maybe present for prog in programs[1:]: root_kernel = prog.root_kernel renames_needed = {} - for old_func_id, in_knl_callable in prog.program_callables_info.items(): + for old_func_id, in_knl_callable in prog.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): # Fusing programs with multiple callable kernels is tough. # Reason: Need to first figure out the order in which the # callable kernels must be resolved into - # main_program_callables_info, because of renaming is + # main_callables_table, because of renaming is # needed to be done in the callable kernels before registering. # Hence disabling it until required. if in_knl_callable.subkernel.name != prog.name: @@ -468,6 +468,6 @@ def fuse_kernels(programs, suffixes=None, data_flow=None): var(programs[0].name), new_root_kernel_callable) return programs[0].copy( - program_callables_info=main_prog_callables_info) + callables_table=main_prog_callables_info) # vim: foldmethod=marker diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index b6a0454eea6a377dc3052561db3a5f910dd60c47..fb6682f4866317c0852c3814268ada904b2391b8 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1095,7 +1095,7 @@ def get_iname_duplication_options_for_single_kernel(knl, use_boostable_into=Fals def get_iname_duplication_options(program, use_boostable_into=False): - for in_knl_callable in program.program_callables_info.values(): + for in_knl_callable in program.callables_table.values(): if isinstance(in_knl_callable, CallableKernel): for option in get_iname_duplication_options_for_single_kernel( in_knl_callable.subkernel, use_boostable_into): @@ -1121,7 +1121,7 @@ def has_schedulable_iname_nesting_for_single_kernel(knl): def has_schedulable_iname_nesting(program): return all(has_schedulable_iname_nesting_for_single_kernel( in_knl_callable.subkernel) for in_knl_callable in - program.program_callables_info.values() if isinstance(in_knl_callable, + program.callables_table.values() if isinstance(in_knl_callable, CallableKernel)) # }}} diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 93cf932b1bf555b64d502b2c7a8c2fcc658d6a25..f73110ecdff79d7c029c0dd0d895ef71ea68326b 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -42,7 +42,7 @@ def find_instructions_in_single_kernel(kernel, insn_match): def find_instructions(program, insn_match): assert isinstance(program, Program) insns = [] - for in_knl_callable in program.program_callables_info.values(): + for in_knl_callable in program.callables_table.values(): if isinstance(in_knl_callable, CallableKernel): insns += (find_instructions_in_single_kernel( in_knl_callable.subkernel, insn_match)) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 66c7114aee2a987c3a7810252ba3cc4f4d293c83..71b11fa24c49437032537d22aa7275b7e9e3d7f4 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -261,7 +261,7 @@ class _not_provided(object): # noqa: N801 pass -def precompute_for_single_kernel(kernel, program_callables_info, subst_use, +def precompute_for_single_kernel(kernel, callables_table, subst_use, sweep_inames=[], within=None, storage_axes=None, temporary_name=None, precompute_inames=None, precompute_outer_inames=None, storage_axis_to_tag={}, @@ -1047,7 +1047,7 @@ def precompute_for_single_kernel(kernel, program_callables_info, subst_use, if filter_iname_tags_by_type(new_iname_to_tag.values(), AutoFitLocalIndexTag): from loopy.kernel.tools import assign_automatic_axes - kernel = assign_automatic_axes(kernel, program_callables_info) + kernel = assign_automatic_axes(kernel, callables_table) return kernel @@ -1056,10 +1056,10 @@ def precompute(program, *args, **kwargs): assert isinstance(program, Program) new_resolved_functions = {} - for func_id, in_knl_callable in program.program_callables_info.items(): + for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): new_subkernel = precompute_for_single_kernel( - in_knl_callable.subkernel, program.program_callables_info, + in_knl_callable.subkernel, program.callables_table, *args, **kwargs) in_knl_callable = in_knl_callable.copy( subkernel=new_subkernel) @@ -1072,8 +1072,8 @@ def precompute(program, *args, **kwargs): new_resolved_functions[func_id] = in_knl_callable - new_program_callables_info = program.program_callables_info.copy( + new_callables_table = program.callables_table.copy( resolved_functions=new_resolved_functions) - return program.copy(program_callables_info=new_program_callables_info) + return program.copy(callables_table=new_callables_table) # vim: foldmethod=marker diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 4b957b033fd4659732970b12ee3986b4028811cf..e463353ef9ea0860188b49fcb8f2f06bb96b0f41 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -235,9 +235,9 @@ class TemporarySaver(object): def new_shape(self): return self.hw_dims + self.non_hw_dims - def __init__(self, kernel, program_callables_info): + def __init__(self, kernel, callables_table): self.kernel = kernel - self.program_callables_info = program_callables_info + self.callables_table = callables_table self.var_name_gen = kernel.get_var_name_generator() self.insn_name_gen = kernel.get_instruction_id_generator() @@ -441,7 +441,7 @@ class TemporarySaver(object): group_sizes, local_sizes = ( self.kernel.get_grid_sizes_for_insn_ids_as_exprs(accessor_insn_ids, - self.program_callables_info)) + self.callables_table)) if temporary.address_space == lp.AddressSpace.LOCAL: # Elide local axes in the save slot for local temporaries. @@ -630,7 +630,7 @@ class TemporarySaver(object): kernel = lp.add_nosync(kernel, "global", source, sink) from loopy.kernel.tools import assign_automatic_axes - return assign_automatic_axes(kernel, self.program_callables_info) + return assign_automatic_axes(kernel, self.callables_table) def save(self, temporary, subkernel): self.save_or_reload_impl(temporary, subkernel, "save") @@ -754,12 +754,12 @@ def save_and_reload_temporaries(program): program = lp.preprocess_program(program) from loopy.schedule import get_one_scheduled_kernel knl = get_one_scheduled_kernel(program.root_kernel, - program.program_callables_info) + program.callables_table) assert knl.schedule is not None liveness = LivenessAnalysis(knl) - saver = TemporarySaver(knl, program.program_callables_info) + saver = TemporarySaver(knl, program.callables_table) from loopy.schedule.tools import ( temporaries_read_in_subkernel, temporaries_written_in_subkernel) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index afe3fec59ea41a85ddc1cc09f74504b07a5f0d45..acdf5b2a1a1a4f84c76703a471f95db0b34e0d7a 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -510,7 +510,7 @@ def find_rules_matching(knl, pattern): def find_one_rule_matching(program, pattern): rules = [] - for in_knl_callable in program.program_callables_info.values(): + for in_knl_callable in program.callables_table.values(): if isinstance(in_knl_callable, CallableKernel): knl = in_knl_callable.subkernel rules.extend(find_rules_matching(knl, pattern)) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 43986640547c9c504c271dd36469e5c2f72300ee..029381d8d8ce20030140b2948fbf0ddf343ae39c 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -35,7 +35,7 @@ from loopy.diagnostic import ( TypeInferenceFailure, DependencyTypeInferenceFailure) from loopy.kernel.instruction import _DataObliviousInstruction -from loopy.program import ProgramCallablesInfo +from loopy.program import CallablesTable from loopy.symbolic import ( LinearSubscript, parse_tagged_name, RuleAwareIdentityMapper, SubstitutionRuleExpander, ResolvedFunction, @@ -197,7 +197,7 @@ def change_names_of_pymbolic_calls(kernel, pymbolic_calls_to_new_names): # {{{ type inference mapper class TypeInferenceMapper(CombineMapper): - def __init__(self, kernel, program_callables_info, new_assignments=None): + def __init__(self, kernel, callables_table, new_assignments=None): """ :arg new_assignments: mapping from names to either :class:`loopy.kernel.data.TemporaryVariable` @@ -206,12 +206,12 @@ class TypeInferenceMapper(CombineMapper): instances """ self.kernel = kernel - assert isinstance(program_callables_info, ProgramCallablesInfo) + assert isinstance(callables_table, CallablesTable) if new_assignments is None: new_assignments = {} self.new_assignments = new_assignments self.symbols_with_unknown_types = set() - self.program_callables_info = program_callables_info + self.callables_table = callables_table self.old_calls_to_new_calls = {} def __call__(self, expr, return_tuple=False, return_dtype_set=False): @@ -245,16 +245,16 @@ class TypeInferenceMapper(CombineMapper): # /!\ Introduce caches with care--numpy.float32(x) and numpy.float64(x) # are Python-equal (for many common constants such as integers). - def copy(self, program_callables_info=None): - if program_callables_info is None: - program_callables_info = self.program_callables_info - return type(self)(self.kernel, program_callables_info, + def copy(self, callables_table=None): + if callables_table is None: + callables_table = self.callables_table + return type(self)(self.kernel, callables_table, self.new_assignments) def with_assignments(self, names_to_vars): new_ass = self.new_assignments.copy() new_ass.update(names_to_vars) - return type(self)(self.kernel, self.program_callables_info, new_ass) + return type(self)(self.kernel, self.callables_table, new_ass) @staticmethod def combine(dtype_sets): @@ -431,7 +431,7 @@ class TypeInferenceMapper(CombineMapper): # specializing the known function wrt type if isinstance(expr.function, ResolvedFunction): - in_knl_callable = self.program_callables_info[expr.function.name] + in_knl_callable = self.callables_table[expr.function.name] # {{{ checking that there is no overwriting of types of in_knl_callable @@ -465,17 +465,17 @@ class TypeInferenceMapper(CombineMapper): # }}} - in_knl_callable, self.program_callables_info = ( + in_knl_callable, self.callables_table = ( in_knl_callable.with_types( arg_id_to_dtype, self.kernel, - self.program_callables_info)) + self.callables_table)) in_knl_callable = in_knl_callable.with_target(self.kernel.target) # storing the type specialized function so that it can be used for # later use - self.program_callables_info, new_function_id = ( - self.program_callables_info.with_callable( + self.callables_table, new_function_id = ( + self.callables_table.with_callable( expr.function.function, in_knl_callable)) @@ -538,8 +538,8 @@ class TypeInferenceMapper(CombineMapper): in_knl_callable = ManglerCallable( identifier, function_mangler, arg_id_to_dtype, arg_id_to_descr, mangle_result.target_name) - self.program_callables_info, new_function_id = ( - self.program_callables_info.with_added_callable( + self.callables_table, new_function_id = ( + self.callables_table.with_added_callable( expr.function, in_knl_callable)) if isinstance(expr, Call): @@ -688,7 +688,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): if var_name in kernel.all_params(): return [kernel.index_dtype], [], {}, ( - type_inf_mapper.program_callables_info) + type_inf_mapper.callables_table) from functools import partial debug = partial(_debug, kernel) @@ -735,13 +735,13 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): if not dtype_sets: return ( None, type_inf_mapper.symbols_with_unknown_types, None, - type_inf_mapper.program_callables_info) + type_inf_mapper.callables_table) result = type_inf_mapper.combine(dtype_sets) return (result, type_inf_mapper.symbols_with_unknown_types, type_inf_mapper.old_calls_to_new_calls, - type_inf_mapper.program_callables_info) + type_inf_mapper.callables_table) # }}} @@ -768,7 +768,7 @@ class _DictUnionView: # {{{ infer_unknown_types -def infer_unknown_types_for_a_single_kernel(kernel, program_callables_info, +def infer_unknown_types_for_a_single_kernel(kernel, callables_table, expect_completion=False): """Infer types on temporaries and arguments.""" @@ -831,7 +831,7 @@ def infer_unknown_types_for_a_single_kernel(kernel, program_callables_info, new_temp_vars, new_arg_dict ]) - type_inf_mapper = TypeInferenceMapper(kernel, program_callables_info, + type_inf_mapper = TypeInferenceMapper(kernel, callables_table, item_lookup) from loopy.symbolic import SubstitutionRuleExpander @@ -867,11 +867,11 @@ def infer_unknown_types_for_a_single_kernel(kernel, program_callables_info, debug("inferring type for %s %s", type(item).__name__, item.name) (result, symbols_with_unavailable_types, - new_old_calls_to_new_calls, program_callables_info) = ( + new_old_calls_to_new_calls, callables_table) = ( _infer_var_type( kernel, item.name, type_inf_mapper, subst_expander)) type_inf_mapper = type_inf_mapper.copy( - program_callables_info=program_callables_info) + callables_table=callables_table) failed = not result if not failed: @@ -979,7 +979,7 @@ def infer_unknown_types_for_a_single_kernel(kernel, program_callables_info, raise NotImplementedError("Unknown instructions type %s." % ( type(insn).__name__)) - program_callables_info = type_inf_mapper.program_callables_info + callables_table = type_inf_mapper.callables_table old_calls_to_new_calls.update(type_inf_mapper.old_calls_to_new_calls) end_time = time.time() @@ -1003,39 +1003,39 @@ def infer_unknown_types_for_a_single_kernel(kernel, program_callables_info, from loopy.check import check_functions_are_resolved check_functions_are_resolved(type_specialized_kernel) - return type_specialized_kernel, program_callables_info + return type_specialized_kernel, callables_table def infer_unknown_types(program, expect_completion=False): """Infer types on temporaries and arguments.""" - program_callables_info = program.program_callables_info + callables_table = program.callables_table type_uninferred_knl_callable = ( - program_callables_info[program.name]) + callables_table[program.name]) type_uninferred_root_kernel = type_uninferred_knl_callable.subkernel - old_callables_count = program_callables_info.callables_count - program_callables_info = ( - program.program_callables_info.with_edit_callables_mode()) - root_kernel, program_callables_info = ( + old_callables_count = callables_table.callables_count + callables_table = ( + program.callables_table.with_edit_callables_mode()) + root_kernel, callables_table = ( infer_unknown_types_for_a_single_kernel( type_uninferred_root_kernel, - program_callables_info, expect_completion)) + callables_table, expect_completion)) type_inferred_knl_callable = type_uninferred_knl_callable.copy( subkernel=root_kernel) - program_callables_info, _ = ( - program_callables_info.with_callable( + callables_table, _ = ( + callables_table.with_callable( program.name, type_inferred_knl_callable)) - program_callables_info = ( - program_callables_info.with_exit_edit_callables_mode( + callables_table = ( + callables_table.with_exit_edit_callables_mode( old_callables_count)) - return program.copy(program_callables_info=program_callables_info) + return program.copy(callables_table=callables_table) # }}} @@ -1043,8 +1043,8 @@ def infer_unknown_types(program, expect_completion=False): # {{{ reduction expression helper def infer_arg_and_reduction_dtypes_for_reduction_expression( - kernel, expr, program_callables_info, unknown_types_ok): - type_inf_mapper = TypeInferenceMapper(kernel, program_callables_info) + kernel, expr, callables_table, unknown_types_ok): + type_inf_mapper = TypeInferenceMapper(kernel, callables_table) import loopy as lp if expr.is_tuple_typed: @@ -1076,7 +1076,7 @@ def infer_arg_and_reduction_dtypes_for_reduction_expression( for dt in reduction_dtypes) return tuple(arg_dtypes), reduction_dtypes, ( - type_inf_mapper.program_callables_info) + type_inf_mapper.callables_table) # }}} diff --git a/test/test_loopy.py b/test/test_loopy.py index 43371c8a808dbd592952f201358ac70eceb8b936..fa32ca04cb0541a415c2ca248b6edd7b753981d1 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -416,7 +416,7 @@ def test_ilp_write_race_detection_global(ctx_factory): from warnings import catch_warnings with catch_warnings(record=True) as warn_list: list(lp.generate_loop_schedules(knl.root_kernel, - knl.program_callables_info)) + knl.callables_table)) assert any(isinstance(w.message, WriteRaceConditionWarning) for w in warn_list) @@ -1271,7 +1271,7 @@ def save_and_reload_temporaries_test(queue, prog, out_expect, debug=False): from loopy.transform.save import save_and_reload_temporaries prog = save_and_reload_temporaries(prog) prog = prog.with_root_kernel(lp.get_one_scheduled_kernel(prog.root_kernel, - prog.program_callables_info)) + prog.callables_table)) if debug: print(prog) @@ -2222,7 +2222,7 @@ def test_unscheduled_insn_detection(): "...") prog = lp.preprocess_kernel(prog) - knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.program_callables_info) + knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.callables_table) prog = prog.with_root_kernel(knl) insn1, = lp.find_instructions(prog, "id:insn1") insns = prog.root_kernel.instructions[:] @@ -2392,7 +2392,7 @@ def test_barrier_insertion_near_top_of_loop(): prog = lp.set_temporary_scope(prog, "a", "local") prog = lp.set_temporary_scope(prog, "b", "local") prog = lp.preprocess_kernel(prog) - knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.program_callables_info) + knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.callables_table) print(knl) @@ -2420,7 +2420,7 @@ def test_barrier_insertion_near_bottom_of_loop(): prog = lp.set_temporary_scope(prog, "a", "local") prog = lp.set_temporary_scope(prog, "b", "local") prog = lp.preprocess_kernel(prog) - knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.program_callables_info) + knl = lp.get_one_scheduled_kernel(prog.root_kernel, prog.callables_table) print(knl) @@ -2479,7 +2479,7 @@ def test_multi_argument_reduction_type_inference(): allow_simultaneous=True) t_inf_mapper = TypeInferenceMapper(prog.root_kernel, - prog.program_callables_info) + prog.callables_table) assert ( t_inf_mapper(expr, return_tuple=True, return_dtype_set=True) @@ -2836,7 +2836,7 @@ def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier): prog = lp.preprocess_kernel(prog) knl = lp.get_one_scheduled_kernel(prog.root_kernel, - prog.program_callables_info) + prog.callables_table) assert barrier_between(knl, "first", "second") == expect_barrier diff --git a/test/testlib.py b/test/testlib.py index eebc792d00297adf4785f850582b415ca1d20439..853e2584a1e10732b3ec49cd737016734cdea5fa 100644 --- a/test/testlib.py +++ b/test/testlib.py @@ -9,9 +9,9 @@ class GridOverride(object): self.clean = clean self.vecsize = vecsize - def __call__(self, insn_ids, program_callables_info, ignore_auto=True): + def __call__(self, insn_ids, callables_table, ignore_auto=True): gsize, _ = self.clean.get_grid_sizes_for_insn_ids(insn_ids, - program_callables_info, ignore_auto) + callables_table, ignore_auto) return gsize, (self.vecsize,) # }}} @@ -139,14 +139,14 @@ class SeparateTemporariesPreambleTestPreambleGenerator( class Log2Callable(lp.ScalarCallable): - def with_types(self, arg_id_to_dtype, kernel, program_callables_info): + def with_types(self, arg_id_to_dtype, kernel, callables_table): if 0 not in arg_id_to_dtype or arg_id_to_dtype[0] is None: # the types provided aren't mature enough to specialize the # callable return ( self.copy(arg_id_to_dtype=arg_id_to_dtype), - program_callables_info) + callables_table) dtype = arg_id_to_dtype[0].numpy_dtype @@ -168,7 +168,7 @@ class Log2Callable(lp.ScalarCallable): self.copy(name_in_target=name_in_target, arg_id_to_dtype={0: NumpyType(dtype), -1: NumpyType(dtype)}), - program_callables_info) + callables_table) def register_log2_lookup(target, identifier):