diff --git a/loopy/execution.py b/loopy/execution.py index dac5b2ff80767ae00c126aba31c2851cfe3769ef..07e28f06d33e5884ac57c9505593c9ee916c3171 100644 --- a/loopy/execution.py +++ b/loopy/execution.py @@ -187,7 +187,12 @@ class KernelExecutorBase(object): def get_typed_and_scheduled_kernel(self, arg_to_dtype_set): from loopy import CACHING_ENABLED - cache_key = (type(self).__name__, self.kernel, arg_to_dtype_set) + from loopy.preprocess import prepare_for_caching + # prepare_for_caching() gets run by preprocess, but the kernel at this + # stage is not guaranteed to be preprocessed. + cacheable_kernel = prepare_for_caching(self.kernel) + cache_key = (type(self).__name__, cacheable_kernel, arg_to_dtype_set) + if CACHING_ENABLED: try: return typed_and_scheduled_cache[cache_key] diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index a02fc58d97f370d45f36a465c38fa3caf3da9d41..531cc822e1bc76573ef6e0812970d16bd6df0b17 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -862,6 +862,16 @@ class ArrayBase(ImmutableRecord): def __repr__(self): return "<%s>" % self.__str__() + def update_persistent_hash_for_shape(self, key_hash, key_builder, shape): + if isinstance(shape, tuple): + for shape_i in shape: + if shape_i is None: + key_builder.rec(key_hash, shape_i) + else: + key_builder.update_for_pymbolic_expression(key_hash, shape_i) + else: + key_builder.rec(key_hash, shape) + def update_persistent_hash(self, key_hash, key_builder): """Custom hash computation function for use with :class:`pytools.persistent_dict.PersistentDict`. @@ -869,14 +879,7 @@ class ArrayBase(ImmutableRecord): key_builder.rec(key_hash, self.name) key_builder.rec(key_hash, self.dtype) - if isinstance(self.shape, tuple): - for shape_i in self.shape: - if shape_i is None: - key_builder.rec(key_hash, shape_i) - else: - key_builder.update_for_pymbolic_expression(key_hash, shape_i) - else: - key_builder.rec(key_hash, self.shape) + self.update_persistent_hash_for_shape(key_hash, key_builder, self.shape) key_builder.rec(key_hash, self.dim_tags) key_builder.rec(key_hash, self.offset) key_builder.rec(key_hash, self.dim_names) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 001dd06326edcad14d8ecd39e29229dd45de8ef2..94b31df12dae516d3539438b7e4ed66ed765e697 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -497,7 +497,8 @@ class TemporaryVariable(ArrayBase): """ super(TemporaryVariable, self).update_persistent_hash(key_hash, key_builder) - key_builder.rec(key_hash, self.storage_shape) + self.update_persistent_hash_for_shape(key_hash, key_builder, + self.storage_shape) key_builder.rec(key_hash, self.base_indices) initializer = self.initializer @@ -510,7 +511,7 @@ class TemporaryVariable(ArrayBase): # }}} -# {{{ subsitution rule +# {{{ substitution rule class SubstitutionRule(ImmutableRecord): """ diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 57cf74b808ae1a7107e76a18a3876785ab8baabd..e05140ff5ba6ba32882607f99eeebfe4dfd80471 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -596,7 +596,8 @@ class SchedulerState(ImmutableRecord): .. attribute:: preschedule A sequence of schedule items that must be inserted into the - schedule, maintaining the same ordering + schedule, maintaining the same relative ordering. Newly scheduled + items may interleave this sequence. .. attribute:: prescheduled_insn_ids @@ -1073,28 +1074,6 @@ def generate_loop_schedules_internal( % iname) continue - if ( - not sched_state.within_subkernel - and iname not in sched_state.prescheduled_inames): - # Avoid messing up some orderings such as picking: - # - # EnterLoop(temporary.reload) - # CallKernel - # ... - # - # instead of - # - # CallKernel - # EnterLoop(temporary.reload) - # ... - # - # This serves a heuristic to catch some bad decisions early, the - # scheduler will not allow the first variant regardless. - if debug_mode: - print("scheduling '%s' prohibited because we are outside " - "a subkernel" % iname) - continue - currently_accessible_inames = ( active_inames_set | sched_state.parallel_inames) if ( diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 7b3a67c6b0b11a3adc68d58a10f309a6ee21919e..45da8eb3e3d7ccd784f9825995483018b4d897eb 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -232,7 +232,8 @@ class TypeInferenceMapper(CombineMapper): # Codegen for complex types depends on exactly correct types. # Refuse temptation to guess. raise TypeInferenceFailure("Complex constant '%s' needs to " - "be sized for type inference " % expr) + "be sized (i.e. as numpy.complex64/128) for type inference " + % expr) else: raise TypeInferenceFailure("Cannot deduce type of constant '%s'" % expr) diff --git a/test/test_loopy.py b/test/test_loopy.py index 48cb6980ab79bcfc640a19551d9a7708b6a2b20c..78633abbd41408ae700aa8516e8a9c6f70f018a9 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2376,6 +2376,21 @@ def test_kernel_var_name_generator(): assert vng("b") != "b" +def test_execution_backend_can_cache_dtypes(ctx_factory): + # When the kernel is invoked, the execution backend uses it as a cache key + # for the type inference and scheduling cache. This tests to make sure that + # dtypes in the kernel can be cached, even though they may not have a + # target. + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl = lp.make_kernel("{[i]: 0 <= i < 10}", "<>tmp[i] = i") + knl = lp.add_dtypes(knl, dict(tmp=int)) + + knl(queue) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])