diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index 1ba295777c88a79190171867354b04dec5d9405b..b3cfbc5c485f9670f59de4edda80ffabf6e23076 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -100,7 +100,7 @@ Modifying Arguments .. autofunction:: rename_argument -.. autofunction:: set_temporary_scope +.. autofunction:: set_temporary_address_space Creating Batches of Operations ------------------------------ diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 2671de282b3018792bc2d83cecc1fc6467a268fe..b7197b664356e1dcde3fc8c107b86806302e12f1 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -958,7 +958,7 @@ Consider the following example: ... out[16*i_outer + i_inner] = sum(k, a_temp[k]) ... """) >>> knl = lp.tag_inames(knl, dict(i_outer="g.0", i_inner="l.0")) - >>> knl = lp.set_temporary_scope(knl, "a_temp", "local") + >>> knl = lp.set_temporary_address_space(knl, "a_temp", "local") >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) @@ -1078,7 +1078,7 @@ Temporaries in global memory local and private temporaries, the runtime allocates storage for global temporaries when the kernel gets executed. The user must explicitly specify that a temporary is global. To specify that a temporary is global, use -:func:`loopy.set_temporary_scope`. +:func:`loopy.set_temporary_address_space`. Substitution rules ~~~~~~~~~~~~~~~~~~ @@ -1260,8 +1260,8 @@ put those instructions into the schedule. ... --------------------------------------------------------------------------- TEMPORARIES: - tmp: type: np:dtype('int32'), shape: () scope:private - tmp_save_slot: type: np:dtype('int32'), shape: (n // 16, 16), dim_tags: (N1:stride:16, N0:stride:1) scope:global + tmp: type: np:dtype('int32'), shape: () aspace:private + tmp_save_slot: type: np:dtype('int32'), shape: (n // 16, 16), dim_tags: (N1:stride:16, N0:stride:1) aspace:global --------------------------------------------------------------------------- ... --------------------------------------------------------------------------- @@ -1565,8 +1565,8 @@ information provided. Now we will count the operations: .. doctest:: >>> op_map = lp.get_op_map(knl, subgroup_size=32) - >>> print(lp.stringify_stats_mapping(op_map)) - Op(np:dtype('float32'), add, subgroup, "stats_knl") : ... + >>> print(op_map) + Op(np:dtype('float32'), add, subgroup, "stats_knl"): ... Each line of output will look roughly like:: @@ -1627,8 +1627,8 @@ together into keys containing only the specified fields: .. doctest:: >>> op_map_dtype = op_map.group_by('dtype') - >>> print(lp.stringify_stats_mapping(op_map_dtype)) - Op(np:dtype('float32'), None, None) : ... + >>> print(op_map_dtype) + Op(np:dtype('float32'), None, None): ... <BLANKLINE> >>> f32op_count = op_map_dtype[lp.Op(dtype=np.float32) ... ].eval_with_dict(param_dict) @@ -1653,8 +1653,8 @@ we'll continue using the kernel from the previous example: .. doctest:: >>> mem_map = lp.get_mem_access_map(knl, subgroup_size=32) - >>> print(lp.stringify_stats_mapping(mem_map)) - MemAccess(global, np:dtype('float32'), {}, {}, load, a, None, subgroup, 'stats_knl') : ... + >>> print(mem_map) + MemAccess(global, np:dtype('float32'), {}, {}, load, a, None, subgroup, 'stats_knl'): ... <BLANKLINE> Each line of output will look roughly like:: @@ -1724,14 +1724,14 @@ using :func:`loopy.ToCountMap.to_bytes` and :func:`loopy.ToCountMap.group_by`: .. doctest:: >>> bytes_map = mem_map.to_bytes() - >>> print(lp.stringify_stats_mapping(bytes_map)) - MemAccess(global, np:dtype('float32'), {}, {}, load, a, None, subgroup, 'stats_knl') : ... + >>> print(bytes_map) + MemAccess(global, np:dtype('float32'), {}, {}, load, a, None, subgroup, 'stats_knl'): ... <BLANKLINE> >>> global_ld_st_bytes = bytes_map.filter_by(mtype=['global'] ... ).group_by('direction') - >>> print(lp.stringify_stats_mapping(global_ld_st_bytes)) - MemAccess(None, None, None, None, load, None, None, None, None) : ... - MemAccess(None, None, None, None, store, None, None, None, None) : ... + >>> print(global_ld_st_bytes) + MemAccess(None, None, None, None, load, None, None, None, None): ... + MemAccess(None, None, None, None, store, None, None, None, None): ... <BLANKLINE> >>> loaded = global_ld_st_bytes[lp.MemAccess(direction='load') ... ].eval_with_dict(param_dict) @@ -1743,12 +1743,12 @@ using :func:`loopy.ToCountMap.to_bytes` and :func:`loopy.ToCountMap.group_by`: The lines of output above might look like:: - MemAccess(global, np:dtype('float32'), {}, {}, load, a, None, subgroup) : [m, l, n] -> { 8 * m * l * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float32'), {}, {}, load, b, None, subgroup) : [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float32'), {}, {}, store, c, None, subgroup) : [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), {}, {}, load, g, None, subgroup) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), {}, {}, load, h, None, subgroup) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } - MemAccess(global, np:dtype('float64'), {}, {}, store, e, None, subgroup) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float32'), {}, {}, load, a, None, subgroup): [m, l, n] -> { 8 * m * l * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float32'), {}, {}, load, b, None, subgroup): [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float32'), {}, {}, store, c, None, subgroup): [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float64'), {}, {}, load, g, None, subgroup): [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float64'), {}, {}, load, h, None, subgroup): [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } + MemAccess(global, np:dtype('float64'), {}, {}, store, e, None, subgroup): [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 } One can see how these functions might be useful in computing, for example, achieved memory bandwidth in byte/sec or performance in FLOP/sec. @@ -1767,13 +1767,13 @@ this time. >>> knl_consec = lp.split_iname(knl, "k", 128, ... outer_tag="l.1", inner_tag="l.0") >>> mem_map = lp.get_mem_access_map(knl_consec, subgroup_size=32) - >>> print(lp.stringify_stats_mapping(mem_map)) - MemAccess(global, np:dtype('float32'), {0: 1, 1: 128}, {}, load, a, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float32'), {0: 1, 1: 128}, {}, load, b, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float32'), {0: 1, 1: 128}, {}, store, c, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float64'), {0: 1, 1: 128}, {}, load, g, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float64'), {0: 1, 1: 128}, {}, load, h, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float64'), {0: 1, 1: 128}, {}, store, e, None, workitem, 'stats_knl') : ... + >>> print(mem_map) + MemAccess(global, np:dtype('float32'), {0: 1, 1: 128}, {}, load, a, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float32'), {0: 1, 1: 128}, {}, load, b, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float32'), {0: 1, 1: 128}, {}, store, c, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float64'), {0: 1, 1: 128}, {}, load, g, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float64'), {0: 1, 1: 128}, {}, load, h, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float64'), {0: 1, 1: 128}, {}, store, e, None, workitem, 'stats_knl'): ... <BLANKLINE> With this parallelization, consecutive work-items will access consecutive array @@ -1812,13 +1812,13 @@ we'll switch the inner and outer tags in our parallelization of the kernel: >>> knl_nonconsec = lp.split_iname(knl, "k", 128, ... outer_tag="l.0", inner_tag="l.1") >>> mem_map = lp.get_mem_access_map(knl_nonconsec, subgroup_size=32) - >>> print(lp.stringify_stats_mapping(mem_map)) - MemAccess(global, np:dtype('float32'), {0: 128, 1: 1}, {}, load, a, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float32'), {0: 128, 1: 1}, {}, load, b, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float32'), {0: 128, 1: 1}, {}, store, c, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float64'), {0: 128, 1: 1}, {}, load, g, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float64'), {0: 128, 1: 1}, {}, load, h, None, workitem, 'stats_knl') : ... - MemAccess(global, np:dtype('float64'), {0: 128, 1: 1}, {}, store, e, None, workitem, 'stats_knl') : ... + >>> print(mem_map) + MemAccess(global, np:dtype('float32'), {0: 128, 1: 1}, {}, load, a, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float32'), {0: 128, 1: 1}, {}, load, b, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float32'), {0: 128, 1: 1}, {}, store, c, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float64'), {0: 128, 1: 1}, {}, load, g, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float64'), {0: 128, 1: 1}, {}, load, h, None, workitem, 'stats_knl'): ... + MemAccess(global, np:dtype('float64'), {0: 128, 1: 1}, {}, store, e, None, workitem, 'stats_knl'): ... <BLANKLINE> With this parallelization, consecutive work-items will access *nonconsecutive* @@ -1870,8 +1870,8 @@ kernel from the previous example: .. doctest:: >>> sync_map = lp.get_synchronization_map(knl) - >>> print(lp.stringify_stats_mapping(sync_map)) - Sync(kernel_launch, stats_knl) : [l, m, n] -> { 1 } + >>> print(sync_map) + Sync(kernel_launch, stats_knl): [l, m, n] -> { 1 } <BLANKLINE> We can evaluate this polynomial using :meth:`islpy.PwQPolynomial.eval_with_dict`: @@ -1930,9 +1930,9 @@ count the barriers using :func:`loopy.get_synchronization_map`: .. doctest:: >>> sync_map = lp.get_synchronization_map(knl) - >>> print(lp.stringify_stats_mapping(sync_map)) - Sync(barrier_local, loopy_kernel) : { 1000 } - Sync(kernel_launch, loopy_kernel) : { 1 } + >>> print(sync_map) + Sync(barrier_local, loopy_kernel): { 1000 } + Sync(kernel_launch, loopy_kernel): { 1 } <BLANKLINE> Based on the kernel code printed above, we would expect each work-item to diff --git a/loopy/__init__.py b/loopy/__init__.py index 1f61c7719aff2a3b36a4f33a408f878f58fcd5d1..57e214833e17bd7b4d762f0079573a8cdeb980af 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -30,26 +30,26 @@ from loopy.translation_unit import for_each_kernel from loopy.kernel.instruction import ( LegacyStringInstructionTag, UseStreamingStoreTag, - MemoryOrdering, memory_ordering, - MemoryScope, memory_scope, + MemoryOrdering, + MemoryScope, VarAtomicity, OrderedAtomic, AtomicInit, AtomicUpdate, InstructionBase, - MultiAssignmentBase, Assignment, ExpressionInstruction, + MultiAssignmentBase, Assignment, CallInstruction, CInstruction, NoOpInstruction, BarrierInstruction) from loopy.kernel.data import ( auto, KernelArgument, ValueArg, ArrayArg, GlobalArg, ConstantArg, ImageArg, - AddressSpace, temp_var_scope, + AddressSpace, TemporaryVariable, SubstitutionRule, CallMangleInfo) from loopy.kernel.function_interface import ( CallableKernel, ScalarCallable) from loopy.translation_unit import ( - TranslationUnit, Program, make_program) + TranslationUnit, make_program) -from loopy.kernel import LoopKernel, KernelState, kernel_state +from loopy.kernel import LoopKernel, KernelState from loopy.kernel.tools import ( get_dot_dependency_graph, show_dependency_graph, @@ -69,7 +69,7 @@ from loopy.library.reduction import register_reduction_parser from loopy.version import VERSION, MOST_RECENT_LANGUAGE_VERSION from loopy.transform.iname import ( - set_loop_priority, prioritize_loops, untag_inames, + prioritize_loops, untag_inames, split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames, rename_iname, rename_inames, remove_unused_inames, split_reduction_inward, split_reduction_outward, @@ -94,7 +94,8 @@ from loopy.transform.data import ( remove_unused_arguments, alias_temporaries, set_argument_order, rename_argument, - set_temporary_scope) + set_temporary_scope, + set_temporary_address_space) from loopy.transform.subst import (extract_subst, assignment_to_subst, expand_subst, find_rules_matching, @@ -133,7 +134,7 @@ from loopy.schedule import ( generate_loop_schedules, get_one_scheduled_kernel, get_one_linearized_kernel, linearize) from loopy.statistics import (ToCountMap, ToCountPolynomialMap, CountGranularity, - stringify_stats_mapping, Op, MemAccess, get_op_map, get_mem_access_map, + Op, MemAccess, get_op_map, get_mem_access_map, get_synchronization_map, gather_access_footprints, gather_access_footprint_bytes, Sync) from loopy.codegen import ( @@ -167,26 +168,26 @@ __all__ = [ "auto", "LoopKernel", - "KernelState", "kernel_state", # lower case is deprecated + "KernelState", "LegacyStringInstructionTag", "UseStreamingStoreTag", - "MemoryOrdering", "memory_ordering", # lower case is deprecated - "MemoryScope", "memory_scope", # lower case is deprecated + "MemoryOrdering", + "MemoryScope", "VarAtomicity", "OrderedAtomic", "AtomicInit", "AtomicUpdate", "InstructionBase", - "MultiAssignmentBase", "Assignment", "ExpressionInstruction", + "MultiAssignmentBase", "Assignment", "CallInstruction", "CInstruction", "NoOpInstruction", "BarrierInstruction", "ScalarCallable", "CallableKernel", - "TranslationUnit", "make_program", "Program", + "TranslationUnit", "make_program", "KernelArgument", "ValueArg", "ArrayArg", "GlobalArg", "ConstantArg", "ImageArg", - "AddressSpace", "temp_var_scope", # temp_var_scope is deprecated + "AddressSpace", "TemporaryVariable", "SubstitutionRule", "CallMangleInfo", @@ -199,7 +200,7 @@ __all__ = [ # {{{ transforms - "set_loop_priority", "prioritize_loops", "untag_inames", + "prioritize_loops", "untag_inames", "split_iname", "chunk_iname", "join_inames", "tag_inames", "duplicate_inames", "rename_iname", "rename_inames", "remove_unused_inames", @@ -214,7 +215,7 @@ __all__ = [ "set_array_axis_names", "set_array_dim_names", "remove_unused_arguments", "alias_temporaries", "set_argument_order", - "rename_argument", "set_temporary_scope", + "rename_argument", "set_temporary_scope", "set_temporary_address_space", "find_instructions", "map_instructions", "set_instruction_priority", "add_dependency", @@ -280,7 +281,7 @@ __all__ = [ "generate_code", "generate_code_v2", "generate_body", "ToCountMap", "ToCountPolynomialMap", "CountGranularity", - "stringify_stats_mapping", "Op", "MemAccess", "get_op_map", + "Op", "MemAccess", "get_op_map", "get_mem_access_map", "get_synchronization_map", "gather_access_footprints", "gather_access_footprint_bytes", "Sync", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 8ddf3bf17a79550b39e41d0c17f85531489cb411..8eb085f3434d7211acf00b387161fd56d4d93bf3 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -93,57 +93,12 @@ class _UniqueVarNameGenerator(UniqueNameGenerator): # {{{ loop kernel object -class _deprecated_KernelState_SCHEDULED: # noqa - def __init__(self, f): - self.f = f - - def __get__(self, obj, klass): - warn( - "'KernelState.SCHEDULED' is deprecated. " - "Use 'KernelState.LINEARIZED'.", - DeprecationWarning, stacklevel=2) - return self.f() - class KernelState: # noqa INITIAL = 0 CALLS_RESOLVED = 1 PREPROCESSED = 2 LINEARIZED = 3 - @_deprecated_KernelState_SCHEDULED - def SCHEDULED(): # pylint:disable=no-method-argument - return KernelState.LINEARIZED - -# {{{ kernel_state, KernelState compataibility - -class _deperecated_kernel_state_class_method: # noqa - def __init__(self, f): - self.f = f - - def __get__(self, obj, klass): - warn("'temp_var_scope' is deprecated. Use 'AddressSpace'.", - DeprecationWarning, stacklevel=2) - return self.f() - - -class kernel_state: # noqa - """Deprecated. Use :class:`loopy.kernel.KernelState` instead. - """ - - @_deperecated_kernel_state_class_method - def INITIAL(): # pylint:disable=no-method-argument - return KernelState.INITIAL - - @_deperecated_kernel_state_class_method - def PREPROCESSED(): # pylint:disable=no-method-argument - return KernelState.PREPROCESSED - - @_deperecated_kernel_state_class_method - def SCHEDULED(): # pylint:disable=no-method-argument - return KernelState.SCHEDULED - -# }}} - def _get_inames_from_domains(domains): return frozenset().union(* diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index bffdd84ecf3e29ff822a84c2d4a1c54b84b43ae9..0c7c81441cd8940fdc0c4e955c3c4c21bba238b2 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -688,7 +688,7 @@ class ArrayBase(ImmutableRecord, Taggable): def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0, dim_names=None, strides=None, order=None, for_atomic=False, - target=None, alignment=None, tags=None, **kwargs): + alignment=None, tags=None, **kwargs): """ All of the following (except *name*) are optional. Specify either strides or shape. @@ -741,12 +741,7 @@ class ArrayBase(ImmutableRecord, Taggable): for_atomic=for_atomic) if dtype is lp.auto: - warn("Argument/temporary data type for '%s' should be None if " - "unspecified, not auto. This usage will be disallowed in 2018." - % name, - DeprecationWarning, stacklevel=2) - - dtype = None + raise ValueError("dtype may not be lp.auto") strides_known = strides is not None and strides is not lp.auto shape_known = shape is not None and shape is not lp.auto @@ -875,10 +870,6 @@ class ArrayBase(ImmutableRecord, Taggable): if tags is None: tags = frozenset() - if target is not None: - warn("Passing target is deprecated and will stop working in 2022.", - DeprecationWarning, stacklevel=2) - ImmutableRecord.__init__(self, name=name, dtype=dtype, @@ -915,12 +906,6 @@ class ArrayBase(ImmutableRecord, Taggable): and self.tags == other.tags ) - def target(self): - warn("Array.target is deprecated and will go away in 2022.", - DeprecationWarning, stacklevel=2) - - return None - def __ne__(self, other): return not self.__eq__(other) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index f2c6420371023c84af9a4d8bc779cd53bd55e58d..5b536dfd41a7c7fb0357d4ede64c8b76681bbd73 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -24,7 +24,6 @@ THE SOFTWARE. """ -import sys from sys import intern import numpy as np # noqa from pytools import ImmutableRecord @@ -41,11 +40,9 @@ from loopy.kernel.instruction import ( # noqa AtomicUpdate, MultiAssignmentBase, Assignment, - ExpressionInstruction, CallInstruction, make_assignment, CInstruction) -from warnings import warn __doc__ = """ .. autofunction:: filter_iname_tags_by_type @@ -145,11 +142,6 @@ class HardwareConcurrentTag(ConcurrentTag): pass -# deprecated aliases -ParallelTag = ConcurrentTag -HardwareParallelTag = HardwareConcurrentTag - - class UniqueInameTag(InameImplementationTag): pass @@ -296,39 +288,6 @@ class AddressSpace: else: raise ValueError("unexpected value of AddressSpace") - -class _deprecated_temp_var_scope_class_method: # noqa - def __init__(self, f): - self.f = f - - def __get__(self, obj, klass): - warn("'temp_var_scope' is deprecated. Use 'AddressSpace'.", - DeprecationWarning, stacklevel=2) - return self.f() - - -class temp_var_scope: # noqa - """Deprecated. Use :class:`loopy.AddressSpace` instead. - """ - - @_deprecated_temp_var_scope_class_method - def PRIVATE(): # pylint:disable=no-method-argument - return AddressSpace.PRIVATE - - @_deprecated_temp_var_scope_class_method - def LOCAL(): # pylint:disable=no-method-argument - return AddressSpace.LOCAL - - @_deprecated_temp_var_scope_class_method - def GLOBAL(): # pylint:disable=no-method-argument - return AddressSpace.GLOBAL - - @classmethod - def stringify(cls, val): - warn("'temp_var_scope' is deprecated. Use 'AddressSpace'.", - DeprecationWarning, stacklevel=2) - return AddressSpace.stringify(val) - # }}} @@ -356,12 +315,8 @@ class KernelArgument(ImmutableRecord): import loopy as lp if dtype is lp.auto: - warn("Argument/temporary data type for '%s' should be None if " - "unspecified, not auto. This usage will be disallowed in 2018." - % kwargs["name"], - DeprecationWarning, stacklevel=2) + raise TypeError("dtype may not be lp.auto") - dtype = None kwargs["dtype"] = dtype kwargs["is_output"] = kwargs.pop("is_output", None) kwargs["is_input"] = kwargs.pop("is_input", None) @@ -400,15 +355,8 @@ class ArrayArg(ArrayBase, KernelArgument): if "address_space" not in kwargs: raise TypeError("'address_space' must be specified") - is_output_only = kwargs.pop("is_output_only", None) - if is_output_only is not None: - warn("'is_output_only' is deprecated. Use 'is_output', 'is_input'" - " instead.", DeprecationWarning, stacklevel=2) - kwargs["is_output"] = is_output_only - kwargs["is_input"] = not is_output_only - else: - kwargs["is_output"] = kwargs.pop("is_output", None) - kwargs["is_input"] = kwargs.pop("is_input", None) + kwargs["is_output"] = kwargs.pop("is_output", None) + kwargs["is_input"] = kwargs.pop("is_input", None) super().__init__(*args, **kwargs) @@ -623,19 +571,6 @@ class TemporaryVariable(ArrayBase): :arg base_indices: :class:`loopy.auto` or a tuple of base indices """ - scope = kwargs.pop("scope", None) - if scope is not None: - warn("Passing 'scope' is deprecated. Use 'address_space' instead.", - DeprecationWarning, stacklevel=2) - - if address_space is not None: - raise ValueError("only one of 'scope' and 'address_space' " - "may be specified") - else: - address_space = scope - - del scope - if address_space is None: address_space = auto @@ -718,29 +653,8 @@ class TemporaryVariable(ArrayBase): _base_storage_access_may_be_aliasing), **kwargs) - @property - def scope(self): - warn("Use of 'TemporaryVariable.scope' is deprecated, " - "use 'TemporaryVariable.address_space' instead.", - DeprecationWarning, stacklevel=2) - - return self.address_space - def copy(self, **kwargs): address_space = kwargs.pop("address_space", None) - scope = kwargs.pop("scope", None) - - if scope is not None: - warn("Passing 'scope' is deprecated. Use 'address_space' instead.", - DeprecationWarning, stacklevel=2) - - if address_space is not None: - raise ValueError("only one of 'scope' and 'address_space' " - "may be specified") - else: - address_space = scope - - del scope if address_space is not None: kwargs["address_space"] = address_space @@ -771,14 +685,14 @@ class TemporaryVariable(ArrayBase): def __str__(self): if self.address_space is auto: - scope_str = "auto" + aspace_str = "auto" else: - scope_str = AddressSpace.stringify(self.address_space) + aspace_str = AddressSpace.stringify(self.address_space) return ( self.stringify(include_typename=False) + - " scope:%s" % scope_str) + " aspace:%s" % aspace_str) def __eq__(self, other): return ( @@ -818,17 +732,6 @@ class TemporaryVariable(ArrayBase): # }}} -def iname_tag_to_temp_var_scope(iname_tag): - iname_tag = parse_tag(iname_tag) - - if isinstance(iname_tag, GroupInameTag): - return AddressSpace.GLOBAL - elif isinstance(iname_tag, LocalInameTag): - return AddressSpace.LOCAL - else: - return AddressSpace.PRIVATE - - # {{{ substitution rule class SubstitutionRule(ImmutableRecord): @@ -949,37 +852,4 @@ class Iname(Taggable): # }}} -# {{{ deprecation helpers - -_old_to_new = { - "IndexTag": "InameImplementationTag", - "GroupIndexTag": "GroupInameTag", - "LocalIndexTagBase": "LocalInameTagBase", - "LocalIndexTag": "LocalInameTag", - "UniqueTag": "UniqueInameTag", - } - -if sys.version_info < (3, 7): - _glb = globals() - for _old, _new in _old_to_new.items(): - _glb[_old] = _glb[_new] - - del _old - del _new - del _glb -else: - def __getattr__(name): - new_name = _old_to_new.get(name) - if new_name is None: - raise AttributeError(name) - else: - from warnings import warn - warn(f"loopy.kernel.data.{name} is deprecated. " - f"Use loopy.kernel.data.{new_name} instead. " - "The old name will stop working in 2022.", - DeprecationWarning, stacklevel=2) - return globals()[new_name] - -# }}} - # vim: foldmethod=marker diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 80f16e1c98a77976cfd1112c7084db1a1bfc2519..29c1f64d14a6947b59c5bdf6a2a320c7138aceb3 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -301,7 +301,6 @@ class InKernelCallable(ImmutableRecord): .. automethod:: __init__ .. automethod:: with_types .. automethod:: with_descrs - .. automethod:: with_target .. automethod:: generate_preambles .. automethod:: emit_call .. automethod:: emit_call_insn @@ -389,21 +388,7 @@ class InKernelCallable(ImmutableRecord): raise NotImplementedError() - def with_target(self, target): - """ - Returns a copy of *self* with all the ``dtypes`` in - ``in_knl_callable.arg_id_to_dtype`` associated with the *target*. - - :arg target: An instance of :class:`loopy.target.TargetBase`. - """ - from warnings import warn - warn("InKernelCallable.with_target is deprecated, will be removed in " - "2022.", DeprecationWarning, stacklevel=2) - - return self - def is_ready_for_codegen(self): - return (self.arg_id_to_dtype is not None and self.arg_id_to_descr is not None) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 09a0711a3bf4057162a2501d1cfa53523b47ff41..3fce4f28312ddb937eb3e80f385e661fa3e8dd65 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -552,51 +552,6 @@ class MemoryOrdering: # noqa raise ValueError("Unknown value of MemoryOrdering") -# {{{ memory_ordering, MemoryOrdering compatibility - -class _deprecated_memory_ordering_class_method: # noqa - def __init__(self, f): - self.f = f - - def __get__(self, obj, klass): - warn("'memory_ordering' is deprecated. Use 'MemoryOrdering'.", - DeprecationWarning, stacklevel=2) - return self.f() - - -class memory_ordering: # noqa - """Deprecated. Use :class:`MemoryOrdering` instead. - """ - - @_deprecated_memory_ordering_class_method - def RELAXED(): # pylint:disable=no-method-argument - return MemoryOrdering.RELAXED - - @_deprecated_memory_ordering_class_method - def ACQUIRE(): # pylint:disable=no-method-argument - return MemoryOrdering.ACQUIRE - - @_deprecated_memory_ordering_class_method - def RELEASE(): # pylint:disable=no-method-argument - return MemoryOrdering.RELEASE - - @_deprecated_memory_ordering_class_method - def ACQ_REL(): # pylint:disable=no-method-argument - return MemoryOrdering.ACQ_REL - - @_deprecated_memory_ordering_class_method - def SEQ_CST(): # pylint:disable=no-method-argument - return MemoryOrdering.SEQ_CST - - @staticmethod - def to_string(v): - warn("'memory_ordering' is deprecated. Use 'MemoryOrdering'.", - DeprecationWarning, stacklevel=2) - return MemoryOrdering.to_string(v) - -# }}} - - class MemoryScope: # noqa """Scope of atomicity, defined as in OpenCL. @@ -629,51 +584,6 @@ class MemoryScope: # noqa raise ValueError("Unknown value of MemoryScope") -# {{{ memory_scope, MemoryScope compatiability - -class _deprecated_memory_scope_class_method: # noqa - def __init__(self, f): - self.f = f - - def __get__(self, obj, klass): - warn("'memory_scope' is deprecated. Use 'MemoryScope'.", - DeprecationWarning, stacklevel=2) - return self.f() - - -class memory_scope: # noqa - """Deprecated. Use :class:`MemoryScope` instead. - """ - - @_deprecated_memory_scope_class_method - def WORK_ITEM(): # pylint:disable=no-method-argument - return MemoryScope.WORK_ITEM - - @_deprecated_memory_scope_class_method - def WORK_GROUP(): # pylint:disable=no-method-argument - return MemoryScope.WORK_GROUP - - @_deprecated_memory_scope_class_method - def DEVICE(): # pylint:disable=no-method-argument - return MemoryScope.DEVICE - - @_deprecated_memory_scope_class_method - def ALL_SVM_DEVICES(): # pylint:disable=no-method-argument - return MemoryScope.ALL_SVM_DEVICES - - @_deprecated_memory_scope_class_method - def auto(): # pylint:disable=no-method-argument - return MemoryScope.auto - - @staticmethod - def to_string(v): - warn("'memory_scope' is deprecated. Use 'MemoryScope'.", - DeprecationWarning, stacklevel=2) - return MemoryScope.to_string(v) - -# }}} - - class VarAtomicity: """A base class for the description of how atomic access to :attr:`var_name` shall proceed. @@ -999,14 +909,6 @@ class Assignment(MultiAssignmentBase): # }}} - -class ExpressionInstruction(Assignment): - def __init__(self, *args, **kwargs): - warn("ExpressionInstruction is deprecated. Use Assignment instead", - DeprecationWarning, stacklevel=2) - - super().__init__(*args, **kwargs) - # }}} @@ -1601,13 +1503,6 @@ class BarrierInstruction(_DataObliviousInstruction): return first_line - @property - def kind(self): - from warnings import warn - warn("BarrierInstruction.kind is deprecated, use synchronization_kind " - "instead", DeprecationWarning, stacklevel=2) - return self.synchronization_kind - # }}} diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 9a9b1c6e9ad5de6d0b21e1b432c2e4547373c490..8e32e417038f669b7306eda664a4f4dd12255ce1 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -108,7 +108,7 @@ class ScalarReductionOperation(ReductionOperation): def result_dtypes(self, arg_dtype): if arg_dtype is None: - return None + return (None,) return (arg_dtype,) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 1b2a01840c1359371e43a130644fd217b4b023d3..2b2e23278ab0ccd3a6391b6e97999f3310528d33 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -42,19 +42,6 @@ from pytools import ProcessLogger from functools import partial -# {{{ prepare for caching - -def prepare_for_caching(program): - from warnings import warn - warn("prepare_for_caching is deprecated and no longer needed. " - "It will stop working in 2022.", - DeprecationWarning, stacklevel=2) - - return program - -# }}} - - # {{{ check for writes to predicates def check_for_writes_to_predicates(kernel): @@ -547,7 +534,7 @@ def filter_reachable_callables(t_unit): return t_unit.copy(callables_table=new_callables) -def _preprocess_single_kernel(kernel, callables_table, device=None): +def _preprocess_single_kernel(kernel, callables_table): from loopy.kernel import KernelState prepro_logger = ProcessLogger(logger, "%s: preprocess" % kernel.name) @@ -596,7 +583,7 @@ def _preprocess_single_kernel(kernel, callables_table, device=None): @memoize_on_disk -def preprocess_program(program, device=None): +def preprocess_program(program): from loopy.kernel import KernelState if program.state >= KernelState.PREPROCESSED: @@ -616,12 +603,6 @@ def preprocess_program(program, device=None): program = filter_reachable_callables(program) - if device is not None: - # FIXME: Time to remove this? (Git blame shows 5 years ago) - from warnings import warn - warn("passing 'device' to preprocess_kernel() is deprecated", - DeprecationWarning, stacklevel=2) - program = infer_unknown_types(program, expect_completion=False) from loopy.transform.subst import expand_subst @@ -655,8 +636,7 @@ def preprocess_program(program, device=None): for func_id, in_knl_callable in program.callables_table.items(): if isinstance(in_knl_callable, CallableKernel): new_subkernel = _preprocess_single_kernel( - in_knl_callable.subkernel, program.callables_table, - device) + in_knl_callable.subkernel, program.callables_table) in_knl_callable = in_knl_callable.copy( subkernel=new_subkernel) elif isinstance(in_knl_callable, ScalarCallable): diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 5822f44ed4608d4968a034144c03a42e6f029f9d..b117726226978d65873024dd02d83202c93c49f9 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -24,8 +24,7 @@ THE SOFTWARE. from pytools import ImmutableRecord import sys import islpy as isl -from loopy.diagnostic import (warn_with_kernel, LoopyError, - ScheduleDebugInputError) +from loopy.diagnostic import LoopyError, ScheduleDebugInputError, warn_with_kernel from pytools import MinRecursionLimit, ProcessLogger @@ -415,7 +414,7 @@ def format_insn(kernel, insn_id): from loopy.kernel.instruction import ( MultiAssignmentBase, NoOpInstruction, BarrierInstruction) if isinstance(insn, MultiAssignmentBase): - return "{}{}{} = {}{}{} {{id={}}}".format( + return "{}{}{} = {}{}{} {{id={}}""}".format( Fore.CYAN, ", ".join(str(a) for a in insn.assignees), Style.RESET_ALL, Fore.MAGENTA, str(insn.expression), Style.RESET_ALL, format_insn_id(kernel, insn_id)) @@ -2133,7 +2132,7 @@ schedule_cache = WriteOncePersistentDict( key_builder=LoopyKeyBuilder()) -def _get_one_scheduled_kernel_inner(kernel, callables_table): +def _get_one_linearized_kernel_inner(kernel, callables_table): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the @@ -2146,15 +2145,6 @@ def _get_one_scheduled_kernel_inner(kernel, callables_table): return next(iter(generate_loop_schedules(kernel, callables_table))) -def get_one_scheduled_kernel(kernel, callables_table): - warn_with_kernel( - kernel, "get_one_scheduled_kernel_deprecated", - "get_one_scheduled_kernel is deprecated. " - "Use get_one_linearized_kernel instead.", - DeprecationWarning, stacklevel=2) - return get_one_linearized_kernel(kernel, callables_table) - - def get_one_linearized_kernel(kernel, callables_table): from loopy import CACHING_ENABLED @@ -2175,7 +2165,7 @@ def get_one_linearized_kernel(kernel, callables_table): if not from_cache: with ProcessLogger(logger, "%s: schedule" % kernel.name): with MinRecursionLimitForScheduling(kernel): - result = _get_one_scheduled_kernel_inner(kernel, + result = _get_one_linearized_kernel_inner(kernel, callables_table) if CACHING_ENABLED and not from_cache: @@ -2184,6 +2174,15 @@ def get_one_linearized_kernel(kernel, callables_table): return result +def get_one_scheduled_kernel(kernel, callables_table): + warn_with_kernel( + kernel, "get_one_scheduled_kernel_deprecated", + "get_one_scheduled_kernel is deprecated. " + "Use get_one_linearized_kernel instead.", + DeprecationWarning, stacklevel=2) + return get_one_linearized_kernel(kernel, callables_table) + + def linearize(t_unit): from loopy.kernel.function_interface import (CallableKernel, ScalarCallable) diff --git a/loopy/statistics.py b/loopy/statistics.py index 03511249a9c40c09ab2193871935b072b5a36889..11d88769d9eb0067b101aa64e4947b4d7398194b 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -571,18 +571,6 @@ def subst_into_to_count_map(space, tcm, subst_dict): # }}} -def stringify_stats_mapping(m): - - from warnings import warn - warn("stringify_stats_mapping is deprecated and will be removed in 2020." - " Use ToCountMap.__str__() instead.", DeprecationWarning, stacklevel=2) - - result = "" - for key in sorted(m.keys(), key=lambda k: str(k)): - result += ("{} : {}\n".format(key, m[key])) - return result - - # {{{ CountGranularity class CountGranularity: @@ -743,7 +731,7 @@ class MemAccess(ImmutableRecord): def __init__(self, mtype=None, dtype=None, lid_strides=None, gid_strides=None, direction=None, variable=None, - *, variable_tags=None, variable_tag=None, + *, variable_tags=None, count_granularity=None, kernel_name=None): if count_granularity not in CountGranularity.ALL+[None]: @@ -751,24 +739,9 @@ class MemAccess(ImmutableRecord): "not allowed. count_granularity options: %s" % (count_granularity, CountGranularity.ALL+[None])) - # {{{ normalize variable_tags - - if variable_tags is not None and variable_tag is not None: - raise TypeError( - "may not specify both 'variable_tags' and 'variable_tag'") - if variable_tag is not None: - from loopy.kernel.creation import _normalize_string_tag - variable_tags = frozenset({_normalize_string_tag(variable_tag)}) - - from warnings import warn - warn("Passing 'variable_tag' to MemAccess is deprecated and will " - "stop working in 2022. Pass variable_tags instead.") - if variable_tags is None: variable_tags = frozenset() - # }}} - if dtype is not None: from loopy.types import to_loopy_type dtype = to_loopy_type(dtype) @@ -780,20 +753,6 @@ class MemAccess(ImmutableRecord): count_granularity=count_granularity, kernel_name=kernel_name) - @property - def variable_tag(self): - from warnings import warn - warn("Accessing MemAccess.variable_tag is deprecated and will stop working " - "in 2022. Use MemAccess.variable_tags instead.", DeprecationWarning, - stacklevel=2) - - if len(self.variable_tags) != 1: - raise ValueError("cannot access MemAccess.variable_tag: access has " - f"{len(self.variable_tags)} tags") - - tag, = self.variable_tags - return tag - def __hash__(self): # dicts in gid_strides and lid_strides aren't natively hashable return hash(repr(self)) @@ -812,6 +771,7 @@ class MemAccess(ImmutableRecord): "None" if not self.variable_tags else str(self.variable_tags), self.count_granularity, repr(self.kernel_name)) + # }}} diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8f702f783494e9949d799df2773561e3c36e40d5..43682cd0dac1d26ea0a9ae75c1cb5281df0f7bed 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -673,20 +673,6 @@ class TaggedVariable(LoopyExpressionBase, p.Variable, Taggable): Taggable.__init__(self, tags) - @property - def tag(self): - from warnings import warn - warn("Accessing TaggedVariable.tag is deprecated and will stop working " - "in 2022. Use TaggedVariable.tags instead.", DeprecationWarning, - stacklevel=2) - - if len(self.tags) != 1: - raise ValueError("cannot access TaggedVariable.tag: variable has " - f"{len(self.tags)} tags") - - tag, = self.tags - return tag - def __getinitargs__(self): return self.name, self.tags diff --git a/loopy/tools.py b/loopy/tools.py index d12ff750c406faee86f76f18cfe4516456e66530..a14a3fd0936785c8003774401a954a25ef352bfb 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -656,6 +656,8 @@ def intern_frozenset_of_ids(fs): return frozenset(intern(s) for s in fs) +# {{{ t_unit_to_python + def _is_generated_t_unit_the_same(python_code, var_name, ref_t_unit): """ Helper for :func:`kernel_to_python`. Returns *True* only if the variable @@ -743,7 +745,7 @@ def _kernel_to_python(kernel, is_entrypoint=False, var_name="kernel"): python_code = r""" <%! import loopy as lp %> - <%! tv_scope = {0: 'lp.AddressSpace.PRIVATE', 1: 'lp.AddressSpace.LOCAL', + <%! tv_aspace = {0: 'lp.AddressSpace.PRIVATE', 1: 'lp.AddressSpace.LOCAL', 2: 'lp.AddressSpace.GLOBAL', lp.auto: 'lp.auto' } %> ${var_name} = lp.${make_kernel}( [ @@ -770,11 +772,11 @@ def _kernel_to_python(kernel, is_entrypoint=False, var_name="kernel"): lp.ValueArg( name="${arg.name}", dtype=${('np.'+arg.dtype.numpy_dtype.name - if arg.dtype else 'lp.auto')}), + if arg.dtype else 'None')}), % else: lp.GlobalArg( name="${arg.name}", dtype=${('np.'+arg.dtype.numpy_dtype.name - if arg.dtype else 'lp.auto')}, + if arg.dtype else 'None')}, shape=${arg.shape}, for_atomic=${arg.for_atomic}), % endif % endfor @@ -783,7 +785,7 @@ def _kernel_to_python(kernel, is_entrypoint=False, var_name="kernel"): name="${tv.name}", dtype=${'np.'+tv.dtype.numpy_dtype.name if tv.dtype else 'lp.auto'}, shape=${tv.shape}, for_atomic=${tv.for_atomic}, - address_space=${tv_scope[tv.address_space]}, + address_space=${tv_aspace[tv.address_space]}, read_only=${tv.read_only}, % if tv.initializer is not None: initializer=${"np."+repr(tv.initializer)}, @@ -871,6 +873,10 @@ def t_unit_to_python(t_unit, var_name="t_unit", else: return python_code +# }}} + + +# {{{ memoize_on_disk def memoize_on_disk(func, key_builder_t=LoopyKeyBuilder): from loopy.version import DATA_MODEL_VERSION @@ -928,4 +934,6 @@ def memoize_on_disk(func, key_builder_t=LoopyKeyBuilder): return wrapper +# }}} + # vim: fdm=marker diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index e3dbeeb513a115c8b649ae805864066ae39b6e5a..5af437c05c5995c2c86aea469ae76126b49fc0fa 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -125,7 +125,7 @@ class ArrayAccessReplacer(RuleAwareIdentityMapper): def buffer_array_for_single_kernel(kernel, callables_table, var_name, buffer_inames, init_expression=None, store_expression=None, within=None, default_tag="l.auto", temporary_scope=None, - temporary_is_local=None, fetch_bounding_box=False): + fetch_bounding_box=False): """Replace accesses to *var_name* with ones to a temporary, which is created and acts as a buffer. To perform this transformation, the access footprint to *var_name* is determined and a temporary of a suitable @@ -170,32 +170,11 @@ def buffer_array_for_single_kernel(kernel, callables_table, var_name, return kernel.with_kernel(buffer_array(kernel[kernel_names[0]], var_name, buffer_inames, init_expression, store_expression, within, - default_tag, temporary_scope, temporary_is_local, + default_tag, temporary_scope, fetch_bounding_box, kernel.callables_table)) assert isinstance(kernel, LoopKernel) - # {{{ unify temporary_scope / temporary_is_local - - from loopy.kernel.data import AddressSpace - if temporary_is_local is not None: - from warnings import warn - warn("temporary_is_local is deprecated. Use temporary_scope instead", - DeprecationWarning, stacklevel=2) - - if temporary_scope is not None: - raise LoopyError("may not specify both temporary_is_local and " - "temporary_scope") - - if temporary_is_local: - temporary_scope = AddressSpace.LOCAL - else: - temporary_scope = AddressSpace.PRIVATE - - del temporary_is_local - - # }}} - # {{{ process arguments if isinstance(init_expression, str): diff --git a/loopy/transform/data.py b/loopy/transform/data.py index c91aee4c3d594a610b822e237d57af38cb9c9ca4..aab148ad3e30a26d528d87baa7e37c613e41b605 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -352,7 +352,6 @@ def add_prefetch_for_single_kernel(kernel, callables_table, var_name, fetch_bounding_box=fetch_bounding_box, temporary_name=temporary_name, temporary_address_space=temporary_address_space, - temporary_scope=temporary_scope, precompute_outer_inames=fetch_outer_inames, compute_insn_id=prefetch_insn_id, within=within) @@ -743,15 +742,15 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): # }}} -# {{{ set temporary scope +# {{{ set temporary address space @for_each_kernel -def set_temporary_scope(kernel, temp_var_names, scope): +def set_temporary_address_space(kernel, temp_var_names, address_space): """ :arg temp_var_names: a container with membership checking, or a comma-separated string of variables for which the - scope is to be set. - :arg scope: One of the values from :class:`loopy.AddressSpace`, or one + address space is to be set. + :arg address_space: One of the values from :class:`loopy.AddressSpace`, or one of the strings ``"private"``, ``"local"``, or ``"global"``. """ @@ -759,17 +758,17 @@ def set_temporary_scope(kernel, temp_var_names, scope): temp_var_names = [s.strip() for s in temp_var_names.split(",")] from loopy.kernel.data import AddressSpace - if isinstance(scope, str): + if isinstance(address_space, str): try: - scope = getattr(AddressSpace, scope.upper()) + address_space = getattr(AddressSpace, address_space.upper()) except AttributeError: - raise LoopyError("scope '%s' unknown" % scope) + raise LoopyError("address_space '%s' unknown" % address_space) - if not isinstance(scope, int) or scope not in [ + if not isinstance(address_space, int) or address_space not in [ AddressSpace.PRIVATE, AddressSpace.LOCAL, AddressSpace.GLOBAL]: - raise LoopyError("invalid scope '%s'" % scope) + raise LoopyError("invalid address_space '%s'" % address_space) new_temp_vars = kernel.temporary_variables.copy() for tv_name in temp_var_names: @@ -778,10 +777,19 @@ def set_temporary_scope(kernel, temp_var_names, scope): except KeyError: raise LoopyError("temporary '%s' not found" % tv_name) - new_temp_vars[tv_name] = tv.copy(address_space=scope) + new_temp_vars[tv_name] = tv.copy(address_space=address_space) return kernel.copy(temporary_variables=new_temp_vars) + +def set_temporary_scope(kernel, temp_var_names, address_space): + from warnings import warn + warn("set_temporary_scope is deprecated and will stop working in " + "July 2022. Use set_temporary_address_space instead.", + DeprecationWarning, stacklevel=2) + + return set_temporary_address_space(kernel, temp_var_names, address_space) + # }}} diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index d82b2b3520b6f539dfa59fd02a5d0b1f1a1aedeb..65c93cabdc261dfe0d5039568178c504f7033823 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -83,21 +83,6 @@ __doc__ = """ # {{{ set loop priority -@for_each_kernel -def set_loop_priority(kernel, loop_priority): - from warnings import warn - warn("set_loop_priority is deprecated. Use prioritize_loops instead. " - "Attention: A call to set_loop_priority will overwrite any previously " - "set priorities!", DeprecationWarning, stacklevel=2) - - if isinstance(loop_priority, str): - loop_priority = tuple(s.strip() - for s in loop_priority.split(",") if s.strip()) - loop_priority = tuple(loop_priority) - - return kernel.copy(loop_priority=frozenset([loop_priority])) - - @for_each_kernel def prioritize_loops(kernel, loop_priority): """Indicates the textual order in which loops should be entered in the @@ -1032,7 +1017,7 @@ def _get_iname_duplication_options(insn_iname_sets, old_common_inames=frozenset( # If partitioning was empty, we have recursed successfully and yield nothing -def get_iname_duplication_options(kernel, use_boostable_into=None): +def get_iname_duplication_options(kernel): """List options for duplication of inames, if necessary for schedulability :returns: a generator listing all options to duplicate inames, if duplication @@ -1069,15 +1054,6 @@ def get_iname_duplication_options(kernel, use_boostable_into=None): assert isinstance(kernel, LoopKernel) - if use_boostable_into: - raise LoopyError("'use_boostable_into=True' is no longer supported.") - - if use_boostable_into is False: - from warnings import warn - warn("passing 'use_boostable_into=False' to 'get_iname_duplication_options'" - " is deprecated. The argument will go away in 2021.", - DeprecationWarning, stacklevel=2) - from loopy.kernel.data import ConcurrentTag concurrent_inames = { diff --git a/loopy/transform/pack_and_unpack_args.py b/loopy/transform/pack_and_unpack_args.py index c221c8235de53e55987ed8a6f5b181360b0afa17..9ced3fdc63a4cc5ebe7e6b30e462b0b17c03685a 100644 --- a/loopy/transform/pack_and_unpack_args.py +++ b/loopy/transform/pack_and_unpack_args.py @@ -177,7 +177,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, pack_name = vng(arg + "_pack") from loopy.kernel.data import (TemporaryVariable, - temp_var_scope) + AddressSpace) if arg in kernel.arg_dict: arg_in_caller = kernel.arg_dict[arg] @@ -189,7 +189,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, dtype=arg_in_caller.dtype, dim_tags=in_knl_callable.arg_id_to_descr[arg_id].dim_tags, shape=in_knl_callable.arg_id_to_descr[arg_id].shape, - scope=temp_var_scope.PRIVATE, + address_space=AddressSpace.PRIVATE, ) new_tmps[pack_name] = pack_tmp diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 201abd4704ef883603400e4ab2bfaafae1e8d9fb..52ee88c20b9553a6f7e4c63a5c3ed70c51f9178c 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -21,6 +21,7 @@ THE SOFTWARE. """ +import numpy as np import islpy as isl from loopy.symbolic import (get_dependencies, RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, @@ -31,7 +32,7 @@ from loopy.translation_unit import TranslationUnit from loopy.kernel.function_interface import CallableKernel, ScalarCallable from loopy.kernel.tools import (kernel_has_global_barriers, find_most_recent_global_barrier) -import numpy as np +from loopy.kernel.data import AddressSpace from pymbolic import var @@ -389,26 +390,6 @@ def precompute_for_single_kernel(kernel, callables_table, subst_use, default_tag, dtype, fetch_bounding_box, temporary_address_space, compute_insn_id, kernel.callables_table, **kwargs)) - # {{{ unify temporary_address_space / temporary_scope - - temporary_scope = kwargs.pop("temporary_scope", None) - - from loopy.kernel.data import AddressSpace - if temporary_scope is not None: - from warnings import warn - warn("temporary_scope is deprecated. Use temporary_address_space instead", - DeprecationWarning, stacklevel=2) - - if temporary_address_space is not None: - raise LoopyError("may not specify both temporary_address_space and " - "temporary_scope") - - temporary_address_space = temporary_scope - - del temporary_scope - - # }}} - if kwargs: raise TypeError("unrecognized keyword arguments: %s" % ", ".join(kwargs.keys())) @@ -1055,7 +1036,7 @@ def precompute_for_single_kernel(kernel, callables_table, subst_use, pass else: raise LoopyError("Existing and new temporary '%s' do not " - "have matching scopes (existing: %s, new: %s)" + "have matching address spaces (existing: %s, new: %s)" % (temporary_name, AddressSpace.stringify(temp_var.address_space), AddressSpace.stringify(temporary_address_space))) diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index 16cc5a682e4ec6cbd675569ec46a0f4a922ce10a..fb9b8fc1829bf4b6fbd1b4dd069a660e87eda733 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -368,14 +368,6 @@ class TranslationUnit(ImmutableRecord): self._hash_value = hash(key_hash.digest()) return self._hash_value - -class Program(TranslationUnit): - def __init__(self, *args, **kwargs): - from warnings import warn - warn("Program is deprecated, use TranslationUnit instead, " - "will be removed in 2022", DeprecationWarning, stacklevel=2) - super().__init__(*args, **kwargs) - # }}} diff --git a/loopy/type_inference.py b/loopy/type_inference.py index f2fbe6d6fc4b835ebec79de362cd81dc98ec9834..2ad687cb091e9b6b41f128a81f8c35bdad723d93 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -1056,7 +1056,6 @@ def infer_unknown_types(program, expect_completion=False): def infer_arg_and_reduction_dtypes_for_reduction_expression( kernel, expr, callables_table, unknown_types_ok): type_inf_mapper = TypeReader(kernel, callables_table) - import loopy as lp if expr.is_tuple_typed: arg_dtypes_result = type_inf_mapper( @@ -1066,7 +1065,7 @@ def infer_arg_and_reduction_dtypes_for_reduction_expression( arg_dtypes = arg_dtypes_result[0] else: if unknown_types_ok: - arg_dtypes = [lp.auto] * expr.operation.arg_count + arg_dtypes = [None] * expr.operation.arg_count else: raise LoopyError("failed to determine types of accumulators for " "reduction '%s'" % expr) @@ -1075,18 +1074,14 @@ def infer_arg_and_reduction_dtypes_for_reduction_expression( arg_dtypes = [type_inf_mapper(expr)] except DependencyTypeInferenceFailure: if unknown_types_ok: - arg_dtypes = [lp.auto] + arg_dtypes = [None] else: raise LoopyError("failed to determine type of accumulator for " "reduction '%s'" % expr) reduction_dtypes = expr.operation.result_dtypes(*arg_dtypes) - reduction_dtypes = tuple( - dt - if dt is not lp.auto else dt - for dt in reduction_dtypes) - return tuple(arg_dtypes), reduction_dtypes + return tuple(arg_dtypes), tuple(reduction_dtypes) # }}} diff --git a/loopy/types.py b/loopy/types.py index 01a1e7885592fc89d8f86545783baf114a858a5f..e7504862acc7944fa65fbdebb6cfd851f07ac812 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -43,18 +43,6 @@ class LoopyType: Abstract class for dtypes of variables encountered in a :class:`loopy.LoopKernel`. """ - def target(self): - warn("LoopyType.target is deprecated and will go away in 2022.", - DeprecationWarning, stacklevel=2) - - return None - - def with_target(self, target): - warn("LoopyType.with_target is deprecated and will go away in 2022.", - DeprecationWarning, stacklevel=2) - - return self - def is_integral(self): raise NotImplementedError() diff --git a/loopy/version.py b/loopy/version.py index aa94283d05bc9ee46760da54862fc8ea75ade8a0..5372b59355d63c2158505bbc41411ef17ac41a38 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -42,7 +42,7 @@ else: # }}} -VERSION = (2021, 2) +VERSION = (2022, 1) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS diff --git a/test/test_callables.py b/test/test_callables.py index 58581f2defd7a29497f7f2fcdeeb2b99b9e03dbf..f0955aefe772c5020db7487902ef43460a91ee0f 100644 --- a/test/test_callables.py +++ b/test/test_callables.py @@ -554,7 +554,7 @@ def test_callees_with_gbarriers_are_inlined(ctx_factory): "{ : }", """ y[:] = ones_and_zeros() - """, [lp.GlobalArg("y", shape=6, dtype=lp.auto)]) + """, [lp.GlobalArg("y", shape=6, dtype=None)]) t_unit = lp.merge([t_unit, ones_and_zeros]) evt, (out,) = t_unit(queue) @@ -589,7 +589,7 @@ def test_callees_with_gbarriers_are_inlined_with_nested_calls(ctx_factory): "{ : }", """ y[:] = dummy_ones_and_zeros() - """, [lp.GlobalArg("y", shape=6, dtype=lp.auto)]) + """, [lp.GlobalArg("y", shape=6, dtype=None)]) t_unit = lp.merge([t_unit, dummy_ones_and_zeros, ones_and_zeros]) evt, (out,) = t_unit(queue) @@ -617,7 +617,7 @@ def test_inlining_with_indirections(ctx_factory): "{ : }", """ y[:] = ones_and_zeros(mymap[:]) - """, [lp.GlobalArg("y", shape=6, dtype=lp.auto), + """, [lp.GlobalArg("y", shape=6, dtype=None), lp.GlobalArg("mymap", dtype=np.int32, shape=3)]) t_unit = lp.merge([t_unit, ones_and_zeros]) diff --git a/test/test_loopy.py b/test/test_loopy.py index f17376bf9e430b9dc6326a8d9f65cc2166afe87e..10171ef6e87e6e32f8796f9a72dfe72d715619e2 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -64,7 +64,7 @@ def test_globals_decl_once_with_multi_subprogram(ctx_factory): """, [ lp.TemporaryVariable( - "cnst", initializer=cnst, scope=lp.AddressSpace.GLOBAL, + "cnst", initializer=cnst, address_space=lp.AddressSpace.GLOBAL, read_only=True), lp.GlobalArg("out", is_input=False, shape=lp.auto), "..."]) @@ -1137,7 +1137,7 @@ def test_save_of_private_array(ctx_factory, debug=False): end """, seq_dependencies=True) - knl = lp.set_temporary_scope(knl, "t", "private") + knl = lp.set_temporary_address_space(knl, "t", "private") save_and_reload_temporaries_test(queue, knl, np.arange(8), debug) @@ -1160,7 +1160,7 @@ def test_save_of_private_array_in_hw_loop(ctx_factory, debug=False): """, seq_dependencies=True) knl = lp.tag_inames(knl, dict(i="g.0")) - knl = lp.set_temporary_scope(knl, "t", "private") + knl = lp.set_temporary_address_space(knl, "t", "private") save_and_reload_temporaries_test( queue, knl, np.vstack(8 * (np.arange(8),)), debug) @@ -1184,7 +1184,7 @@ def test_save_of_private_multidim_array(ctx_factory, debug=False): end """, seq_dependencies=True) - knl = lp.set_temporary_scope(knl, "t", "private") + knl = lp.set_temporary_address_space(knl, "t", "private") result = np.array([np.vstack(8 * (np.arange(8),)) for i in range(8)]) save_and_reload_temporaries_test(queue, knl, result, debug) @@ -1208,7 +1208,7 @@ def test_save_of_private_multidim_array_in_hw_loop(ctx_factory, debug=False): end """, seq_dependencies=True) - knl = lp.set_temporary_scope(knl, "t", "private") + knl = lp.set_temporary_address_space(knl, "t", "private") knl = lp.tag_inames(knl, dict(i="g.0")) result = np.array([np.vstack(8 * (np.arange(8),)) for i in range(8)]) @@ -1240,7 +1240,7 @@ def test_save_of_multiple_private_temporaries(ctx_factory, hw_loop, debug=False) end """, seq_dependencies=True) - knl = lp.set_temporary_scope(knl, "t_arr", "private") + knl = lp.set_temporary_address_space(knl, "t_arr", "private") if hw_loop: knl = lp.tag_inames(knl, dict(i="g.0")) @@ -1264,7 +1264,7 @@ def test_save_of_local_array(ctx_factory, debug=False): end """, seq_dependencies=True) - knl = lp.set_temporary_scope(knl, "t", "local") + knl = lp.set_temporary_address_space(knl, "t", "local") knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) save_and_reload_temporaries_test(queue, knl, np.arange(8), debug) @@ -1286,7 +1286,7 @@ def test_save_of_local_array_with_explicit_local_barrier(ctx_factory, debug=Fals end """, seq_dependencies=True) - knl = lp.set_temporary_scope(knl, "t", "local") + knl = lp.set_temporary_address_space(knl, "t", "local") knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) save_and_reload_temporaries_test(queue, knl, np.arange(8), debug) @@ -1307,7 +1307,7 @@ def test_save_local_multidim_array(ctx_factory, debug=False): end """, seq_dependencies=True) - knl = lp.set_temporary_scope(knl, "t_local", "local") + knl = lp.set_temporary_address_space(knl, "t_local", "local") knl = lp.tag_inames(knl, dict(j="l.0", i="g.0")) save_and_reload_temporaries_test(queue, knl, 1, debug) @@ -1329,8 +1329,8 @@ def test_save_with_base_storage(ctx_factory, debug=False): seq_dependencies=True) knl = lp.tag_inames(knl, dict(i="l.0")) - knl = lp.set_temporary_scope(knl, "a", "local") - knl = lp.set_temporary_scope(knl, "b", "local") + knl = lp.set_temporary_address_space(knl, "a", "local") + knl = lp.set_temporary_address_space(knl, "b", "local") knl = lp.alias_temporaries(knl, ["a", "b"], synchronize_for_exclusive_use=False) @@ -1350,7 +1350,7 @@ def test_save_ambiguous_storage_requirements(): knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) knl = lp.duplicate_inames(knl, "j", within="writes:out", tags={"j": "l.0"}) - knl = lp.set_temporary_scope(knl, "a", "local") + knl = lp.set_temporary_address_space(knl, "a", "local") from loopy.diagnostic import LoopyError with pytest.raises(LoopyError): @@ -1427,7 +1427,7 @@ def test_global_temporary(ctx_factory): knl = lp.add_and_infer_dtypes(knl, {"a": np.float32, "c": np.float32, "out": np.float32, "n": np.int32}) - knl = lp.set_temporary_scope(knl, "c", "global") + knl = lp.set_temporary_address_space(knl, "c", "global") ref_knl = knl @@ -1679,8 +1679,8 @@ def test_global_barrier(ctx_factory): knl = lp.add_and_infer_dtypes(knl, {"z": np.float64}) ref_knl = knl - ref_knl = lp.set_temporary_scope(ref_knl, "z", "global") - ref_knl = lp.set_temporary_scope(ref_knl, "v", "global") + ref_knl = lp.set_temporary_address_space(ref_knl, "z", "global") + ref_knl = lp.set_temporary_address_space(ref_knl, "v", "global") knl = lp.split_iname(knl, "i", 256, outer_tag="g.0", inner_tag="l.0") print(knl) @@ -1712,7 +1712,7 @@ def test_missing_global_barrier(): end """) - knl = lp.set_temporary_scope(knl, "z", "global") + knl = lp.set_temporary_address_space(knl, "z", "global") knl = lp.split_iname(knl, "i", 256, outer_tag="g.0") knl = lp.add_dtypes(knl, {"z": np.float32, "v": np.float32}) knl = lp.preprocess_kernel(knl) @@ -2222,8 +2222,8 @@ def test_barrier_insertion_near_top_of_loop(): seq_dependencies=True) prog = lp.tag_inames(prog, dict(i="l.0")) - prog = lp.set_temporary_scope(prog, "a", "local") - prog = lp.set_temporary_scope(prog, "b", "local") + prog = lp.set_temporary_address_space(prog, "a", "local") + prog = lp.set_temporary_address_space(prog, "b", "local") prog = lp.preprocess_kernel(prog) knl = lp.get_one_linearized_kernel(prog["loopy_kernel"], prog.callables_table) @@ -2250,8 +2250,8 @@ def test_barrier_insertion_near_bottom_of_loop(): """, seq_dependencies=True) prog = lp.tag_inames(prog, dict(i="l.0")) - prog = lp.set_temporary_scope(prog, "a", "local") - prog = lp.set_temporary_scope(prog, "b", "local") + prog = lp.set_temporary_address_space(prog, "a", "local") + prog = lp.set_temporary_address_space(prog, "b", "local") prog = lp.preprocess_kernel(prog) knl = lp.get_one_linearized_kernel(prog["loopy_kernel"], prog.callables_table) diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 8ba55629877e77b184f9d075e89e3aa5ab758d1f..d273fdfd7405ee0937ab0f1558e0f60d30b9d9b6 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -242,11 +242,11 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa if 1: print("OPS") op_map = lp.get_op_map(hsv, subgroup_size=32) - print(lp.stringify_stats_mapping(op_map)) + print(op_map) print("MEM") gmem_map = lp.get_mem_access_map(hsv, subgroup_size=32).to_bytes() - print(lp.stringify_stats_mapping(gmem_map)) + print(gmem_map) # FIXME: renaming's a bit tricky in this program model. # add a simple transformation for it diff --git a/test/test_statistics.py b/test/test_statistics.py index 0f074ae530ebfc830a3d514fedda6b339c0c7391..e3d253698ec19434f72c8352f2959adc1a23414c 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -947,7 +947,7 @@ def test_mem_access_counter_global_temps(): # Change temporary b address space knl = lp.privatize_temporaries_with_inames(knl, "i,j", "b") - knl = lp.set_temporary_scope(knl, "b", "global") + knl = lp.set_temporary_address_space(knl, "b", "global") mem_map = lp.get_mem_access_map(knl, count_redundant_work=True, subgroup_size="guess") @@ -1020,8 +1020,8 @@ def test_barrier_counter_barriers(): e[i,j,k] = c[i,j,k+1]+c[i,j,k-1] {dep=first} """ ], [ - lp.TemporaryVariable("c", lp.auto, shape=(50, 10, 99)), - "..." + lp.TemporaryVariable("c", shape=(50, 10, 99)), + ... ], name="weird2", ) @@ -1235,37 +1235,40 @@ def test_mem_access_tagged_variables(): mem_access_map = lp.get_mem_access_map(knl, count_redundant_work=True, subgroup_size=SGS) - f32s1lb = mem_access_map[lp.MemAccess("global", np.float32, - lid_strides={0: 1}, - gid_strides={1: bsize}, - direction="load", variable="b", - variable_tag="mmbload", - count_granularity=CG.WORKITEM, - kernel_name="matmul") - ].eval_with_dict(params) - f32s1la = mem_access_map[lp.MemAccess("global", np.float32, - lid_strides={1: Variable("m")}, - gid_strides={0: Variable("m")*bsize}, - direction="load", - variable="a", - variable_tag="mmaload", - count_granularity=CG.SUBGROUP, - kernel_name="matmul") - ].eval_with_dict(params) + f32s1lb = mem_access_map[ + lp.MemAccess("global", np.float32, + lid_strides={0: 1}, + gid_strides={1: bsize}, + direction="load", variable="b", + variable_tags=frozenset([lp.LegacyStringInstructionTag("mmbload")]), + count_granularity=CG.WORKITEM, + kernel_name="matmul") + ].eval_with_dict(params) + f32s1la = mem_access_map[ + lp.MemAccess("global", np.float32, + lid_strides={1: Variable("m")}, + gid_strides={0: Variable("m")*bsize}, + direction="load", + variable="a", + variable_tags=frozenset([lp.LegacyStringInstructionTag("mmaload")]), + count_granularity=CG.SUBGROUP, + kernel_name="matmul") + ].eval_with_dict(params) assert f32s1lb == n*m*ell # uniform: (count-per-sub-group)*n_subgroups assert f32s1la == m*n_subgroups - f32coal = mem_access_map[lp.MemAccess("global", np.float32, - lid_strides={0: 1, 1: Variable("ell")}, - gid_strides={0: Variable("ell")*bsize, 1: bsize}, - direction="store", variable="c", - variable_tag="mmresult", - count_granularity=CG.WORKITEM, - kernel_name="matmul") - ].eval_with_dict(params) + f32coal = mem_access_map[ + lp.MemAccess("global", np.float32, + lid_strides={0: 1, 1: Variable("ell")}, + gid_strides={0: Variable("ell")*bsize, 1: bsize}, + direction="store", variable="c", + variable_tags=frozenset([lp.LegacyStringInstructionTag("mmresult")]), + count_granularity=CG.WORKITEM, + kernel_name="matmul") + ].eval_with_dict(params) assert f32coal == n*ell diff --git a/test/test_transform.py b/test/test_transform.py index 2aa07dabb42fe5987e6c48c115b1588b0f39de1b..fc1d0efec440380f35fe11e76545e4349acbc0a1 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -506,8 +506,8 @@ def test_add_nosync(): tmp5[i] = 1 {id=insn6,conflicts=g1} """, name="nosync") - orig_prog = lp.set_temporary_scope(orig_prog, "tmp3", "local") - orig_prog = lp.set_temporary_scope(orig_prog, "tmp5", "local") + orig_prog = lp.set_temporary_address_space(orig_prog, "tmp3", "local") + orig_prog = lp.set_temporary_address_space(orig_prog, "tmp5", "local") # No dependency present - don't add nosync prog = lp.add_nosync(orig_prog, "any", "writes:tmp", "writes:tmp2",