diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py index 8556430d0b50df3fba70f799c0921779e7111528..62a980f0d1dcbd4ca6508d3538e39461fe1c81e3 100644 --- a/contrib/c-integer-semantics.py +++ b/contrib/c-integer-semantics.py @@ -95,8 +95,8 @@ def main(): func.argtypes = [ctypes.c_longlong, ctypes.c_longlong] func.restype = ctypes.c_longlong - cdiv = int_exp.cdiv # noqa - cmod = int_exp.cmod # noqa + cdiv = int_exp.cdiv + cmod = int_exp.cmod int_floor_div = int_exp.loopy_floor_div_int64 int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64 int_mod_pos_b = int_exp.loopy_mod_pos_b_int64 diff --git a/contrib/mem-pattern-explorer/pattern_vis.py b/contrib/mem-pattern-explorer/pattern_vis.py index bbde231740fef0d2dd3f5942ad4ec24cd641795b..f285dbb886298d7eb624a3a8623acf5a5a7a659b 100644 --- a/contrib/mem-pattern-explorer/pattern_vis.py +++ b/contrib/mem-pattern-explorer/pattern_vis.py @@ -27,7 +27,7 @@ class ArrayAccessPatternContext: self.arrays = [] - def l(self, index): # noqa: E741,E743 + def l(self, index): # noqa: E743 subscript = [np.newaxis] * self.ind_length subscript[len(self.gsize) + index] = slice(None) @@ -147,7 +147,7 @@ class Array: div_ceil(nelements, self.elements_per_row), self.elements_per_row,) shaped_array = np.zeros( - base_shape + (self.nattributes,), + (*base_shape, self.nattributes), dtype=np.float32) shaped_array.reshape(-1, self.nattributes)[:nelements] = self.array @@ -160,7 +160,7 @@ class Array: else: subgroup.fill(1) - rgb_array = np.zeros(base_shape + (3,)) + rgb_array = np.zeros((*base_shape, 3)) if 1: if len(self.ctx.gsize) > 1: # g.0 -> red diff --git a/doc/conf.py b/doc/conf.py index 951b02211d6986e2bfbf2f0d460c90e11d9adaf2..70f7121b4dcef4ebb1eef624c75edc2bd8610e7d 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -2,7 +2,7 @@ import os from urllib.request import urlopen -_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" # noqa +_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" with urlopen(_conf_url) as _inf: exec(compile(_inf.read(), _conf_url, "exec"), globals()) diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py index bf6e29e479e3fc49b01e590e55471f399bcb1e23..ce61b16bef98b34f1d03bce918a29a910d0ae2c5 100644 --- a/examples/python/ispc-stream-harness.py +++ b/examples/python/ispc-stream-harness.py @@ -26,7 +26,7 @@ def transform(knl, vars, stream_dtype): knl = lp.add_and_infer_dtypes(knl, dict.fromkeys(vars, stream_dtype)) - knl = lp.set_argument_order(knl, vars + ["n"]) + knl = lp.set_argument_order(knl, [*vars, "n"]) return knl diff --git a/loopy/__init__.py b/loopy/__init__.py index 07f06a0210adec96460615fe9bec7d3db560c9b2..01d69cf12860fd03b4a0861b08dc406e9d4aeac2 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -465,7 +465,7 @@ def register_preamble_generators(kernel: LoopKernel, preamble_generators): "and would thus disrupt loopy's caches" % pgen) - new_pgens = (pgen,) + new_pgens + new_pgens = (pgen, *new_pgens) return kernel.copy(preamble_generators=new_pgens) @@ -483,7 +483,7 @@ def register_symbol_manglers(kernel, manglers): "and would disrupt loopy's caches" % m) - new_manglers = (m,) + new_manglers + new_manglers = (m, *new_manglers) return kernel.copy(symbol_manglers=new_manglers) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 6ee762556582bf194bbbf4e22c21a7916c6d6eb5..5b411658da37b5e70cfcdfa77900d4293003fd96 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -410,12 +410,12 @@ def auto_test_vs_ref( if ref_entrypoint is None: if len(ref_prog.entrypoints) != 1: raise LoopyError("Unable to guess entrypoint for ref_prog.") - ref_entrypoint = list(ref_prog.entrypoints)[0] + ref_entrypoint = next(iter(ref_prog.entrypoints)) if test_entrypoint is None: if len(test_prog.entrypoints) != 1: raise LoopyError("Unable to guess entrypoint for ref_prog.") - test_entrypoint = list(test_prog.entrypoints)[0] + test_entrypoint = next(iter(test_prog.entrypoints)) ref_prog = lp.preprocess_kernel(ref_prog) test_prog = lp.preprocess_kernel(test_prog) diff --git a/loopy/check.py b/loopy/check.py index 5e4897235075079a7ff602c4eef6c72be36e9350..f96123dece6a02820f73ab1f482c455ecf8edd0b 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -206,7 +206,7 @@ def check_separated_array_consistency(kernel: LoopKernel) -> None: for attr_name in ["address_space", "is_input", "is_output"]: if getattr(arg, attr_name) != getattr(sub_arg, attr_name): raise LoopyError( - "Attribute '{attr_name}' of " + f"Attribute '{attr_name}' of " f"'{arg.name}' and associated sep array " f"'{sub_arg.name}' is not consistent.") @@ -266,7 +266,7 @@ def check_offsets_and_dim_tags(kernel: LoopKernel) -> None: raise LoopyError(f"invalid value of {what}") assert new_dim_tags is not None - new_dim_tags = new_dim_tags + (dim_tag,) + new_dim_tags = (*new_dim_tags, dim_tag) arg = arg.copy(dim_tags=new_dim_tags) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index d460dd54e73e330f62d010694fd0ea44b4cabe18..e9c19b309cbbda94d5833bca4dc86c25cadbf0d4 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -67,8 +67,8 @@ if TYPE_CHECKING: if getattr(sys, "_BUILDING_SPHINX_DOCS", False): - from loopy.codegen.result import GeneratedProgram # noqa: F811 - from loopy.codegen.tools import CodegenOperationCacheManager # noqa: F811 + from loopy.codegen.result import GeneratedProgram + from loopy.codegen.tools import CodegenOperationCacheManager __doc__ = """ @@ -666,7 +666,7 @@ def generate_code_v2(t_unit: TranslationUnit) -> CodeGenerationResult: # adding the callee fdecls to the device_programs device_programs = ([device_programs[0].copy( ast=t_unit.target.get_device_ast_builder().ast_module.Collection( - callee_fdecls+[device_programs[0].ast]))] + + [*callee_fdecls, device_programs[0].ast]))] + device_programs[1:]) def not_reduction_op(name: str | ReductionOpFunction) -> str: diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index bee09229fc096e27e6ed4ef6cb101c94c0817e67..26e1b8f3da134e2523fd6c9f2be2b30db0905b15 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -470,7 +470,7 @@ def build_loop_nest(codegen_state, schedule_index): prev_gen_code = gen_code - def gen_code(inner_codegen_state): # noqa pylint:disable=function-redefined + def gen_code(inner_codegen_state): # pylint: disable=function-redefined condition_exprs = [ constraint_to_cond_expr(cns) for cns in bounds_checks] + list(pred_checks) diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 1bc26733ed0957e3a390ded78d07cfcd0d2e2e9d..84dedc3eaacd4163022934604fe8c2a263c797cf 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -191,7 +191,7 @@ def generate_assignment_instruction_code(codegen_state, insn): from pymbolic.mapper.stringifier import PREC_NONE lhs_code = codegen_state.expression_to_code_mapper(insn.assignee, PREC_NONE) - from cgen import Statement as S # noqa + from cgen import Statement as S gs, ls = kernel.get_grid_size_upper_bounds(codegen_state.callables_table) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 7fcb4294a0c530538e64cbc40fde24f82b71eb44..0f534592cfa5fc4e7a7fab73d9d6d20573f330d6 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -210,9 +210,7 @@ class CodeGenerationResult: assert program.is_device_program return self.copy( device_programs=( - list(self.device_programs[:-1]) - + - [program])) + [*list(self.device_programs[:-1]), program])) else: assert program.name == codegen_state.gen_program_name assert not program.is_device_program diff --git a/loopy/frontend/fortran/__init__.py b/loopy/frontend/fortran/__init__.py index 5e6ff24d0bf74768677d6d3f2ec97e43feb5df01..29986ddda0d6f896f9e5cb45aea2e8f2c74ff3bd 100644 --- a/loopy/frontend/fortran/__init__.py +++ b/loopy/frontend/fortran/__init__.py @@ -225,7 +225,7 @@ def parse_transformed_fortran(source, free_form=True, strict=True, prev_sys_path = sys.path try: if infile_dirname: - sys.path = prev_sys_path + [infile_dirname] + sys.path = [*prev_sys_path, infile_dirname] if pre_transform_code is not None: proc_dict["_MODULE_SOURCE_CODE"] = pre_transform_code diff --git a/loopy/frontend/fortran/expression.py b/loopy/frontend/fortran/expression.py index bb8394515022b95e67dcf7cedf4d58a88f4b7dcf..7b3200a94166d37a1c064a24da6e4464b6abb93e 100644 --- a/loopy/frontend/fortran/expression.py +++ b/loopy/frontend/fortran/expression.py @@ -21,7 +21,9 @@ THE SOFTWARE. """ import re +from collections.abc import Mapping from sys import intern +from typing import ClassVar import numpy as np @@ -29,6 +31,7 @@ import pytools.lex from pymbolic.parser import Parser as ExpressionParserBase from loopy.frontend.fortran.diagnostic import TranslationError +from loopy.symbolic import LexTable _less_than = intern("less_than") @@ -65,7 +68,7 @@ def tuple_to_complex_literal(expr): # {{{ expression parser class FortranExpressionParser(ExpressionParserBase): - lex_table = [ + lex_table: ClassVar[LexTable] = [ (_less_than, pytools.lex.RE(r"\.lt\.", re.I)), (_greater_than, pytools.lex.RE(r"\.gt\.", re.I)), (_less_equal, pytools.lex.RE(r"\.le\.", re.I)), @@ -142,7 +145,7 @@ class FortranExpressionParser(ExpressionParserBase): return ExpressionParserBase.parse_terminal( self, pstate) - COMP_MAP = { + COMP_MAP: ClassVar[Mapping[str, str]] = { _less_than: "<", _less_equal: "<=", _greater_than: ">", diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index fc9eace87512285abd8288c167f90d82a6b23e63..860ed723cf9ad4216edbe91370d5589acde79734 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -22,6 +22,7 @@ THE SOFTWARE. import re from sys import intern +from typing import ClassVar from warnings import warn import numpy as np @@ -53,7 +54,7 @@ class SubscriptIndexAdjuster(IdentityMapper): super().__init__() def get_cache_key(self, expr): - return super().get_cache_key(expr) + (self.scope,) + return (*super().get_cache_key(expr), self.scope) def map_subscript(self, expr): from pymbolic.primitives import Variable @@ -441,7 +442,7 @@ class F2LoopyTranslator(FTreeWalkerBase): def map_Equivalence(self, node): raise NotImplementedError("equivalence") - TYPE_MAP = { + TYPE_MAP: ClassVar[dict[tuple[str, str], type[np.generic]]] = { ("real", ""): np.float32, ("real", "4"): np.float32, ("real", "8"): np.float64, @@ -455,9 +456,9 @@ class F2LoopyTranslator(FTreeWalkerBase): ("integer", "8"): np.int64, } if hasattr(np, "float128"): - TYPE_MAP[("real", "16")] = np.float128 # pylint:disable=no-member + TYPE_MAP["real", "16"] = np.float128 # pylint:disable=no-member if hasattr(np, "complex256"): - TYPE_MAP[("complex", "32")] = np.complex256 # pylint:disable=no-member + TYPE_MAP["complex", "32"] = np.complex256 # pylint:disable=no-member def dtype_from_stmt(self, stmt): length, kind = stmt.selector @@ -471,7 +472,7 @@ class F2LoopyTranslator(FTreeWalkerBase): else: raise RuntimeError("both length and kind specified") - return np.dtype(self.TYPE_MAP[(type(stmt).__name__.lower(), length)]) + return np.dtype(self.TYPE_MAP[type(stmt).__name__.lower(), length]) def map_type_decl(self, node): scope = self.scope_stack[-1] diff --git a/loopy/frontend/fortran/tree.py b/loopy/frontend/fortran/tree.py index b2af66f086f3279f491d1ae57e48de15ba999c0d..4abef510051b1bb50eb3725f235bc0d5aee77898 100644 --- a/loopy/frontend/fortran/tree.py +++ b/loopy/frontend/fortran/tree.py @@ -62,7 +62,7 @@ class FTreeWalkerBase: r"^(?P[_0-9a-zA-Z]+)\s*" r"(\((?P[-+*/0-9:a-zA-Z, \t]+)\))?" r"(\s*=\s*(?P.+))?" - "$") + r"$") def parse_dimension_specs(self, node, dim_decls): def parse_bounds(bounds_str): diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 4f392edd69257d45c85431a3ca98ca1cf2564af2..a3fa94b3e38006d2260b60f533cd9ed7d4a8a91a 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -31,6 +31,7 @@ from typing import ( TYPE_CHECKING, Any, Callable, + ClassVar, Dict, FrozenSet, Iterator, @@ -85,7 +86,7 @@ if TYPE_CHECKING: # {{{ loop kernel object -class KernelState(IntEnum): # noqa +class KernelState(IntEnum): INITIAL = 0 CALLS_RESOLVED = 1 PREPROCESSED = 2 @@ -199,7 +200,7 @@ class LoopKernel(Taggable): were applied to the kernel. These are stored so that they may be repeated on expressions the user specifies later. """ - index_dtype: NumpyType = NumpyType(np.dtype(np.int32)) + index_dtype: NumpyType = NumpyType(np.dtype(np.int32)) # noqa: RUF009 silenced_warnings: FrozenSet[str] = frozenset() # FIXME Yuck, this should go. @@ -1310,7 +1311,7 @@ class LoopKernel(Taggable): # {{{ persistent hash key generation / comparison - hash_fields = [ + hash_fields: ClassVar[Sequence[str]] = [ "domains", "instructions", "args", diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index fa5ae6b192c1ea130cbdf40380312a36c4ae36a8..1c59a9ae49e387fb5c533b83b5af0b3796a1456c 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -63,7 +63,7 @@ if TYPE_CHECKING: from loopy.target import TargetBase if getattr(sys, "_BUILDING_SPHINX_DOCS", False): - from loopy.target import TargetBase # noqa: F811 + from loopy.target import TargetBase T = TypeVar("T") @@ -1217,7 +1217,7 @@ def _apply_offset(sub: Expression, ary: ArrayBase) -> Expression: else: # assume it's an expression # FIXME: mypy can't figure out that ExpressionT + ExpressionT works - return ary.offset + sub # type: ignore[call-overload, arg-type, operator] # noqa: E501 + return ary.offset + sub # type: ignore[call-overload, arg-type, operator] else: return sub diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 4f1803f24a4efc6bf1a27a004450f1d22dacdcc4..43c4a4ee43ab0273300f075fc6c9f117fa1e8242 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -155,13 +155,13 @@ def expand_defines(insn, defines, single_valued=True): "in this context (when expanding '%s')" % define_name) replacements = [ - rep+((replace_pattern % define_name, subval),) + (*rep, (replace_pattern % define_name, subval)) for rep in replacements for subval in value ] else: replacements = [ - rep+((replace_pattern % define_name, value),) + (*rep, (replace_pattern % define_name, value)) for rep in replacements] for rep in replacements: @@ -285,14 +285,12 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): arrow_idx = value.find("->") if arrow_idx >= 0: result["inames_to_dup"] = ( - result.get("inames_to_dup", []) - + - [(value[:arrow_idx], value[arrow_idx+2:])]) + [*result.get("inames_to_dup", []), + (value[:arrow_idx], value[arrow_idx + 2:]) + ]) else: result["inames_to_dup"] = ( - result.get("inames_to_dup", []) - + - [(value, None)]) + [*result.get("inames_to_dup", []), (value, None)]) elif opt_key == "dep" and opt_value is not None: if opt_value.startswith("*"): @@ -2403,7 +2401,7 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): kernel_args.append(dat) continue - if isinstance(dat, ArrayBase) and isinstance(dat.shape, tuple): # noqa pylint:disable=no-member + if isinstance(dat, ArrayBase) and isinstance(dat.shape, tuple): # pylint: disable=no-member new_shape = [] for shape_axis in dat.shape: # pylint:disable=no-member if shape_axis is not None: diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 9761a2946c1fbe2d793aec75d2d7be69ade59eed..01ce9f9532708bf610765bd786bf1d30ed6c850e 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -103,7 +103,7 @@ References # {{{ utilities -def _names_from_expr(expr: Union[None, Expression, str]) -> FrozenSet[str]: +def _names_from_expr(expr: Union[Expression, str, None]) -> FrozenSet[str]: from numbers import Number from loopy.symbolic import DependencyMapper @@ -303,7 +303,7 @@ class InOrderSequentialSequentialTag(InameImplementationTag): return "ord" -ToInameTagConvertible = Union[str, None, Tag] +ToInameTagConvertible = Union[str, Tag, None] def parse_tag(tag: ToInameTagConvertible) -> Optional[Tag]: diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 22abeb8aed08470492d53255a4f56608191a5206..33dfd73f23f7144389b2988fbf579a51fb9b1210 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -22,38 +22,40 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Callable, ClassVar, FrozenSet, Tuple, TypeVar +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, replace +from typing import TYPE_CHECKING, Any, Callable, FrozenSet, TypeVar +from warnings import warn -from pytools import ImmutableRecord +from immutabledict import immutabledict +from typing_extensions import Self from loopy.diagnostic import LoopyError -from loopy.kernel import LoopKernel -from loopy.kernel.array import ArrayBase -from loopy.kernel.data import ArrayArg, ValueArg +from loopy.kernel.array import ArrayBase, ArrayDimImplementationTag +from loopy.kernel.data import AddressSpace, ArrayArg, ValueArg from loopy.symbolic import DependencyMapper, WalkMapper -from loopy.tools import update_persistent_hash +from loopy.types import LoopyType +from loopy.typing import ShapeType if TYPE_CHECKING: from typing_extensions import Self + from loopy.kernel import LoopKernel from loopy.translation_unit import CallablesTable, FunctionIdT __doc__ = """ .. currentmodule:: loopy.kernel.function_interface +.. autoclass:: ArgDescriptor .. autoclass:: ValueArgDescriptor - .. autoclass:: ArrayArgDescriptor .. currentmodule:: loopy .. autoclass:: InKernelCallable - .. autoclass:: CallableKernel - .. autoclass:: ScalarCallable """ @@ -63,7 +65,7 @@ __doc__ = """ ArgDescriptorT = TypeVar("ArgDescriptorT", bound="ArgDescriptor") -class ArgDescriptor(ABC, ImmutableRecord): +class ArgDescriptor(ABC): @abstractmethod def map_expr( self, @@ -75,19 +77,25 @@ class ArgDescriptor(ABC, ImmutableRecord): def depends_on(self) -> frozenset[str]: ... + @abstractmethod + def copy(self, **kwargs: Any) -> Self: + ... + +@dataclass(frozen=True) class ValueArgDescriptor(ArgDescriptor): - hash_fields = () def map_expr(self, subst_mapper): - return self.copy() + return self def depends_on(self): return frozenset() - update_persistent_hash = update_persistent_hash + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) +@dataclass(frozen=True) class ArrayArgDescriptor(ArgDescriptor): """ Records information about an array argument to an in-kernel callable. To be @@ -95,46 +103,39 @@ class ArrayArgDescriptor(ArgDescriptor): :meth:`~loopy.InKernelCallable.with_descrs`, used for matching shape and address space of caller and callee kernels. - .. attribute:: shape - - Shape of the array. - - .. attribute:: address_space - - An attribute of :class:`loopy.AddressSpace`. - - .. attribute:: dim_tags - - A tuple of instances of - :class:`loopy.kernel.array.ArrayDimImplementationTag` + .. autoattribute:: shape + .. autoattribute:: address_space + .. autoattribute:: dim_tags .. automethod:: map_expr .. automethod:: depends_on """ - fields = {"shape", "address_space", "dim_tags"} + shape: ShapeType | None + address_space: AddressSpace + dim_tags: Sequence[ArrayDimImplementationTag] | None + """See :ref:`data-dim-tags`. + """ - def __init__(self, shape, address_space, dim_tags): + if __debug__: + def __post_init__(self): + # {{{ sanity checks - # {{{ sanity checks + from loopy.kernel.array import ArrayDimImplementationTag + from loopy.kernel.data import auto - from loopy.kernel.array import ArrayDimImplementationTag - from loopy.kernel.data import auto + assert isinstance(self.shape, tuple) or self.shape in [None, auto] + assert isinstance(self.dim_tags, tuple) or self.dim_tags is None - assert isinstance(shape, tuple) or shape in [None, auto] - assert isinstance(dim_tags, tuple) or dim_tags is None + if self.dim_tags: + # FIXME at least vector dim tags should be supported + assert all(isinstance(dim_tag, ArrayDimImplementationTag) for dim_tag in + self.dim_tags) - if dim_tags: - # FIXME at least vector dim tags should be supported - assert all(isinstance(dim_tag, ArrayDimImplementationTag) for dim_tag in - dim_tags) + # }}} - # }}} - - super().__init__( - shape=shape, - address_space=address_space, - dim_tags=dim_tags) + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) def map_expr(self, f): """ @@ -173,11 +174,6 @@ class ArrayArgDescriptor(ArgDescriptor): return frozenset(var.name for var in result) - def update_persistent_hash(self, key_hash, key_builder): - key_builder.rec(key_hash, self.shape) - key_builder.rec(key_hash, self.address_space) - key_builder.rec(key_hash, self.dim_tags) - class ExpressionIsScalarChecker(WalkMapper): def __init__(self, kernel): @@ -308,25 +304,14 @@ def get_kw_pos_association(kernel): # {{{ template class -class InKernelCallable(ImmutableRecord): +@dataclass(frozen=True, init=False) +class InKernelCallable(ABC): """ An abstract interface to define a callable encountered in a kernel. - .. attribute:: name - - The name of the callable which can be encountered within expressions in - a kernel. - - .. attribute:: arg_id_to_dtype - - A mapping which indicates the arguments types and result types of the - callable. - - .. attribute:: arg_id_to_descr - - A mapping which gives indicates the argument shape and ``dim_tags`` it - would be responsible for generating code. - + .. autoattribute:: name + .. autoattribute:: arg_id_to_dtype + .. autoattribute:: arg_id_to_descr .. automethod:: __init__ .. automethod:: with_types @@ -352,17 +337,39 @@ class InKernelCallable(ImmutableRecord): return value with (0-based) index *i*. """ + arg_id_to_dtype: Mapping[int | str, LoopyType] | None + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None + + def __init__(self, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + ) -> None: + try: + hash(arg_id_to_dtype) + except TypeError: + arg_id_to_dtype = immutabledict(arg_id_to_dtype) + warn("arg_id_to_dtype passed to InKernelCallable was not hashable. " + "This usage is deprecated and will stop working in 2026.", + DeprecationWarning, stacklevel=3) + + try: + hash(arg_id_to_descr) + except TypeError: + arg_id_to_descr = immutabledict(arg_id_to_descr) + warn("arg_id_to_descr passed to InKernelCallable was not hashable. " + "This usage is deprecated and will stop working in 2026.", + DeprecationWarning, stacklevel=3) + + object.__setattr__(self, "arg_id_to_dtype", arg_id_to_dtype) + object.__setattr__(self, "arg_id_to_descr", arg_id_to_descr) + + if TYPE_CHECKING: + @property + def name(self) -> str: + raise NotImplementedError() - hash_fields: ClassVar[Tuple[str, ...]] = ( - "name", "arg_id_to_dtype", "arg_id_to_descr") - - def __init__(self, name, arg_id_to_dtype=None, arg_id_to_descr=None): - - super().__init__(name=name, - arg_id_to_dtype=arg_id_to_dtype, - arg_id_to_descr=arg_id_to_descr) - - update_persistent_hash = update_persistent_hash + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) def with_types(self, arg_id_to_dtype, clbl_inf_ctx): """ @@ -391,6 +398,7 @@ class InKernelCallable(ImmutableRecord): raise NotImplementedError() + @abstractmethod def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): """ :arg arg_id_to_descr: a mapping from argument identifiers (integers for @@ -418,12 +426,11 @@ class InKernelCallable(ImmutableRecord): other callables within it, then *clbl_inf_ctx* is returned as is. """ - raise NotImplementedError() - - def is_ready_for_codegen(self): + def is_ready_for_codegen(self) -> bool: return (self.arg_id_to_dtype is not None and self.arg_id_to_descr is not None) + @abstractmethod def get_hw_axes_sizes(self, arg_id_to_arg, space, callables_table): """ Returns ``gsizes, lsizes``, where *gsizes* and *lsizes* are mappings @@ -435,26 +442,28 @@ class InKernelCallable(ImmutableRecord): arguments at a call-site. :arg space: An instance of :class:`islpy.Space`. """ - raise NotImplementedError + ... + @abstractmethod def get_used_hw_axes(self, callables_table): """ Returns a tuple ``group_axes_used, local_axes_used``, where ``(group|local)_axes_used`` are :class:`frozenset` of hardware axes indices used by the callable. """ - raise NotImplementedError + @abstractmethod def generate_preambles(self, target): """ Yields the target specific preamble. """ raise NotImplementedError() + @abstractmethod def emit_call(self, expression_to_code_mapper, expression, target): + ... - raise NotImplementedError() - + @abstractmethod def emit_call_insn(self, insn, target, expression_to_code_mapper): """ Returns a tuple of ``(call, assignee_is_returned)`` which is the target @@ -469,18 +478,14 @@ class InKernelCallable(ImmutableRecord): in the target as the statement ``f(c, d, &a, &b)``. """ - raise NotImplementedError() - - def __hash__(self): - return hash(self.hash_fields) - + @abstractmethod def with_added_arg(self, arg_dtype, arg_descr): """ Registers a new argument to the callable and returns the name of the argument in the callable's namespace. """ - raise NotImplementedError() + @abstractmethod def get_called_callables( self, callables_table: CallablesTable, @@ -496,27 +501,27 @@ class InKernelCallable(ImmutableRecord): callables, else only returns the callables directly called by *self*. """ - raise NotImplementedError + @abstractmethod def with_name(self, name): """ Returns a copy of *self* so that it could be referred by *name* in a :attr:`loopy.TranslationUnit.callables_table`'s namespace. """ - raise NotImplementedError + @abstractmethod def is_type_specialized(self): """ Returns *True* iff *self*'s type signature is known, else returns *False*. """ - raise NotImplementedError # }}} # {{{ scalar callable +@dataclass(frozen=True, init=False) class ScalarCallable(InKernelCallable): """ An abstract interface to a scalar callable encountered in a kernel. @@ -537,15 +542,20 @@ class ScalarCallable(InKernelCallable): The :meth:`ScalarCallable.with_types` is intended to assist with type specialization of the function and sub-classes must define it. """ - fields = {"name", "arg_id_to_dtype", "arg_id_to_descr", "name_in_target"} - hash_fields = InKernelCallable.hash_fields + ("name_in_target",) - - def __init__(self, name, arg_id_to_dtype=None, - arg_id_to_descr=None, name_in_target=None): - super().__init__(name=name, - arg_id_to_dtype=arg_id_to_dtype, - arg_id_to_descr=arg_id_to_descr) - self.name_in_target = name_in_target + name: str + name_in_target: str | None + + def __init__(self, + name: str, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + name_in_target: str | None = None) -> None: + super().__init__( + arg_id_to_dtype=arg_id_to_dtype, + arg_id_to_descr=arg_id_to_descr, + ) + object.__setattr__(self, "name", name) + object.__setattr__(self, "name_in_target", name_in_target) def with_types(self, arg_id_to_dtype, callables_table): raise LoopyError("No type inference information present for " @@ -689,6 +699,7 @@ class ScalarCallable(InKernelCallable): # {{{ callable kernel +@dataclass(frozen=True, init=False) class CallableKernel(InKernelCallable): """ Records information about a callee kernel. Also provides interface through @@ -702,35 +713,27 @@ class CallableKernel(InKernelCallable): :meth:`CallableKernel.with_descrs` should be called in order to match the arguments' shapes/strides across the caller and the callee kernel. - .. attribute:: subkernel - - :class:`~loopy.LoopKernel` which is being called. - + .. autoattribute:: subkernel .. automethod:: with_descrs .. automethod:: with_types """ - fields = {"subkernel", "arg_id_to_dtype", "arg_id_to_descr"} - hash_fields = ("subkernel", "arg_id_to_dtype", "arg_id_to_descr") + subkernel: LoopKernel + + def __init__(self, + subkernel: LoopKernel, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + ) -> None: - def __init__(self, subkernel, arg_id_to_dtype=None, - arg_id_to_descr=None): - assert isinstance(subkernel, LoopKernel) - super().__init__(name=subkernel.name, + super().__init__( arg_id_to_dtype=arg_id_to_dtype, arg_id_to_descr=arg_id_to_descr) - self.subkernel = subkernel - - def copy(self, subkernel=None, arg_id_to_dtype=None, - arg_id_to_descr=None): - if subkernel is None: - subkernel = self.subkernel - if arg_id_to_descr is None: - arg_id_to_descr = self.arg_id_to_descr - if arg_id_to_dtype is None: - arg_id_to_dtype = self.arg_id_to_dtype + object.__setattr__(self, "subkernel", subkernel) - return CallableKernel(subkernel, arg_id_to_dtype, arg_id_to_descr) + @property + def name(self) -> str: + return self.subkernel.name def with_types(self, arg_id_to_dtype, callables_table): kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) @@ -769,7 +772,7 @@ class CallableKernel(InKernelCallable): # Return the kernel call with specialized subkernel and the corresponding # new arg_id_to_dtype return self.copy(subkernel=specialized_kernel, - arg_id_to_dtype=new_arg_id_to_dtype), callables_table + arg_id_to_dtype=immutabledict(new_arg_id_to_dtype)), callables_table def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): @@ -844,7 +847,7 @@ class CallableKernel(InKernelCallable): # }}} return (self.copy(subkernel=subkernel, - arg_id_to_descr=arg_id_to_descr), + arg_id_to_descr=immutabledict(arg_id_to_descr)), clbl_inf_ctx) def with_added_arg(self, arg_dtype, arg_descr): @@ -852,7 +855,8 @@ class CallableKernel(InKernelCallable): if isinstance(arg_descr, ValueArgDescriptor): subknl = self.subkernel.copy( - args=self.subkernel.args+[ + args=[ + *self.subkernel.args, ValueArg(var_name, arg_dtype, self.subkernel.target)]) kw_to_pos, pos_to_kw = get_kw_pos_association(subknl) @@ -860,11 +864,11 @@ class CallableKernel(InKernelCallable): if self.arg_id_to_dtype is None: arg_id_to_dtype = {} else: - arg_id_to_dtype = self.arg_id_to_dtype.copy() + arg_id_to_dtype = dict(self.arg_id_to_dtype) if self.arg_id_to_descr is None: arg_id_to_descr = {} else: - arg_id_to_descr = self.arg_id_to_descr.copy() + arg_id_to_descr = dict(self.arg_id_to_descr) arg_id_to_dtype[var_name] = arg_dtype arg_id_to_descr[var_name] = arg_descr @@ -931,6 +935,10 @@ class CallableKernel(InKernelCallable): return yield + def emit_call(self, expression_to_code_mapper, expression, target): + raise LoopyError("Kernel '{self.name}' cannot be called " + "from within an expression, use a call statement") + def emit_call_insn(self, insn, target, expression_to_code_mapper): from loopy.target.c import CFamilyTarget if not isinstance(target, CFamilyTarget): diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index d6517adc4a85f40fbf73fdf1448282032842e28f..a5cefd8066626a5e653782aabf5482565634f3c0 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -27,7 +27,17 @@ from collections.abc import ( from dataclasses import dataclass from functools import cached_property from sys import intern -from typing import Any, FrozenSet, Mapping, Optional, Sequence, Tuple, Type, Union +from typing import ( + Any, + ClassVar, + FrozenSet, + Mapping, + Optional, + Sequence, + Tuple, + Type, + Union, +) from warnings import warn import islpy as isl @@ -257,7 +267,7 @@ class InstructionBase(ImmutableRecord, Taggable): # within_inames_is_final is deprecated and will be removed in version 2017.x. - fields = set("id depends_on_is_final " + fields: ClassVar[set[str]] = set("id depends_on_is_final " "groups conflicts_with_groups " "no_sync_with " "predicates " @@ -634,7 +644,7 @@ def _get_assignee_subscript_deps(expr): # {{{ atomic ops -class MemoryOrdering: # noqa +class MemoryOrdering: """Ordering of atomic operations, defined as in C11 and OpenCL. .. attribute:: RELAXED @@ -662,7 +672,7 @@ class MemoryOrdering: # noqa raise ValueError("Unknown value of MemoryOrdering") -class MemoryScope: # noqa +class MemoryScope: """Scope of atomicity, defined as in OpenCL. .. attribute:: auto @@ -925,8 +935,8 @@ class Assignment(MultiAssignmentBase): predicates: Optional[FrozenSet[str]] = None, tags: Optional[FrozenSet[Tag]] = None, temp_var_type: Union[ - Type[_not_provided], None, LoopyOptional, - LoopyType] = _not_provided, + Type[_not_provided], LoopyOptional, + LoopyType, None] = _not_provided, atomicity: Tuple[VarAtomicity, ...] = (), *, depends_on: Union[FrozenSet[str], str, None] = None, diff --git a/loopy/library/random123.py b/loopy/library/random123.py index 0afb0abb9fd4c583c25505be0cf92edfd849b869..329770e05f7ec486198a755d7f060d1e909c3388 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -24,20 +24,31 @@ THE SOFTWARE. """ +from dataclasses import dataclass, replace + import numpy as np from mako.template import Template -from pytools import ImmutableRecord +from pymbolic.typing import not_none from loopy.kernel.function_interface import ScalarCallable +from loopy.target import TargetBase # {{{ rng metadata -class RNGInfo(ImmutableRecord): +@dataclass(frozen=True) +class RNGInfo: + name: str + pyopencl_header: str + generic_header: str + key_width: int + width: int | None = None + bits: int | None = None + @property - def full_name(self): - return "%s%dx%d" % (self.name, self.width, self.bits) + def full_name(self) -> str: + return "%s%dx%d" % (self.name, not_none(self.width), not_none(self.bits)) _philox_base_info = RNGInfo( @@ -53,15 +64,15 @@ _threefry_base_info = RNGInfo( key_width=4) RNG_VARIANTS = [ - _philox_base_info.copy(width=2, bits=32), - _philox_base_info.copy(width=2, bits=64), - _philox_base_info.copy(width=4, bits=32), - _philox_base_info.copy(width=4, bits=64), - - _threefry_base_info.copy(width=2, bits=32), - _threefry_base_info.copy(width=2, bits=64), - _threefry_base_info.copy(width=4, bits=32), - _threefry_base_info.copy(width=4, bits=64), + replace(_philox_base_info, width=2, bits=32), + replace(_philox_base_info, width=2, bits=64), + replace(_philox_base_info, width=4, bits=32), + replace(_philox_base_info, width=4, bits=64), + + replace(_threefry_base_info, width=2, bits=32), + replace(_threefry_base_info, width=2, bits=64), + replace(_threefry_base_info, width=4, bits=32), + replace(_threefry_base_info, width=4, bits=64), ] FUNC_NAMES_TO_RNG = { @@ -165,12 +176,12 @@ double${ width } ${ name }_f64( # }}} +@dataclass(frozen=True, init=False) class Random123Callable(ScalarCallable): """ Records information about for the random123 functions. """ - fields = ScalarCallable.fields | {"target"} - hash_fields = ScalarCallable.hash_fields + ("target",) + target: TargetBase def __init__(self, name, arg_id_to_dtype=None, arg_id_to_descr=None, name_in_target=None, target=None): @@ -179,7 +190,7 @@ class Random123Callable(ScalarCallable): arg_id_to_descr=arg_id_to_descr, name_in_target=name_in_target) - self.target = target + object.__setattr__(self, "target", target) def with_types(self, arg_id_to_dtype, callables_table): diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 2d357d3b4ac7d50d2a946a7bfd3fbeaec8bd1515..445a0b86fb7587c257394aa2f568b709c4be6616 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -347,8 +347,7 @@ class _SegmentedScalarReductionOperation(ReductionOperation): segment_flag_dtype.numpy_dtype.type(0)), callables_table def result_dtypes(self, scalar_dtype, segment_flag_dtype): - return (self.inner_reduction.result_dtypes(scalar_dtype) - + (segment_flag_dtype,)) + return ((*self.inner_reduction.result_dtypes(scalar_dtype), segment_flag_dtype)) def __str__(self): return "segmented(%s)" % self.which @@ -571,12 +570,12 @@ class ReductionCallable(ScalarCallable): def with_types(self, arg_id_to_dtype, callables_table): scalar_dtype = arg_id_to_dtype[0] index_dtype = arg_id_to_dtype[1] - result_dtypes = self.name.reduction_op.result_dtypes(scalar_dtype, + result_dtypes = self.name.reduction_op.result_dtypes(scalar_dtype, # pylint: disable=no-member index_dtype) new_arg_id_to_dtype = arg_id_to_dtype.copy() new_arg_id_to_dtype[-1] = result_dtypes[0] new_arg_id_to_dtype[-2] = result_dtypes[1] - name_in_target = self.name.reduction_op.prefix(scalar_dtype, + name_in_target = self.name.reduction_op.prefix(scalar_dtype, # pylint: disable=no-member index_dtype) + "_op" return self.copy(arg_id_to_dtype=new_arg_id_to_dtype, @@ -594,7 +593,7 @@ class ReductionCallable(ScalarCallable): class ArgExtOpCallable(ReductionCallable): def generate_preambles(self, target): - op = self.name.reduction_op + op = self.name.reduction_op # pylint: disable=no-member scalar_dtype = self.arg_id_to_dtype[-1] index_dtype = self.arg_id_to_dtype[-2] @@ -630,7 +629,7 @@ class ArgExtOpCallable(ReductionCallable): class SegmentOpCallable(ReductionCallable): def generate_preambles(self, target): - op = self.name.reduction_op + op = self.name.reduction_op # pylint: disable=no-member scalar_dtype = self.arg_id_to_dtype[-1] segment_flag_dtype = self.arg_id_to_dtype[-2] prefix = op.prefix(scalar_dtype, segment_flag_dtype) diff --git a/loopy/options.py b/loopy/options.py index 293670774c7324854c7bc4a209350594d9cb229c..1c798f7a8dd70ae9d463055d7d40c31305c4295b 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -23,7 +23,8 @@ THE SOFTWARE. import os import re -from typing import Any +from collections.abc import Mapping +from typing import Any, ClassVar from warnings import warn from pytools import ImmutableRecord @@ -198,7 +199,7 @@ class Options(ImmutableRecord): RAW, WAR and WAW races. """ - _legacy_options_map = { + _legacy_options_map: ClassVar[Mapping[str, tuple[str, None] | None]] = { "cl_build_options": ("build_options", None), "write_cl": ("write_code", None), "highlight_cl": None, @@ -332,7 +333,7 @@ class Options(ImmutableRecord): return _ColoramaStub() -KEY_VAL_RE = re.compile("^([a-zA-Z0-9]+)=(.*)$") +KEY_VAL_RE = re.compile(r"^([a-zA-Z0-9]+)=(.*)$") def make_options(options_arg): diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 7eeae715d129bb6f348cceb7db95daf38789d53d..98026fdaa3d49b319b19ae6e983d4f4710fa2c1d 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -266,7 +266,7 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: else: raise LoopyError(f"invalid value of {what}") - new_dim_tags = new_dim_tags + (dim_tag,) + new_dim_tags = (*new_dim_tags, dim_tag) arg = arg.copy(dim_tags=new_dim_tags) @@ -499,7 +499,7 @@ def check_atomic_loads(kernel): for x in missed: if {x} & atomicity_candidates: insn = insn.copy( - atomicity=insn.atomicity + (AtomicLoad(x),)) + atomicity=(*insn.atomicity, AtomicLoad(x))) new_insns.append(insn) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 2460f5ed7e7f00c129716195a6025dcd3f469ea7..73a23a9833f8b6f5ad6da9b959cf677e63bb0b5d 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1,5 +1,7 @@ from __future__ import annotations +from loopy.kernel.function_interface import InKernelCallable +from loopy.translation_unit import FunctionIdT from loopy.typing import not_none @@ -1020,7 +1022,7 @@ def _generate_loop_schedules_v2(kernel: LoopKernel) -> Sequence[ScheduleItem]: def iname_key(iname: str) -> str: all_ancestors = sorted(loop_tree.ancestors(iname), key=lambda x: loop_tree.depth(x)) - return ",".join(all_ancestors+[iname]) + return ",".join([*all_ancestors, iname]) def key(x: ScheduleItem) -> tuple[str, ...]: if isinstance(x, RunInstruction): @@ -1097,7 +1099,7 @@ def _generate_loop_schedules_internal( assert sched_state.within_subkernel is False yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:], within_subkernel=True, may_schedule_global_barriers=False, @@ -1110,7 +1112,7 @@ def _generate_loop_schedules_internal( if sched_state.active_inames == sched_state.enclosing_subkernel_inames: yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:], within_subkernel=False, may_schedule_global_barriers=True), @@ -1129,7 +1131,7 @@ def _generate_loop_schedules_internal( and next_preschedule_item.originating_insn_id is None): yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:]), debug=debug) @@ -1289,7 +1291,7 @@ def _generate_loop_schedules_internal( unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, insn_ids_to_try=new_insn_ids_to_try, schedule=( - sched_state.schedule + (RunInstruction(insn_id=insn.id),)), + (*sched_state.schedule, RunInstruction(insn_id=insn.id))), preschedule=( sched_state.preschedule if insn_id not in sched_state.prescheduled_insn_ids @@ -1403,8 +1405,8 @@ def _generate_loop_schedules_internal( for sub_sched in _generate_loop_schedules_internal( sched_state.copy( schedule=( - sched_state.schedule - + (LeaveLoop(iname=last_entered_loop),)), + (*sched_state.schedule, + LeaveLoop(iname=last_entered_loop))), active_inames=sched_state.active_inames[:-1], insn_ids_to_try=insn_ids_to_try, preschedule=( @@ -1613,10 +1615,9 @@ def _generate_loop_schedules_internal( for sub_sched in _generate_loop_schedules_internal( sched_state.copy( schedule=( - sched_state.schedule - + (EnterLoop(iname=iname),)), + (*sched_state.schedule, EnterLoop(iname=iname))), active_inames=( - sched_state.active_inames + (iname,)), + (*sched_state.active_inames, iname)), entered_inames=( sched_state.entered_inames | frozenset((iname,))), @@ -2446,7 +2447,7 @@ def get_one_linearized_kernel( callables_table) if CACHING_ENABLED and not from_cache: - schedule_cache.store_if_not_present(sched_cache_key, result) # pylint: disable=possibly-used-before-assignment # noqa: E501 + schedule_cache.store_if_not_present(sched_cache_key, result) # pylint: disable=possibly-used-before-assignment return result @@ -2466,7 +2467,7 @@ def linearize(t_unit: TranslationUnit) -> TranslationUnit: pre_schedule_checks(t_unit) - new_callables = {} + new_callables: dict[FunctionIdT, InKernelCallable] = {} for name, clbl in t_unit.callables_table.items(): if isinstance(clbl, CallableKernel): diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index a0345049d8c74d3d346057ed993b3212951c72e2..c5c65385cc1bbc832973a6feed277d2a3bc0eb63 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -45,9 +45,9 @@ def map_schedule_onto_host_or_device(kernel): if not kernel.target.split_kernel_at_global_barriers(): new_schedule = ( - [CallKernel(kernel_name=device_prog_name_gen())] + - list(kernel.linearization) + - [ReturnFromKernel(kernel_name=kernel.name)]) + [CallKernel(kernel_name=device_prog_name_gen()), + *kernel.linearization, + ReturnFromKernel(kernel_name=kernel.name)]) kernel = kernel.copy(linearization=new_schedule) else: kernel = map_schedule_onto_host_or_device_impl( @@ -92,19 +92,13 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): schedule_required_splitting = True if current_chunk: new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) new_schedule.extend( - [start_item] + - inner_schedule + - [end_item]) + [start_item, *inner_schedule, end_item]) current_chunk = [] else: current_chunk.extend( - [start_item] + - inner_schedule + - [end_item]) + [start_item, *inner_schedule, end_item]) elif isinstance(sched_item, Barrier): if sched_item.synchronization_kind == "global": @@ -112,9 +106,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): schedule_required_splitting = True if current_chunk: new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) new_schedule.append(sched_item) current_chunk = [] else: @@ -127,9 +119,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): if current_chunk and schedule_required_splitting: # Wrap remainder of schedule into a kernel call. new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) else: new_schedule.extend(current_chunk) @@ -142,9 +132,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): if not split_kernel: # Wrap everything into a kernel call. new_schedule = ( - [dummy_call.copy()] + - new_schedule + - [dummy_return.copy()]) + [dummy_call.copy(), *new_schedule, dummy_return.copy()]) # Assign names to CallKernel / ReturnFromKernel instructions diff --git a/loopy/schedule/tree.py b/loopy/schedule/tree.py index e98724f8395825985704f7377fa8997b25b32805..327fb65c2badacc76d3af66b95d3d002b95ce410 100644 --- a/loopy/schedule/tree.py +++ b/loopy/schedule/tree.py @@ -34,9 +34,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +import operator from collections.abc import Hashable, Iterator, Sequence from dataclasses import dataclass -from functools import cached_property +from functools import cached_property, reduce from typing import Generic, TypeVar from immutables import Map @@ -103,7 +104,7 @@ class Tree(Generic[NodeT]): parent = self._child_to_parent[node] assert parent is not None - return (parent,) + self.ancestors(parent) + return (parent, *self.ancestors(parent)) def parent(self, node: NodeT) -> NodeT | None: """ @@ -162,7 +163,7 @@ class Tree(Generic[NodeT]): siblings = self._parent_to_children[parent] return Tree((self._parent_to_children - .set(parent, siblings + (node,)) + .set(parent, (*siblings, node)) .set(node, ())), self._child_to_parent.set(node, parent)) @@ -231,7 +232,7 @@ class Tree(Generic[NodeT]): assert parent is not None # parent=root handled as a special case siblings = self.children(parent) parents_new_children = tuple(frozenset(siblings) - frozenset([node])) - new_parents_children = self.children(new_parent) + (node,) + new_parents_children = (*self.children(new_parent), node) new_child_to_parent = self._child_to_parent.set(node, new_parent) new_parent_to_children = (self._parent_to_children @@ -276,7 +277,7 @@ class Tree(Generic[NodeT]): for c in children_result[:-1]] + [post_process_last_child(c) for c in children_result[-1:]]) - return [str(node)] + sum(children_result, start=[]) + return [str(node), *reduce(operator.iadd, children_result, [])] return "\n".join(rec(self.root)) diff --git a/loopy/statistics.py b/loopy/statistics.py index fd697bc47fcebcafd107a4756471bbfd4c4ed393..657ee9e25c3ea5a857c53352311ab9cbba3c0966 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -25,7 +25,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +from collections.abc import Sequence from functools import cached_property, partial +from typing import ClassVar import islpy as isl from islpy import dim_type @@ -391,7 +393,7 @@ class ToCountMap: # make sure all item keys have same type if self.count_map: - key_type = type(list(self.keys())[0]) + key_type = type(next(iter(self.keys()))) if not all(isinstance(x, key_type) for x in self.keys()): raise ValueError("ToCountMap: group_by() function may only " "be used on ToCountMaps with uniform keys") @@ -598,7 +600,7 @@ class CountGranularity: WORKITEM = "workitem" SUBGROUP = "subgroup" WORKGROUP = "workgroup" - ALL = [WORKITEM, SUBGROUP, WORKGROUP] + ALL: ClassVar[Sequence[str]] = [WORKITEM, SUBGROUP, WORKGROUP] # }}} @@ -639,10 +641,10 @@ class Op(ImmutableRecord): def __init__(self, dtype=None, name=None, count_granularity=None, kernel_name=None): - if count_granularity not in CountGranularity.ALL+[None]: + if count_granularity not in [*CountGranularity.ALL, None]: raise ValueError("Op.__init__: count_granularity '%s' is " "not allowed. count_granularity options: %s" - % (count_granularity, CountGranularity.ALL+[None])) + % (count_granularity, [*CountGranularity.ALL, None])) if dtype is not None: from loopy.types import to_loopy_type @@ -735,10 +737,10 @@ class MemAccess(ImmutableRecord): *, variable_tags=None, count_granularity=None, kernel_name=None): - if count_granularity not in CountGranularity.ALL+[None]: + if count_granularity not in [*CountGranularity.ALL, None]: raise ValueError("Op.__init__: count_granularity '%s' is " "not allowed. count_granularity options: %s" - % (count_granularity, CountGranularity.ALL+[None])) + % (count_granularity, [*CountGranularity.ALL, None])) if variable_tags is None: variable_tags = frozenset() @@ -1652,7 +1654,7 @@ def _get_insn_count(knl, callables_table, insn_id, subgroup_size, # this should not happen since this is enforced in Op/MemAccess raise ValueError("get_insn_count: count_granularity '%s' is" "not allowed. count_granularity options: %s" - % (count_granularity, CountGranularity.ALL+[None])) + % (count_granularity, [*CountGranularity.ALL, None])) # }}} @@ -1768,7 +1770,7 @@ def get_op_map(program, count_redundant_work=False, if len(program.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in program.entrypoints @@ -1995,7 +1997,7 @@ def get_mem_access_map(program, count_redundant_work=False, if len(program.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in program.entrypoints @@ -2116,7 +2118,7 @@ def get_synchronization_map(program, subgroup_size=None, entrypoint=None): if len(program.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in program.entrypoints from loopy.preprocess import infer_unknown_types, preprocess_program @@ -2175,7 +2177,7 @@ def gather_access_footprints(program, ignore_uncountable=False, entrypoint=None) if len(program.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in program.entrypoints @@ -2205,10 +2207,10 @@ def gather_access_footprints(program, ignore_uncountable=False, entrypoint=None) result = {} for vname, footprint in write_footprints.items(): - result[(vname, "write")] = footprint + result[vname, "write"] = footprint for vname, footprint in read_footprints.items(): - result[(vname, "read")] = footprint + result[vname, "read"] = footprint return result diff --git a/loopy/symbolic.py b/loopy/symbolic.py index d30581db88d209a7430f71d13a13e3d5c939f816..964bd4d02f0755c4d5fdbdf9eee7949dd53f6416 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -33,8 +33,10 @@ from typing import ( TYPE_CHECKING, AbstractSet, Any, + ClassVar, Mapping, Sequence, + TypeAlias, cast, ) from warnings import warn @@ -1283,7 +1285,7 @@ class RuleAwareIdentityMapper(IdentityMapper): rec_arguments = self.rec(arguments, expn_state, *args, **kwargs) new_expn_state = expn_state.copy( - stack=expn_state.stack + ((name, tags),), + stack=(*expn_state.stack, (name, tags)), arg_context=self.make_new_arg_context( name, rule.arguments, rec_arguments, expn_state.arg_context)) @@ -1428,7 +1430,7 @@ class RuleAwareSubstitutionRuleExpander(RuleAwareIdentityMapper): self.within = within def map_substitution(self, name, tags, arguments, expn_state): - new_stack = expn_state.stack + ((name, tags),) + new_stack = (*expn_state.stack, (name, tags)) if self.within(expn_state.kernel, expn_state.instruction, new_stack): # expand @@ -1573,11 +1575,15 @@ class FunctionToPrimitiveMapper(UncachedIdentityMapper): _open_dbl_bracket = intern("open_dbl_bracket") -TRAILING_FLOAT_TAG_RE = re.compile("^(.*?)([a-zA-Z]*)$") +TRAILING_FLOAT_TAG_RE = re.compile(r"^(.*?)([a-zA-Z]*)$") + + +LexTable: TypeAlias = Sequence[ + tuple[str, pytools.lex.RE | tuple[str | pytools.lex.RE, ...]]] class LoopyParser(ParserBase): - lex_table = [ + lex_table: ClassVar[LexTable] = [ (_open_dbl_bracket, pytools.lex.RE(r"\[\[")), *ParserBase.lex_table ] diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index a2961eee96e4544418d9fb8641c97f6e22d3c730..98dbe35c7eb7000a70ba8d923877dbf64e38b743 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -26,7 +26,7 @@ THE SOFTWARE. import re from typing import Any, Optional, Sequence, Tuple, cast -import numpy as np # noqa +import numpy as np import pymbolic.primitives as p from cgen import ( @@ -259,7 +259,7 @@ def _preamble_generator(preamble_info, func_qualifier="inline"): inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{ if (n == 0) return 1; - {re.sub("^", 14*" ", signed_exponent_preamble, flags=re.M)} + {re.sub(r"^", 14*" ", signed_exponent_preamble, flags=re.M)} {res_ctype} y = 1; @@ -414,8 +414,8 @@ class CFamilyTarget(TargetBase): usable as a common base for C99, C++, OpenCL, CUDA, and the like. """ - hash_fields = TargetBase.hash_fields + ("fortran_abi",) - comparison_fields = TargetBase.comparison_fields + ("fortran_abi",) + hash_fields = (*TargetBase.hash_fields, "fortran_abi") + comparison_fields = (*TargetBase.comparison_fields, "fortran_abi") def __init__(self, fortran_abi=False): self.fortran_abi = fortran_abi @@ -772,16 +772,13 @@ class CFamilyASTBuilder(ASTBuilderBase[Generable]): def symbol_manglers(self): return ( - super().symbol_manglers() + [ - c_symbol_mangler - ]) + [*super().symbol_manglers(), c_symbol_mangler]) def preamble_generators(self): return ( - super().preamble_generators() + [ - lambda preamble_info: _preamble_generator(preamble_info, - self.preamble_function_qualifier), - ]) + [*super().preamble_generators(), + lambda preamble_info: _preamble_generator( + preamble_info, self.preamble_function_qualifier)]) @property def known_callables(self): @@ -837,7 +834,7 @@ class CFamilyASTBuilder(ASTBuilderBase[Generable]): if not result: return fbody else: - return Collection(result+[Line(), fbody]) + return Collection([*result, Line(), fbody]) def get_function_declaration( self, codegen_state: CodeGenerationState, @@ -1281,7 +1278,7 @@ class CFamilyASTBuilder(ASTBuilderBase[Generable]): inner) if hints: - return Collection(list(hints) + [loop]) + return Collection([*list(hints), loop]) else: return loop @@ -1397,9 +1394,7 @@ class CTarget(CFamilyTarget): class CASTBuilder(CFamilyASTBuilder): def preamble_generators(self): return ( - super().preamble_generators() + [ - c99_preamble_generator, - ]) + [*super().preamble_generators(), c99_preamble_generator]) # }}} diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 6bc496f5fd2350f89c1a9275b8644a432b09967d..873430904289dfc5624f34fa5c37be76d20a9625 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -25,7 +25,7 @@ import logging import os import tempfile from dataclasses import dataclass -from typing import Any, Callable, Optional, Sequence, Tuple, Union +from typing import Any, Callable, ClassVar, Optional, Sequence, Tuple, Union import numpy as np from codepy.jit import compile_from_string @@ -365,15 +365,15 @@ class CPlusPlusCompiler(CCompiler): # {{{ placeholder till ctypes fixes: https://github.com/python/cpython/issues/61103 class Complex64(ctypes.Structure): - _fields_ = [("real", ctypes.c_float), ("imag", ctypes.c_float)] + _fields_: ClassVar = [("real", ctypes.c_float), ("imag", ctypes.c_float)] class Complex128(ctypes.Structure): - _fields_ = [("real", ctypes.c_double), ("imag", ctypes.c_double)] + _fields_: ClassVar = [("real", ctypes.c_double), ("imag", ctypes.c_double)] class Complex256(ctypes.Structure): - _fields_ = [("real", ctypes.c_longdouble), ("imag", ctypes.c_longdouble)] + _fields_: ClassVar = [("real", ctypes.c_longdouble), ("imag", ctypes.c_longdouble)] _NUMPY_COMPLEX_TYPE_TO_CTYPE = { diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index e201326a5f9b87497c5f890b7098e3b9155d1743..82e0bb190f5c7eb165196b7a7c81475c2f6ba83d 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -641,7 +641,7 @@ class CExpressionToCodeMapper(Mapper): # FIXME: Add type suffixes? return repr(int(expr)) elif isinstance(expr, np.float32): - return f"{repr(float(expr))}f" + return f"{float(expr)!r}f" elif isinstance(expr, np.float64): return repr(float(expr)) else: diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index afeb5cee25a9e006d51e97108ef2ad63979a5532..22d663b501f33b06c09dbe0fa44a49fc75e9f8ec 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -369,8 +369,7 @@ class CUDACASTBuilder(CFamilyASTBuilder): def preamble_generators(self): return ( - super().preamble_generators() + [ - cuda_preamble_generator]) + [*super().preamble_generators(), cuda_preamble_generator]) # }}} diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 4200a4b24a251622bac9f9f39d2d4e2087617779..d88b99bb7726c2af8ed09752093c0735a80564b1 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -26,7 +26,7 @@ THE SOFTWARE. from typing import Sequence, Tuple, cast -import numpy as np # noqa +import numpy as np import pymbolic.primitives as p from cgen import Collection, Const, Declarator, Generable @@ -499,7 +499,7 @@ class ISPCASTBuilder(CFamilyASTBuilder): inner) if hints: - return Collection(list(hints) + [loop]) + return Collection([*list(hints), loop]) else: return loop diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 14383e54f21a7d0229701226aa9e23d4d827d172..8250436fd4958469e0a9d5b71d93a6a0e27dc980 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -618,15 +618,12 @@ class OpenCLCASTBuilder(CFamilyASTBuilder): def symbol_manglers(self): return ( - super().symbol_manglers() + [ - opencl_symbol_mangler - ]) + [*super().symbol_manglers(), opencl_symbol_mangler]) def preamble_generators(self): return ( - super().preamble_generators() + [ - opencl_preamble_generator]) + [*super().preamble_generators(), opencl_preamble_generator]) # }}} diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index fa7fd20e8b61ace859dbe4f564b8508dc07ece1d..183360569b3071f88ba2ee74aa498ddae96081ed 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -506,7 +506,7 @@ class PyOpenCLTarget(OpenCLTarget): """ # FIXME make prefixes conform to naming rules - # (see Reference: Loopy’s Model of a Kernel) + # (see Reference: Loopy's Model of a Kernel) host_program_name_prefix = "_lpy_host_" host_program_name_suffix = "" @@ -522,7 +522,7 @@ class PyOpenCLTarget(OpenCLTarget): pointer_size_nbytes: Optional[int] = None ) -> None: # This ensures the dtype registry is populated. - import pyopencl.tools # noqa + import pyopencl.tools super().__init__( atomics_flavor=atomics_flavor, @@ -553,10 +553,8 @@ class PyOpenCLTarget(OpenCLTarget): return None # NB: Not including 'device', as that is handled specially here. - hash_fields = OpenCLTarget.hash_fields + ( - "pyopencl_module_name",) - comparison_fields = OpenCLTarget.comparison_fields + ( - "pyopencl_module_name",) + hash_fields = (*OpenCLTarget.hash_fields, "pyopencl_module_name") + comparison_fields = (*OpenCLTarget.comparison_fields, "pyopencl_module_name") def get_host_ast_builder(self): return PyOpenCLPythonASTBuilder(self) @@ -774,9 +772,8 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): kai = get_kernel_arg_info(codegen_state.kernel) args = ( - ["_lpy_cl_kernels", "queue"] - + list(kai.passed_arg_names) - + ["wait_for=None", "allocator=None"]) + ["_lpy_cl_kernels", "queue", *kai.passed_arg_names, + "wait_for=None", "allocator=None"]) from genpy import For, Function, Line, Return, Statement as S, Suite return Function( @@ -920,7 +917,7 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): "_lpy_cl.mem_flags.READ_ONLY " "| _lpy_cl.mem_flags.COPY_HOST_PTR, " "hostbuf=" - f"_lpy_pack({repr(''.join(struct_pack_types))}, " + f"_lpy_pack({''.join(struct_pack_types)!r}, " f"{', '.join(struct_pack_args)}))"), Line(f"_lpy_knl.set_arg({cl_arg_count}, _lpy_overflow_args_buf)") ]) @@ -1096,7 +1093,7 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder): if not result: return fbody else: - return Collection(result+[Line(), fbody]) + return Collection([*result, Line(), fbody]) def get_function_declaration( self, codegen_state: CodeGenerationState, @@ -1195,9 +1192,7 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder): return callables def preamble_generators(self): - return ([ - pyopencl_preamble_generator, - ] + super().preamble_generators()) + return ([pyopencl_preamble_generator, *super().preamble_generators()]) # }}} diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 248f5f2eb711613c3654fe94bf72f4a00e2ef211..02781a8d99245c33f7e16c4ccc40027d57f9fa98 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -201,9 +201,8 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): gen("") - arg_list = (["_lpy_cl_kernels", "queue"] - + list(args) - + ["wait_for=wait_for", "allocator=allocator"]) + arg_list = (["_lpy_cl_kernels", "queue", *args, + "wait_for=wait_for", "allocator=allocator"]) gen(f"_lpy_evt = {host_program_name}({', '.join(arg_list)})") if kernel.options.cl_exec_manage_array_events: diff --git a/loopy/target/python.py b/loopy/target/python.py index 3a8747f384d887b7ff8b390c01c96a4835db535c..a419e6e358ab98c3584fd97c434b243240c072ed 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -33,7 +33,7 @@ from pymbolic.mapper.stringifier import StringifyMapper from loopy.codegen import CodeGenerationState from loopy.codegen.result import CodeGenerationResult -from loopy.diagnostic import LoopyError # noqa +from loopy.diagnostic import LoopyError from loopy.kernel.data import ValueArg from loopy.target import ASTBuilderBase from loopy.type_inference import TypeReader @@ -161,9 +161,7 @@ class PythonASTBuilderBase(ASTBuilderBase[Generable]): def preamble_generators(self): return ( - super().preamble_generators() + [ - _base_python_preamble_generator - ]) + [*super().preamble_generators(), _base_python_preamble_generator]) # {{{ code generation guts diff --git a/loopy/tools.py b/loopy/tools.py index bb4904bf2a3192442600b6694a00a5795087f315..ff66e36bec65bb3a4c3e322e9ee05e90a56caa6e 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -136,8 +136,8 @@ class LoopyEqKeyBuilder: kb = LoopyKeyBuilder() # Build the key. For faster hashing, avoid hashing field names. key = ( - (self.class_.__name__.encode("utf-8"),) + - tuple(self.field_dict[k] for k in sorted(self.field_dict.keys()))) + (self.class_.__name__.encode("utf-8"), + *(self.field_dict[k] for k in sorted(self.field_dict.keys())))) return kb(key) @@ -242,25 +242,14 @@ def build_ispc_shared_lib( from subprocess import check_call - ispc_cmd = ([ispc_bin, - "--pic", - "-o", "ispc.o"] - + ispc_options - + list(ispc_source_names)) + ispc_cmd = ([ispc_bin, "--pic", "-o", "ispc.o", *ispc_options, *ispc_source_names]) if not quiet: print(" ".join(ispc_cmd)) check_call(ispc_cmd, cwd=cwd) - cxx_cmd = ([ - cxx_bin, - "-shared", "-Wl,--export-dynamic", - "-fPIC", - "-oshared.so", - "ispc.o", - ] - + cxx_options - + list(cxx_source_names)) + cxx_cmd = ([cxx_bin, "-shared", "-Wl,--export-dynamic", "-fPIC", "-oshared.so", + "ispc.o", *cxx_options, *cxx_source_names]) check_call(cxx_cmd, cwd=cwd) @@ -535,7 +524,7 @@ class Optional: The value, if present. """ - __slots__ = ("has_value", "_value") + __slots__ = ("_value", "has_value") def __init__(self, value=_no_value): self.has_value = value is not _no_value @@ -828,7 +817,7 @@ def t_unit_to_python(t_unit, var_name="t_unit", "from pymbolic.primitives import *", "import immutables", ]) - body_str = "\n".join(knl_python_code_srcs + ["\n", merge_stmt]) + body_str = "\n".join([*knl_python_code_srcs, "\n", merge_stmt]) python_code = "\n".join([preamble_str, "\n", body_str]) assert _is_generated_t_unit_the_same(python_code, var_name, t_unit) diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 7ab5e376e94e9a20e40e46633abbde692dba4dc6..e41120656fa03f8cea1eeb90a088957b9fdb4671 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -89,7 +89,7 @@ def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, synchronization_kind=synchronization_kind, mem_kind=mem_kind) - new_kernel = kernel.copy(instructions=kernel.instructions + [barrier_to_add]) + new_kernel = kernel.copy(instructions=[*kernel.instructions, barrier_to_add]) if insn_after is not None: new_kernel = add_dependency(new_kernel, insn_match=insn_after, diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index 04c5ea3858660e5daee732ae6211d98170c1a239..857f7d48dd882ca5f49e1b5b9e0b9e05d1fcab0d 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -83,7 +83,7 @@ class _BatchVariableChanger(RuleAwareIdentityMapper): if not isinstance(idx, tuple): idx = (idx,) - return type(expr)(expr.aggregate, (self.batch_iname_expr,) + idx) + return type(expr)(expr.aggregate, (self.batch_iname_expr, *idx)) def map_variable(self, expr, expn_state): if not self.needs_batch_subscript(expr.name): @@ -98,7 +98,7 @@ def _add_unique_dim_name(name, dim_names): from pytools import UniqueNameGenerator ng = UniqueNameGenerator(set(dim_names)) - return (ng(name),) + tuple(dim_names) + return (ng(name), *tuple(dim_names)) @for_each_kernel @@ -143,7 +143,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" nbatches_expr = nbatches batch_domain = isl.BasicSet(batch_dom_str) - new_domains = [batch_domain] + kernel.domains + new_domains = [batch_domain, *kernel.domains] for arg in kernel.args: if arg.name in batch_varying_args: @@ -152,7 +152,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" dim_tags="c") else: arg = arg.copy( - shape=(nbatches_expr,) + arg.shape, + shape=(nbatches_expr, *arg.shape), dim_tags=("c",) * (len(arg.shape) + 1), dim_names=_add_unique_dim_name("ibatch", arg.dim_names)) @@ -168,7 +168,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" for temp in kernel.temporary_variables.values(): if temp_needs_batching_if_not_sequential(temp, batch_varying_args): new_temps[temp.name] = temp.copy( - shape=(nbatches_expr,) + temp.shape, + shape=(nbatches_expr, *temp.shape), dim_tags=("c",) * (len(temp.shape) + 1), dim_names=_add_unique_dim_name("ibatch", temp.dim_names)) else: diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 2e19eea75675ef70037211d10daa42cdf6ae717a..b4fc190da609620134926e16e24f042d7853d240 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -124,7 +124,7 @@ def _process_footprint_subscripts(kernel, rule_name, sweep_inames, kernel = _add_kernel_axis(kernel, axis_name, 0, arg.shape[axis_nr], frozenset(sweep_inames) | fsub_dependencies) - sweep_inames = sweep_inames + [axis_name] + sweep_inames = [*sweep_inames, axis_name] inames_to_be_removed.append(axis_name) new_fsub.append(Variable(axis_name)) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 795154099570d3f9760af7c20ffe9999f3754361..2dbba93e35af5578d80cc87bbadbab43262ba35f 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -296,16 +296,16 @@ def _split_iname_backend(kernel, iname_to_split, new_prio = () for prio_iname in prio: if prio_iname == iname_to_split: - new_prio = new_prio + (outer_iname, inner_iname) + new_prio = (*new_prio, outer_iname, inner_iname) else: - new_prio = new_prio + (prio_iname,) + new_prio = (*new_prio, prio_iname) new_priorities.append(new_prio) kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, instructions=new_insns, - applied_iname_rewrites=kernel.applied_iname_rewrites+(subst_map,), + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_map), loop_priority=frozenset(new_priorities)) rule_mapping_context = SubstitutionRuleMappingContext( @@ -630,7 +630,7 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): .copy( instructions=new_insns, domains=domch.get_domains_with(new_domain), - applied_iname_rewrites=kernel.applied_iname_rewrites + (subst_dict,) + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_dict) )) from loopy.match import parse_stack_match @@ -1051,7 +1051,7 @@ def get_iname_duplication_options(kernel): if isinstance(kernel, TranslationUnit): if len([clbl for clbl in kernel.callables_table.values() if isinstance(clbl, CallableKernel)]) == 1: - kernel = kernel[list(kernel.entrypoints)[0]] + kernel = kernel[next(iter(kernel.entrypoints))] assert isinstance(kernel, LoopKernel) @@ -1096,7 +1096,7 @@ def has_schedulable_iname_nesting(kernel): if isinstance(kernel, TranslationUnit): if len([clbl for clbl in kernel.callables_table.values() if isinstance(clbl, CallableKernel)]) == 1: - kernel = kernel[list(kernel.entrypoints)[0]] + kernel = kernel[next(iter(kernel.entrypoints))] return not bool(next(get_iname_duplication_options(kernel), False)) # }}} @@ -1398,7 +1398,7 @@ def affine_map_inames(kernel, old_inames, new_inames, equations): rule_mapping_context.finish_kernel( old_to_new.map_kernel(kernel)) .copy( - applied_iname_rewrites=kernel.applied_iname_rewrites + (subst_dict,) + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_dict) )) # }}} @@ -2082,7 +2082,7 @@ def map_domain(kernel, transform_map): substitutions[iname] = subst_from_map var_substitutions[var(iname)] = subst_from_map - applied_iname_rewrites = applied_iname_rewrites + (var_substitutions,) + applied_iname_rewrites = (*applied_iname_rewrites, var_substitutions) del var_substitutions # }}} diff --git a/loopy/transform/pack_and_unpack_args.py b/loopy/transform/pack_and_unpack_args.py index ae5339b5680bf4a16d29fa98ff99b9e67b22ed0d..ca897e00362751427798c3ff99ab28b24361dbf9 100644 --- a/loopy/transform/pack_and_unpack_args.py +++ b/loopy/transform/pack_and_unpack_args.py @@ -266,7 +266,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, in_knl_callable.arg_id_to_descr[arg_id].shape): iname_set = iname_set & make_slab(space, iname.name, 0, axis_length) - new_domains = new_domains + [iname_set] + new_domains = [*new_domains, iname_set] # }}} @@ -290,8 +290,8 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, new_ilp_inames), expression=new_call_insn.expression.function(*new_params), assignees=new_assignees) - old_insn_to_new_insns[insn.id] = (packing_insns + [new_call_insn] + - unpacking_insns) + old_insn_to_new_insns[insn.id] = ([ + *packing_insns, new_call_insn, *unpacking_insns]) if old_insn_to_new_insns: new_instructions = [] diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 147b626523a334b8ebad2d049388ad6c54565364..831d0c360eb8a2fa930b630bf499412a8da12488 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -388,11 +388,11 @@ def precompute_for_single_kernel( precompute_outer_inames: Optional[FrozenSet[str]] = None, storage_axis_to_tag=None, - default_tag: Union[None, Tag, str] = None, + default_tag: Union[Tag, str, None] = None, dtype: Optional[ToLoopyTypeConvertible] = None, fetch_bounding_box: bool = False, - temporary_address_space: Union[AddressSpace, None, Type[auto]] = None, + temporary_address_space: Union[AddressSpace, Type[auto], None] = None, compute_insn_id: Optional[str] = None, _enable_mirgecom_workaround: bool = False, ) -> LoopKernel: @@ -1028,7 +1028,7 @@ def precompute_for_single_kernel( and insn.within_inames & prior_storage_axis_names): insn = (insn .with_transformed_expressions( - lambda expr: expr_subst_map(expr, kernel, insn)) # noqa: B023,E501 + lambda expr: expr_subst_map(expr, kernel, insn)) # noqa: B023 .copy(within_inames=frozenset( new_iname for iname in insn.within_inames diff --git a/loopy/transform/realize_reduction.py b/loopy/transform/realize_reduction.py index e981ad4beeb4e94c97bb622061e6f9eba24893b2..f42a8ce14ce4c92a948084a19aa80112d3cc4917 100644 --- a/loopy/transform/realize_reduction.py +++ b/loopy/transform/realize_reduction.py @@ -1124,7 +1124,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx=red_realize_ctx, name_based_on="acc_"+red_iname, nvars=nresults, - shape=outer_local_iname_sizes + (size,), + shape=(*outer_local_iname_sizes, size), dtypes=reduction_dtypes, address_space=AddressSpace.LOCAL) @@ -1151,7 +1151,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, init_insn = make_assignment( id=init_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(base_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(base_exec_iname))] for acc_var in acc_vars), expression=neutral, within_inames=( @@ -1234,7 +1234,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, transfer_insn = make_assignment( id=transfer_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(red_iname),)] + acc_var[(*outer_local_iname_vars, var(red_iname))] for acc_var in acc_vars), expression=expression, **transfer_red_realize_ctx.get_insn_kwargs()) @@ -1269,12 +1269,11 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, arg_dtypes, _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + (var(stage_exec_iname),)] + (*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars)), _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + ( - var(stage_exec_iname) + new_size,)] + (*outer_local_iname_vars, var(stage_exec_iname) + new_size)] for acc_var in acc_vars)), red_realize_ctx.boxed_callables_table[0], orig_kernel.target) @@ -1282,7 +1281,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, stage_insn = make_assignment( id=stage_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars), expression=expression, within_inames=( @@ -1307,9 +1306,9 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, if nresults == 1: assert len(acc_vars) == 1 - return acc_vars[0][outer_local_iname_vars + (0,)] + return acc_vars[0][(*outer_local_iname_vars, 0)] else: - return [acc_var[outer_local_iname_vars + (0,)] for acc_var in + return [acc_var[(*outer_local_iname_vars, 0)] for acc_var in acc_vars] # }}} @@ -1419,7 +1418,7 @@ def map_scan_seq(red_realize_ctx, expr, nresults, arg_dtypes, assignees=acc_vars, within_inames=( red_realize_ctx.surrounding_within_inames - - frozenset((scan_param.sweep_iname,) + expr.inames)), + - frozenset((scan_param.sweep_iname, *expr.inames))), within_inames_is_final=True, depends_on=init_insn_depends_on, expression=expression, @@ -1558,7 +1557,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx=red_realize_ctx, name_based_on="acc_"+scan_param.scan_iname, nvars=nresults, - shape=outer_local_iname_sizes + (scan_size,), + shape=(*outer_local_iname_sizes, scan_size), dtypes=reduction_dtypes, address_space=AddressSpace.LOCAL) @@ -1579,7 +1578,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, init_insn = make_assignment( id=init_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(base_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(base_exec_iname))] for acc_var in acc_vars), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), @@ -1640,8 +1639,10 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, f"{red_realize_ctx.id_prefix}_{scan_param.scan_iname}_transfer") transfer_insn = make_assignment( id=transfer_id, - assignees=(acc_var[outer_local_iname_vars - + (var(scan_param.sweep_iname) - sweep_lower_bound_expr,)],), + assignees=(acc_var[( + *outer_local_iname_vars, + var(scan_param.sweep_iname) - sweep_lower_bound_expr) + ],), expression=pre_scan_result_i, within_inames=( red_realize_ctx.surrounding_within_inames @@ -1684,8 +1685,8 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, assignees=(read_var,), expression=( acc_var[ - outer_local_iname_vars - + (var(stage_exec_iname) - cur_size,)]), + (*outer_local_iname_vars, + var(stage_exec_iname) - cur_size)]), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), within_inames_is_final=True, @@ -1713,7 +1714,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, _strip_if_scalar(acc_vars, read_vars), _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + (var(stage_exec_iname),)] + (*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars)), red_realize_ctx.boxed_callables_table[0], orig_kernel.target) @@ -1721,7 +1722,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, write_stage_insn = make_assignment( id=write_stage_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars), expression=expression, within_inames=( @@ -1744,9 +1745,9 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, if nresults == 1: assert len(acc_vars) == 1 - return acc_vars[0][outer_local_iname_vars + (output_idx,)] + return acc_vars[0][(*outer_local_iname_vars, output_idx)] else: - return [acc_var[outer_local_iname_vars + (output_idx,)] + return [acc_var[(*outer_local_iname_vars, output_idx)] for acc_var in acc_vars] # }}} diff --git a/loopy/transform/save.py b/loopy/transform/save.py index bd25dec364d414a56d4d8da1e11d108c3043506c..2b874f679e20d4876aeb59801bcd9e76eb807883 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -77,7 +77,7 @@ class LivenessAnalysis: for idx, (item, next_item) in enumerate(zip( reversed(self.schedule), - reversed(self.schedule + [None]))): + reversed([*self.schedule, None]))): sched_idx = len(self.schedule) - idx - 1 # Look at next_item @@ -760,7 +760,7 @@ def save_and_reload_temporaries(program, entrypoint=None): if entrypoint is None: if len(program.entrypoints) != 1: raise LoopyError("Missing argument 'entrypoint'.") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) knl = program[entrypoint] diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 422d22568287b46afa5f27dfe35288c0df0068a9..9c3bafdabf480bb04c79daf106e0f0263f215e14 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) class ExprDescriptor(ImmutableRecord): - __slots__ = ["insn", "expr", "unif_var_dict"] + __slots__ = ["expr", "insn", "unif_var_dict"] # {{{ extract_subst diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index 4afdfcef775a7e56df5b4be473d10a3c402e5d7e..0f740dac547c87034f22278ab36264edd44d5b0f 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -45,7 +45,6 @@ from typing_extensions import Concatenate, ParamSpec, Self from pymbolic.primitives import Call, Variable from loopy.diagnostic import DirectCallUncachedWarning, LoopyError -from loopy.kernel import LoopKernel from loopy.kernel.function_interface import ( CallableKernel, InKernelCallable, @@ -61,6 +60,7 @@ from loopy.target import TargetBase if TYPE_CHECKING: + from loopy.kernel import LoopKernel from loopy.target.execution import ExecutorBase @@ -336,6 +336,7 @@ class TranslationUnit: ep_name, = self.entrypoints entrypoint = self[ep_name] + from loopy import LoopKernel if not isinstance(entrypoint, LoopKernel): raise ValueError("default entrypoint is not a kernel") @@ -749,7 +750,7 @@ class CallablesInferenceContext: # }}} -TUnitOrKernelT = TypeVar("TUnitOrKernelT", LoopKernel, TranslationUnit) +TUnitOrKernelT = TypeVar("TUnitOrKernelT", "LoopKernel", TranslationUnit) # {{{ helper functions @@ -778,6 +779,7 @@ def check_each_kernel( *args: P.args, **kwargs: P.kwargs ) -> None: + from loopy import LoopKernel if isinstance(t_unit_or_kernel, TranslationUnit): for clbl in t_unit_or_kernel.callables_table.values(): if isinstance(clbl, CallableKernel): @@ -807,6 +809,7 @@ def for_each_kernel( *args: P.args, **kwargs: P.kwargs ) -> TUnitOrKernelT: + from loopy import LoopKernel if isinstance(t_unit_or_kernel, TranslationUnit): t_unit = t_unit_or_kernel new_callables = {} @@ -886,7 +889,7 @@ def resolve_callables(t_unit: TranslationUnit) -> TranslationUnit: # get loopy specific callables known_callables.update(get_loopy_callables()) - callables_table = {} + callables_table: dict[FunctionIdT, InKernelCallable] = {} # callables: name of the calls seen in the program callables = {name for name, clbl in t_unit.callables_table.items() diff --git a/loopy/types.py b/loopy/types.py index b43026bdb2c5d8ea3b7414f7f97916d7ba54e736..f784799e375c31630beaddd4aaffa53b0259ab7f 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -202,13 +202,13 @@ class OpaqueType(LoopyType): # }}} -ToLoopyTypeConvertible: TypeAlias = Union[Type[auto], None, np.dtype, LoopyType] +ToLoopyTypeConvertible: TypeAlias = Union[Type[auto], np.dtype, LoopyType, None] def to_loopy_type(dtype: ToLoopyTypeConvertible, allow_auto: bool = False, allow_none: bool = False, for_atomic: bool = False - ) -> Union[Type[auto], None, LoopyType]: + ) -> Union[Type[auto], LoopyType, None]: if dtype is None: if allow_none: return None diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index 9103c42cc9ef9f8e1b2d72fbb160e8cc4bd319ab..b52ec460119d105de1616c9175d013652fa90946 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -28,7 +28,7 @@ def test_laplacian_stiffness(ctx_factory): % dict(Nb=Nb, Nq=Nq, dim=dim), [ "dPsi(ij, dxi) := sum_float32(@ax_b," - " jacInv[ax_b,dxi,K,q] * DPsi[ax_b,ij,q])", # noqa + " jacInv[ax_b,dxi,K,q] * DPsi[ax_b,ij,q])", "A[K, i, j] = sum_float32(q, w[q] * jacDet[K,q] * (" "sum_float32(dx_axis, dPsi$one(i,dx_axis)*dPsi$two(j,dx_axis))))" ], @@ -77,7 +77,7 @@ def test_laplacian_stiffness(ctx_factory): Ncloc = 16 # noqa knl = lp.split_iname(knl, "K", Ncloc, outer_iname="Ko", inner_iname="Kloc") - knl = lp.precompute(knl, "dPsi$one", np.float32, ["dx_axis"], default_tag=None) # noqa + knl = lp.precompute(knl, "dPsi$one", np.float32, ["dx_axis"], default_tag=None) knl = lp.tag_inames(knl, {"j": "ilp.seq"}) return knl, ["Ko", "Kloc"] diff --git a/proto-tests/test_sem.py b/proto-tests/test_sem.py index d87126cfbaa69906f656b3888ba313aeac02a7d9..acb7d34251f3bc7e02e921b7aa120158afae4ee1 100644 --- a/proto-tests/test_sem.py +++ b/proto-tests/test_sem.py @@ -53,7 +53,7 @@ def test_laplacian(ctx_factory): [ lp.GlobalArg("u", dtype, shape=field_shape, order=order), lp.GlobalArg("lap", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6, *field_shape), order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -139,7 +139,7 @@ def test_laplacian_lmem(ctx_factory): [ lp.GlobalArg("u", dtype, shape=field_shape, order=order), lp.GlobalArg("lap", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6, *field_shape), order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -216,7 +216,7 @@ def test_laplacian_lmem_ilp(ctx_factory): [ lp.GlobalArg("u", dtype, shape=field_shape, order=order), lp.GlobalArg("lap", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6, *field_shape), order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -320,7 +320,7 @@ def test_advect(ctx_factory): lp.GlobalArg("Nu", dtype, shape=field_shape, order=order), lp.GlobalArg("Nv", dtype, shape=field_shape, order=order), lp.GlobalArg("Nw", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(9,)+field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(9, *field_shape), order=order), lp.GlobalArg("D", dtype, shape=(N, N), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -359,7 +359,7 @@ def test_advect_dealias(ctx_factory): K_sym = var("K") # noqa field_shape = (N, N, N, K_sym) - interim_field_shape = (M, M, M, K_sym) # noqa + interim_field_shape = (M, M, M, K_sym) # 1. direction-by-direction similarity transform on u # 2. invert diagonal diff --git a/proto-tests/test_sem_tim.py b/proto-tests/test_sem_tim.py index 2949b39d3f484d996cf64f3731ab718462d304c9..2d0c2fe7e7021fffb59f5bcd09ec2d45e9bab7cc 100644 --- a/proto-tests/test_sem_tim.py +++ b/proto-tests/test_sem_tim.py @@ -53,7 +53,7 @@ def test_laplacian(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(6, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -140,7 +140,7 @@ def test_laplacian_lmem(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(6, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -230,7 +230,7 @@ def test_laplacian_lmem_ilp(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(6, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -328,7 +328,7 @@ def test_advect(ctx_factory): lp.ArrayArg("Nu", dtype, shape=field_shape, order=order), lp.ArrayArg("Nv", dtype, shape=field_shape, order=order), lp.ArrayArg("Nw", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(9,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(9, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(N, N), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -367,7 +367,7 @@ def test_advect_dealias(ctx_factory): K_sym = var("K") # noqa field_shape = (N, N, N, K_sym) - interim_field_shape = (M, M, M, K_sym) # noqa + interim_field_shape = (M, M, M, K_sym) # 1. direction-by-direction similarity transform on u # 2. invert diagonal diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index eb8125cdb7473921f66d212366368caee587394f..7a519d808544fef69122c272a829931f24e58576 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -38,7 +38,7 @@ def test_tim2d(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(3,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(3, *field_shape), order=order), # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), # lp.ImageArg("D", dtype, shape=(n, n)), @@ -46,7 +46,7 @@ def test_tim2d(ctx_factory): ], name="semlap2D", assumptions="K>=1") - unroll = 32 # noqa + unroll = 32 seq_knl = knl knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "o"], default_tag="l.auto") @@ -101,13 +101,13 @@ def test_red2d(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(3,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(3, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap2D", assumptions="K>=1") - unroll = 32 # noqa + unroll = 32 seq_knl = knl knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "o"], default_tag="l.auto") @@ -168,7 +168,7 @@ def test_tim3d(ctx_factory): lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(6, *field_shape), order=order), # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), # lp.ImageArg("D", dtype, shape=(n, n)), diff --git a/pyproject.toml b/pyproject.toml index c23c2973d7a5d58d220805a9ac838541dfa07279..e07302dc367a809fe84e7c3708051beed7acb90c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,9 +99,7 @@ extend-select = [ # TODO # "UP", # pyupgrade - # "RUF", # ruff - - "RUF022", # __all__ isn't sorted + "RUF", # ruff ] extend-ignore = [ "C90", # McCabe complexity diff --git a/test/test_apps.py b/test/test_apps.py index c4cffaee1d7fdc8c700615bfbfcd45fc74b38dcd..ce8b97015512e7e4f7b86bbbc6ee74adf4348fe7 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -27,7 +27,7 @@ import numpy as np import pytest import pyopencl as cl -import pyopencl.clmath # noqa +import pyopencl.clmath import pyopencl.clrandom # noqa import loopy as lp diff --git a/test/test_callables.py b/test/test_callables.py index 44a94e43a0717ac575a145afc7caec2d501ae763..d7771d20cbfdf778059e1e1b0a3bbf485228bd9c 100644 --- a/test/test_callables.py +++ b/test/test_callables.py @@ -26,7 +26,7 @@ import numpy as np import pytest import pyopencl as cl -import pyopencl.clrandom # noqa: F401 +import pyopencl.clrandom from pyopencl.tools import ( # noqa: F401 pytest_generate_tests_for_pyopencl as pytest_generate_tests, ) diff --git a/test/test_dg.py b/test/test_dg.py index bc134d9cb2b154dfcf179b88139108d46ef46b0b..04104620845820b3d46c83060532e8327a835282 100644 --- a/test/test_dg.py +++ b/test/test_dg.py @@ -25,7 +25,7 @@ import logging # noqa import numpy as np import pyopencl as cl -import pyopencl.array # noqa +import pyopencl.array from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests, ) diff --git a/test/test_diff.py b/test/test_diff.py index 626ddb70e4f3d7939a223056c1f9741dd8e97f3a..5b7d0bbca888a9655374848e2d96d9caeb59c24b 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -23,11 +23,11 @@ THE SOFTWARE. import logging import sys -import numpy as np # noqa +import numpy as np import numpy.linalg as la import pyopencl as cl -import pyopencl.clrandom # noqa +import pyopencl.clrandom import loopy as lp diff --git a/test/test_domain.py b/test/test_domain.py index c422e131dc3870aa3353803f7876f123ccebb015..843bcf3175152483b1019a2ebb6778d88c922f99 100644 --- a/test/test_domain.py +++ b/test/test_domain.py @@ -24,11 +24,11 @@ import logging import sys import numpy as np -import pytest # noqa +import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom import loopy as lp diff --git a/test/test_expression.py b/test/test_expression.py index 1b973e9a66e9eafdc87fc3096a66a8dea2fb46cd..b4b856e2b180e3440848c5cfeb5f9c88da798a38 100644 --- a/test/test_expression.py +++ b/test/test_expression.py @@ -27,8 +27,8 @@ import numpy as np import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom from pymbolic.mapper.evaluator import EvaluationMapper import loopy as lp diff --git a/test/test_fortran.py b/test/test_fortran.py index 8f1291bba95ae7166be894fa362d5f984164a629..aa7c241eac3e7968d224a8f342e47e5367eba01e 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -28,7 +28,7 @@ import numpy as np import pytest import pyopencl as cl -import pyopencl.clrandom # noqa +import pyopencl.clrandom import loopy as lp @@ -136,7 +136,7 @@ def test_assign_single_precision_scalar(ctx_factory): t_unit = lp.parse_fortran(fortran_src) import re - assert re.search("1.1000000[0-9]*f", lp.generate_code_v2(t_unit).device_code()) + assert re.search(r"1.1000000[0-9]*f", lp.generate_code_v2(t_unit).device_code()) a_dev = cl.array.empty(queue, 1, dtype=np.float64, order="F") t_unit(queue, a=a_dev) diff --git a/test/test_isl.py b/test/test_isl.py index fc1312f7c8c296cbd95036d055cf0427edd71c74..d61031dfca4f0a83a046f7ea9a17f3a25f210419 100644 --- a/test/test_isl.py +++ b/test/test_isl.py @@ -26,7 +26,7 @@ import islpy as isl def test_aff_to_expr(): s = isl.Space.create_from_names(isl.Context(), ["a", "b"]) zero = isl.Aff.zero_on_domain(isl.LocalSpace.from_space(s)) - one = zero.set_constant_val(1) # noqa + one = zero.set_constant_val(1) a = zero.set_coefficient_val(isl.dim_type.in_, 0, 1) b = zero.set_coefficient_val(isl.dim_type.in_, 1, 1) diff --git a/test/test_loopy.py b/test/test_loopy.py index bfa6073289018df15e6751348e62e5f86aa74f3f..319dd5d0f5828a63fc5a56b079ce2890a63fb599 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -27,9 +27,9 @@ import numpy as np import pytest import pyopencl as cl -import pyopencl.array # noqa -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.array +import pyopencl.clmath +import pyopencl.clrandom import loopy as lp @@ -3271,7 +3271,7 @@ def test_sep_array_ordering(ctx_factory): """ x[k, i] = k """, - [lp.GlobalArg("x", shape=("noutputs", "m"), dim_tags="sep,C")] + [...], + [lp.GlobalArg("x", shape=("noutputs", "m"), dim_tags="sep,C"), ...], fixed_parameters=dict(noutputs=n), ) knl = lp.tag_inames(knl, "k:unr") diff --git a/test/test_reduction.py b/test/test_reduction.py index 0ca1a26505ff7117a77ea15cb16d027ffc68ed8c..b8b32fb08e3ad70d2a452599fe5b9a331dfef597 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -27,8 +27,8 @@ import numpy as np import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom import pyopencl.version import loopy as lp diff --git a/test/test_scan.py b/test/test_scan.py index 986a30daa0f382d7953313db509bc1a0cdb10a28..5cb7573e4b80599e5fbc65ee5a484f19a7f5faee 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -30,8 +30,8 @@ import numpy as np import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom import loopy as lp @@ -402,7 +402,7 @@ def test_segmented_scan(ctx_factory, n, segment_boundaries_indices, iname_tag): arr = np.ones(n, dtype=np.float32) segment_boundaries = np.zeros(n, dtype=np.int32) - segment_boundaries[(segment_boundaries_indices,)] = 1 + segment_boundaries[segment_boundaries_indices,] = 1 knl = lp.make_kernel( "{[i,j]: 0<=i