diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32707fe869e7f08e3012a7cb2b57954822bf62ac..2c6c77e7783afd993ff26d71e60daab3c760d5bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,13 @@ jobs: pipx install ruff ruff check + typos: + name: Typos + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: crate-ci/typos@master + pylint: name: Pylint runs-on: ubuntu-latest diff --git a/MEMO b/MEMO index f4e5c34e48e62d5c951d01fcb212a9117e361def..5a9438811e271741e58ea52b8958ebcb2b985afd 100644 --- a/MEMO +++ b/MEMO @@ -7,7 +7,7 @@ Documentation Notes Things to consider ^^^^^^^^^^^^^^^^^^ -- Depedencies are pointwise for shared loop dimensions +- Dependencies are pointwise for shared loop dimensions and global over non-shared ones (between dependent and ancestor) - multiple insns could fight over which iname gets local axis 0 diff --git a/contrib/mem-pattern-explorer/pattern_vis.py b/contrib/mem-pattern-explorer/pattern_vis.py index 82a2b96029d1a9005724557a587f0359bcfb6c9b..bbde231740fef0d2dd3f5942ad4ec24cd641795b 100644 --- a/contrib/mem-pattern-explorer/pattern_vis.py +++ b/contrib/mem-pattern-explorer/pattern_vis.py @@ -76,7 +76,7 @@ class ArrayAccessPatternContext: class Array: def __init__(self, ctx, name, shape, strides, elements_per_row=None): # Each array element stores a tuple: - # (timestamp, subgroup, g0, g1, g2, ) of last acccess + # (timestamp, subgroup, g0, g1, g2, ) of last access assert len(shape) == len(strides) diff --git a/doc/misc.rst b/doc/misc.rst index 3fea6fdd44e11f88ae1249cde3b07c97f72f7dae..be1c964cdec45603961f4e6f84dce24e9b5015ab 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -158,7 +158,7 @@ In the meantime, you can generate code simply by saying:: print(cg_result.host_code()) print(cg_result.device_code()) -Additionally, for C-based languages, header defintions are available via:: +Additionally, for C-based languages, header definitions are available via:: loopy.generate_header(knl) @@ -338,8 +338,8 @@ This list is always growing, but here are a few pointers: Use :func:`loopy.join_inames`. -In what sense does Loopy suport vectorization? ----------------------------------------------- +In what sense does Loopy support vectorization? +----------------------------------------------- There are really two ways in which the OpenCL/CUDA model of computation exposes vectorization: @@ -352,7 +352,7 @@ vectorization: e.g. ``float4``, which support arithmetic with implicit vector semantics as well as a number of 'intrinsic' functions. -Loopy suports both. The first one, SIMT, is accessible by tagging inames with, +Loopy supports both. The first one, SIMT, is accessible by tagging inames with, e.g., ``l.0```. Accessing the second one requires using both execution- and data-reshaping capabilities in loopy. To start with, you need an array that has an axis with the length of the desired vector. If that's not yet available, diff --git a/loopy/__init__.py b/loopy/__init__.py index 275d4f26e90c0b160cc7c39172c048f34180a6b4..1eebb82237e6258fc26b15692c312816eabf075e 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -563,18 +563,18 @@ def make_copy_kernel(new_dim_tags, old_dim_tags=None): indices = ["i%d" % i for i in range(rank)] shape = ["n%d" % i for i in range(rank)] - commad_indices = ", ".join(indices) + command_indices = ", ".join(indices) bounds = " and ".join( f"0<={ind}<{shape_i}" for ind, shape_i in zip(indices, shape)) set_str = "{{[{}]: {} }}".format( - commad_indices, + command_indices, bounds ) result = make_kernel(set_str, "output[%s] = input[%s]" - % (commad_indices, commad_indices), + % (command_indices, command_indices), lang_version=MOST_RECENT_LANGUAGE_VERSION, default_offset=auto) diff --git a/loopy/check.py b/loopy/check.py index 17887cff5f3ffa5adcb2ee99e88c6f25b43527ac..c2b3d8cd3e7ef2de034e5e51c9976c675a0798c5 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -1123,7 +1123,7 @@ def _check_variable_access_ordered_inner(kernel): # for each *pred*, we will calculate all the direct/indirect # instructions that can be reached. seen_successors = set() - # first let us start with direct sucessors + # first let us start with direct successors to_check = edges[pred].copy() while to_check: successor = to_check.pop() @@ -1219,7 +1219,7 @@ def check_variable_access_ordered(kernel): """Checks that between each write to a variable and all other accesses to the variable there is either: - * a direct/indirect depdendency edge, or + * a direct/indirect dependency edge, or * an explicit statement that no ordering is necessary (expressed through a bi-directional :attr:`loopy.InstructionBase.no_sync_with`) """ diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index 530e92678e1a568e9e31f02392c625ea734e07e7..fc9eace87512285abd8288c167f90d82a6b23e63 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -200,7 +200,7 @@ class Scope: return None raise TranslationError( - "no type for '%s' found in 'implict none' routine" + "no type for '%s' found in 'implicit none' routine" % name) from None return self.implicit_types.get(name[0], np.dtype(np.int32)) @@ -426,7 +426,7 @@ class F2LoopyTranslator(FTreeWalkerBase): scope.implicit_types = None for stmt, specs in node.items: - if scope.implict_types is None: + if scope.implict_types is None: # spellchecker: disable-line raise TranslationError("implicit decl not allowed after " "'implicit none'") tp = self.dtype_from_stmt(stmt) diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 160b6415b2d3743ae3c87a2591b09d39d780fdea..28aa3be30562f3ce690da00de954706471f2ce91 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -186,7 +186,7 @@ def simplify_pw_aff(pw_aff, context=None): continue if aff_i.gist(dom_j).is_equal(aff_j): - # aff_i is sufficient to conver aff_j, eliminate aff_j + # aff_i is sufficient to cover aff_j, eliminate aff_j new_pieces = pieces[:] if i < j: new_pieces.pop(j) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index a9b3bb07ef341fcf5e707c1783aa4cab2f052b4b..7bf4cb845d9a9f719f6ac0af644f7006b632da7f 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -164,7 +164,7 @@ class LoopKernel(Taggable): .. attribute:: loop_priority A frozenset of priority constraints to the kernel. Each such constraint - is a tuple of inames. Inames occuring in such a tuple will be scheduled + is a tuple of inames. Inames occurring in such a tuple will be scheduled earlier than any iname following in the tuple. This applies only to inames with non-parallel implementation tags. @@ -515,7 +515,7 @@ class LoopKernel(Taggable): for iname in inames: home_domain_index = hdm[iname] if home_domain_index in domain_indices: - # nothin' new + # nothing new continue domain_path_to_root = [home_domain_index] + ppd[home_domain_index] diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index c4cc880a0db607fcea6fb3f89968a2b8e99b9da1..f359eec3394a2ec2007bb98ddb44119f340c2523 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1884,7 +1884,7 @@ def add_inferred_inames(knl): # {{{ apply single-writer heuristic @for_each_kernel -def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True, +def apply_single_writer_dependency_heuristic(kernel, warn_if_used=True, error_if_used=False): logger.debug("%s: default deps" % kernel.name) @@ -2023,7 +2023,7 @@ class SliceToInameReplacer(IdentityMapper): .. attribute:: subarray_ref_bounds A :class:`list` (one entry for each :class:`SubArrayRef` to be created) - of :class:`dict` instances to store the slices enountered in the + of :class:`dict` instances to store the slices encountered in the expressions as a mapping from ``iname`` to a tuple of ``(start, stop, step)``, which describes the boxy (i.e. affine) constraints imposed on the ``iname`` by the corresponding slice notation its intended to @@ -2574,7 +2574,7 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): knl = guess_arg_shape_if_requested(knl, default_order) knl = apply_default_order_to_args(knl, default_order) knl = resolve_dependencies(knl) - knl = apply_single_writer_depencency_heuristic(knl, warn_if_used=False) + knl = apply_single_writer_dependency_heuristic(knl, warn_if_used=False) # ------------------------------------------------------------------------- # Ordering dependency: diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index e3fcf108af40f21396011aa155e5048f2058387a..e81e4dafc535328b7436fd2d3751927a2cb42e5f 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -672,7 +672,7 @@ class ScalarCallable(InKernelCallable): class CallableKernel(InKernelCallable): """ - Records informations about a callee kernel. Also provides interface through + Records information about a callee kernel. Also provides interface through member methods to make the callee kernel compatible to be called from a caller kernel. diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 198b7c03f3d450a1d4afcca8d47f9862d88c6a76..b9b86b53b1c7ae51d161bd1c59cd0da964b9ac6f 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -934,7 +934,7 @@ class CallInstruction(MultiAssignmentBase): A tuple of `:class:loopy.Optional`. If an entry is not empty, it contains the type that will be assigned to the new temporary variable - created from the assigment. + created from the assignment. .. automethod:: __init__ """ @@ -1099,7 +1099,7 @@ def is_array_call(assignees, expression): Returns *True* is the instruction is an array call. An array call is a function call applied to array type objects. If any of - the arguemnts or assignees to the function is an array, + the arguments or assignees to the function is an array, :meth:`is_array_call` will return *True*. """ from pymbolic.primitives import Call, Subscript @@ -1460,7 +1460,7 @@ class BarrierInstruction(_DataObliviousInstruction): .. attribute:: mem_kind A string, ``"global"`` or ``"local"``. Chooses which memory type to - sychronize, for targets that require this (e.g. OpenCL) + synchronize, for targets that require this (e.g. OpenCL) The textual syntax in a :mod:`loopy` kernel is:: diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 5ed9b2ad3511ae4f1f7875ccced1b2ae2eff0ea3..0826ed010ee3f1c57b155492181bd09e30760411 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -263,7 +263,7 @@ def find_all_insn_inames(kernel): if insn.within_inames_is_final: continue - # {{{ depdency-based propagation + # {{{ dependency-based propagation inames_old = insn_id_to_inames[insn.id] inames_new = inames_old | guess_iname_deps_based_on_var_use( @@ -513,8 +513,8 @@ def get_dot_dependency_graph(kernel, callables_table, iname_cluster=True, """ # make sure all automatically added stuff shows up - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=False) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + kernel = apply_single_writer_dependency_heuristic(kernel, warn_if_used=False) if iname_cluster and not kernel.linearization: try: @@ -1252,9 +1252,9 @@ def find_recursive_dependencies(kernel, insn_ids): for insn_id in queue: insn = kernel.id_to_insn[insn_id] - additionals = insn.depends_on - result - result.update(additionals) - new_queue.extend(additionals) + additional = insn.depends_on - result + result.update(additional) + new_queue.extend(additional) queue = new_queue @@ -1735,7 +1735,7 @@ def get_global_barrier_order(kernel): @memoize_on_first_arg def find_most_recent_global_barrier(kernel, insn_id): - """Return the id of the latest occuring global barrier which the + """Return the id of the latest occurring global barrier which the given instruction (indirectly or directly) depends on, or *None* if this instruction does not depend on a global barrier. @@ -1995,7 +1995,7 @@ def infer_args_are_input_output(kernel): elif isinstance(arg, (ConstantArg, ImageArg, ValueArg)): pass else: - raise NotImplementedError("Unkonwn argument type %s." % type(arg)) + raise NotImplementedError("Unknown argument type %s." % type(arg)) if not (arg.is_input or arg.is_output): raise LoopyError("Kernel argument must be either input or output." diff --git a/loopy/match.py b/loopy/match.py index 889f4e74fab1415e8edf6286fa5952089ae30628..5e409791bf0df92e5ac6beaa499a52c4adb87656 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -1,4 +1,4 @@ -"""Matching functionality for instruction ids and subsitution +"""Matching functionality for instruction ids and substitution rule invocations stacks.""" diff --git a/loopy/options.py b/loopy/options.py index 9c4fa0fb4076c6b9e4381043f328d15b347169b0..d58421e3e62ef07ebe39eca0cb6312360468786e 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -118,7 +118,7 @@ class Options(ImmutableRecord): .. attribute:: cl_exec_manage_array_events - Within the PyOpenCL executor, respect and udpate + Within the PyOpenCL executor, respect and update :attr:`pyopencl.array.Array.events`. Defaults to *True*. @@ -156,7 +156,7 @@ class Options(ImmutableRecord): Allow re-ordering of floating point arithmetic. Re-ordering may give different results as floating point arithmetic is not - associative in addition and mulitplication. Default is *True*. + associative in addition and multiplication. Default is *True*. Note that the implementation of this option is currently incomplete. .. attribute:: build_options diff --git a/loopy/preprocess.py b/loopy/preprocess.py index d24e14cc270bbbf165c4d5b90cc51c89f6165993..7176d9d15d21da213f7f83b0b5a914ea020d6915 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -825,8 +825,8 @@ def preprocess_program(t_unit: TranslationUnit) -> TranslationUnit: from loopy.transform.subst import expand_subst t_unit = expand_subst(t_unit) - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - t_unit = apply_single_writer_depencency_heuristic(t_unit) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + t_unit = apply_single_writer_dependency_heuristic(t_unit) # Ordering restrictions: # diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index ca45521e3294ba12d3ed1bbf11febf92ab2ef11f..6249b36bae5b006daa04abf815b38466ce509e2d 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -718,7 +718,7 @@ def get_insns_in_topologically_sorted_order( for dep in insn.depends_on: rev_dep_map[dep].add(insn.id) - # For breaking ties, we compare the features of an intruction + # For breaking ties, we compare the features of an instruction # so that instructions with the same set of features are lumped # together. This helps in :method:`schedule_as_many_run_insns_as_possible` # which bails after 5 insns that don't have the same feature. @@ -1196,7 +1196,7 @@ def _generate_loop_schedules_internal( print( "%(warn)swarning:%(reset_all)s '%(iname)s', " "which the schedule is " - "currently stuck inside of, seems mis-nested. " + "currently stuck inside of, seems misnested. " "'%(subdep)s' must occur " "before '%(dep)s', " "but '%(subdep)s must be outside " "'%(iname)s', whereas '%(dep)s' must be back " @@ -1404,7 +1404,7 @@ def _generate_loop_schedules_internal( get_priority_tiers(wanted, sched_state.kernel.loop_priority)) # Update the loop priority set, because some constraints may have - # have been contradictary. + # have been contradictory. loop_priority_set = set().union(*[set(t) for t in priority_tiers]) priority_tiers.append( diff --git a/loopy/statistics.py b/loopy/statistics.py index c9cf9d93860fd9d2bc59e6ad3be1583db600b832..0bd1340c1161053acf81c726146b2dcfa9f3cad6 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -629,7 +629,7 @@ class Op(ImmutableRecord): work-group executes on a single compute unit with all work-items within the work-group sharing local memory. A sub-group is an implementation-dependent grouping of work-items within a work-group, - analagous to an NVIDIA CUDA warp. + analogous to an NVIDIA CUDA warp. .. attribute:: kernel_name @@ -723,7 +723,7 @@ class MemAccess(ImmutableRecord): work-group executes on a single compute unit with all work-items within the work-group sharing local memory. A sub-group is an implementation-dependent grouping of work-items within a work-group, - analagous to an NVIDIA CUDA warp. + analogous to an NVIDIA CUDA warp. .. attribute:: kernel_name @@ -1109,7 +1109,7 @@ def _get_lid_and_gid_strides(knl, array, index): # create lid_strides and gid_strides dicts - # strides are coefficents in flattened index, i.e., we want + # strides are coefficients in flattened index, i.e., we want # lid_strides = {0:l0, 1:l1, 2:l2, ...} and # gid_strides = {0:g0, 1:g1, 2:g2, ...}, # where l0, l1, l2, g0, g1, and g2 come from flattened index @@ -1723,7 +1723,7 @@ def get_op_map(program, count_redundant_work=False, :arg subgroup_size: (currently unused) An :class:`int`, :class:`str` ``"guess"``, or *None* that specifies the sub-group size. An OpenCL sub-group is an implementation-dependent grouping of work-items within - a work-group, analagous to an NVIDIA CUDA warp. subgroup_size is used, + a work-group, analogous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when counting a :class:`MemAccess` whose count_granularity specifies that it should only be counted once per sub-group. If set to *None* an attempt to find the sub-group size using the device will be @@ -1921,7 +1921,7 @@ def get_mem_access_map(program, count_redundant_work=False, :arg subgroup_size: An :class:`int`, :class:`str` ``"guess"``, or *None* that specifies the sub-group size. An OpenCL sub-group is an implementation-dependent grouping of work-items within a work-group, - analagous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when + analogous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when counting a :class:`MemAccess` whose count_granularity specifies that it should only be counted once per sub-group. If set to *None* an attempt to find the sub-group size using the device will be made, if this fails @@ -2085,7 +2085,7 @@ def get_synchronization_map(program, subgroup_size=None, entrypoint=None): :arg subgroup_size: (currently unused) An :class:`int`, :class:`str` ``"guess"``, or *None* that specifies the sub-group size. An OpenCL sub-group is an implementation-dependent grouping of work-items within - a work-group, analagous to an NVIDIA CUDA warp. subgroup_size is used, + a work-group, analogous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when counting a :class:`MemAccess` whose count_granularity specifies that it should only be counted once per sub-group. If set to *None* an attempt to find the sub-group size using the device will be diff --git a/loopy/symbolic.py b/loopy/symbolic.py index d56b54e79a3c52b78ebd408dc9b33ac0db747a42..2a1b140cc654fbdb2267be54f2dcf15393bfa29e 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -683,7 +683,7 @@ class TaggedVariable(LoopyExpressionBase, p.Variable, Taggable): A :class:`frozenset` of subclasses of :class:`pytools.tag.Tag` used to provide metadata on this object. Legacy string tags are converted to :class:`~loopy.LegacyStringInstructionTag` or, if they used to carry - a functional meaning, the tag carrying that same fucntional meaning + a functional meaning, the tag carrying that same functional meaning (e.g. :class:`~loopy.UseStreamingStoreTag`). Inherits from :class:`pymbolic.primitives.Variable` @@ -737,7 +737,7 @@ class Reduction(LoopyExpressionBase): .. attribute:: allow_simultaneous A :class:`bool`. If not *True*, an iname is allowed to be used - in precisely one reduction, to avoid mis-nesting errors. + in precisely one reduction, to avoid misnesting errors. """ init_arg_names = ("operation", "inames", "expr", "allow_simultaneous") diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index fc3238e92a29f492476d28f84edef610e698ace9..9cde501a7e242481ac24519d2888f56656b43f62 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -94,21 +94,21 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): return f"_lpy_np.dtype(_lpy_np.{name})" raise Exception(f"dtype: {dtype} not recognized") - # {{{ handle non numpy arguements + # {{{ handle non numpy arguments def handle_non_numpy_arg(self, gen, arg): pass # }}} - # {{{ handle allocation of unspecified arguements + # {{{ handle allocation of unspecified arguments def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str], skip_arg_checks: bool) -> None: """ - Handle allocation of non-specified arguements for C-execution + Handle allocation of non-specified arguments for C-execution """ from pymbolic import var @@ -181,7 +181,7 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): def initialize_system_args(self, gen): """ - Initializes possibly empty system arguements + Initializes possibly empty system arguments """ pass @@ -238,7 +238,7 @@ class CCompiler: The general strategy here is as follows: 1. A :class:`codepy.Toolchain` is guessed from distutils. - The user may override any flags obtained therein by passing in arguements + The user may override any flags obtained therein by passing in arguments to cc, cflags, etc. 2. The kernel source is built into and object first, then made into a shared diff --git a/loopy/target/execution.py b/loopy/target/execution.py index cb081a3e582b2cf445fd394a499eaac9571f5f4b..21600c7343a6b0e4dd50d5cdf7650f83f2a08fec 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -377,21 +377,21 @@ class ExecutionWrapperGeneratorBase(ABC): # }}} - # {{{ handle non numpy arguements + # {{{ handle non numpy arguments def handle_non_numpy_arg(self, gen: CodeGenerator, arg): raise NotImplementedError() # }}} - # {{{ handle allocation of unspecified arguements + # {{{ handle allocation of unspecified arguments def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str], skip_arg_checks: bool) -> None: """ - Handle allocation of non-specified arguements for C-execution + Handle allocation of non-specified arguments for C-execution """ raise NotImplementedError() @@ -647,7 +647,7 @@ class ExecutionWrapperGeneratorBase(ABC): def initialize_system_args(self, gen): """ - Override to intialize any default system args + Override to initialize any default system args """ raise NotImplementedError() @@ -674,7 +674,7 @@ class ExecutionWrapperGeneratorBase(ABC): """ Generates the wrapping python invoker for this execution target - :arg kernel: the loopy :class:`LoopKernel`(s) to be executued + :arg kernel: the loopy :class:`LoopKernel`(s) to be executed :codegen_result: the loopy :class:`CodeGenerationResult` created by code generation @@ -944,7 +944,7 @@ class ExecutorBase: # }}} -# {{{ code highlighers +# {{{ code highlighters def get_highlighted_code(text, python=False): diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index ce2a150b0aea261b31ef1258df649f7072b720d7..31d1cfd2dc6142e383cfab416cd96b8b864e0790 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -112,7 +112,7 @@ class ExprToISPCExprMapper(ExpressionToCExpressionMapper): if (isinstance(ary, TemporaryVariable) and ary.address_space == AddressSpace.PRIVATE): - # generate access code for acccess to private-index temporaries + # generate access code for access to private-index temporaries gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs() if lsize: diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index e2f3ecda2709e413314cfa0036af861ab1f8dbfd..14383e54f21a7d0229701226aa9e23d4d827d172 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -642,7 +642,7 @@ class OpenCLCASTBuilder(CFamilyASTBuilder): from loopy.target.c import FunctionDeclarationWrapper assert isinstance(fdecl, FunctionDeclarationWrapper) if not codegen_state.is_entrypoint: - # auxiliary kernels need not mention opencl speicific qualifiers + # auxiliary kernels need not mention opencl specific qualifiers # for a functions signature return preambles, fdecl @@ -908,7 +908,7 @@ class OpenCLCASTBuilder(CFamilyASTBuilder): # }}} -# {{{ volatile mem acccess target +# {{{ volatile mem access target class VolatileMemExpressionToOpenCLCExpressionMapper( ExpressionToOpenCLCExpressionMapper): diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index ec702e39be7b1de64bda9c08be4788c5c334d409..ecaea9b57b27895eb30e175025fb7a19a08593eb 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -324,7 +324,7 @@ class ExpressionToPyOpenCLCExpressionMapper(ExpressionToOpenCLCExpressionMapper) # -ffp-contract=fast which is the default for PTX codegen, but # for some unknown reason, clang fails to see the FMAs. # - # We need to do this only for complex as we haev temporaries + # We need to do this only for complex as we have temporaries # only in complex. For reals, the code generated looks like # # res = c + a * b @@ -879,7 +879,7 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): value_arg_code = generate_value_arg_setup( codegen_state.kernel, regular_arg_names) - arry_arg_code = generate_array_arg_setup( + array_arg_code = generate_array_arg_setup( codegen_state.kernel, regular_arg_names) if struct_overflow_arg_names: @@ -952,7 +952,7 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): "argument count of the kernel ({_lpy_knl.num_args}).'"), Line(), value_arg_code, - arry_arg_code, + array_arg_code, overflow_args_code, Assign("_lpy_evt", f"{self.target.pyopencl_module_name}.enqueue_nd_range_kernel(" @@ -1207,7 +1207,7 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder): # }}} -# {{{ volatile mem acccess target +# {{{ volatile mem access target class VolatileMemPyOpenCLCASTBuilder(PyOpenCLCASTBuilder): def get_expression_to_c_expression_mapper(self, codegen_state): diff --git a/loopy/transform/array_buffer_map.py b/loopy/transform/array_buffer_map.py index ec3737233ceff1266baf003e3c5e2278be13b682..7e7b6459ca6a3467fe221fdf673180afb2e89585 100644 --- a/loopy/transform/array_buffer_map.py +++ b/loopy/transform/array_buffer_map.py @@ -413,17 +413,17 @@ class ArrayToBufferMap(ArrayToBufferMapBase): except_inames=frozenset(self.primed_sweep_inames)) s2s_domain = stor2sweep.domain() - s2s_domain, aligned_g_s2s_parm_dom = isl.align_two( + s2s_domain, aligned_g_s2s_param_dom = isl.align_two( s2s_domain, global_s2s_par_dom) arg_restrictions = ( - aligned_g_s2s_parm_dom + aligned_g_s2s_param_dom .eliminate(dim_type.set, 0, - aligned_g_s2s_parm_dom.dim(dim_type.set)) + aligned_g_s2s_param_dom.dim(dim_type.set)) .remove_divs()) return (arg_restrictions & s2s_domain).is_subset( - aligned_g_s2s_parm_dom) + aligned_g_s2s_param_dom) class NoOpArrayToBufferMap(ArrayToBufferMapBase): diff --git a/loopy/transform/callable.py b/loopy/transform/callable.py index 1fe40a37034ce0a3c95e9dcf86e42fd9e8ae9d1c..d683cbd29b2aff05434e09e01d7e37d6c7617476 100644 --- a/loopy/transform/callable.py +++ b/loopy/transform/callable.py @@ -102,7 +102,7 @@ def merge(translation_units): if (prg_i.callables_table[clbl_name] != prg_j.callables_table[clbl_name]): # TODO: generate unique names + rename for the colliding - # callables (if entrypoints are colliding that shuold still + # callables (if entrypoints are colliding that should still # be an error) raise NotImplementedError("Translation units to be merged" " must have different callable names" diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 088d896431f4ceaec28f01bdbfff75693c9dae9c..ddfc9b5e8241ca35b1d5d3c616ed667aa975a4a6 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -252,7 +252,7 @@ def add_prefetch_for_single_kernel(kernel, callables_table, var_name, footprint_subscripts, var_descr) # Our _not_provided is actually a different object from the one in the - # precompute module, but precompute acutally uses that to adjust its + # precompute module, but precompute actually uses that to adjust its # warning message. from loopy.transform.precompute import precompute_for_single_kernel @@ -653,7 +653,7 @@ def set_argument_order(kernel, arg_names): :arg arg_names: A list (or comma-separated string) or argument names. All arguments must be in this list. """ - # FIXME: @inducer -- shoulld this only affect the root kernel, or should it + # FIXME: @inducer -- should this only affect the root kernel, or should it # take a within? if isinstance(arg_names, str): diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index bb828221ffef9cbc58e8cb2a900530e5cab0a88f..6c2688d9015d47582a8aed73cc5033103970be04 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -154,7 +154,7 @@ class LoopyDiffMapper(DifferentiationMapper, RuleAwareIdentityMapper): dc = self.diff_context if expr.function.name in dc.kernel.substitutions: - # FIXME: Deal with subsitution rules + # FIXME: Deal with substitution rules # Need to use chain rule here, too. raise NotImplementedError("substitution rules in differentiation") else: @@ -382,8 +382,8 @@ def diff_kernel(kernel, diff_outputs, by, diff_iname_prefix="diff_i", assert isinstance(kernel, LoopKernel) - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=True) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + kernel = apply_single_writer_dependency_heuristic(kernel, warn_if_used=True) if isinstance(diff_outputs, str): diff_outputs = [ diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 18df3dae48dbc05b151046380de6b720456714c4..b835373da9d2b67894eea4e49f4f4d8b5067c580 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1540,7 +1540,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): break if not found: - raise LoopyError("invlaid tag kind: %s" % kind) + raise LoopyError("invalid tag kind: %s" % kind) from loopy.match import parse_match match = parse_match(insn_match) @@ -2265,7 +2265,7 @@ def add_inames_for_unused_hw_axes(kernel, within=None): Current limitations: * Only one iname in the kernel may be tagged with each of the unused hw axes. - * Occurence of an ``l.auto`` tag when an instruction is missing one of the + * Occurrence of an ``l.auto`` tag when an instruction is missing one of the local hw axes. :arg within: An instruction match as understood by diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 62991662861050294560ce8a2110e3e2fd078325..374587da57bc4857d498a730727510f711aab382 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -425,7 +425,7 @@ def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False, if not nosync_to_add and not empty_ok: raise LoopyError("No nosync annotations were added as a result " "of this call. add_nosync will (by default) only add them to " - "accompany existing depencies or group exclusions. Maybe you want " + "accompany existing dependencies or group exclusions. Maybe you want " "to pass force=True?") new_instructions = list(kernel.instructions) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index b5c7aa7a164a7013fff86d3892cc4fb212d76e96..422d22568287b46afa5f27dfe35288c0df0068a9 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -327,8 +327,8 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, # {{{ establish the relevant definition of lhs_name for each usage site dep_kernel = expand_subst(kernel) - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - dep_kernel = apply_single_writer_depencency_heuristic(dep_kernel) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + dep_kernel = apply_single_writer_dependency_heuristic(dep_kernel) assigning_insn_ids = {insn.id for insn in dep_kernel.instructions if lhs_name in insn.assignee_var_names()} @@ -354,7 +354,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, if len(rel_def_ids) > 1: raise LoopyError("more than one write to '%s' found in " - "depdendencies of '%s'--definition cannot be resolved " + "dependencies of '%s'--definition cannot be resolved " "(writer instructions ids: %s)" % (lhs_name, usage_insn_id, ", ".join(rel_def_ids))) @@ -433,7 +433,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, for i in indices: if not isinstance(i, Variable): raise LoopyError("In defining instruction '%s': " - "asignee index '%s' is not a plain variable. " + "assignee index '%s' is not a plain variable. " "Perhaps use loopy.affine_map_inames() " "to perform substitution." % (def_id, i)) diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index c0d1b0b0545436b8dfc7b45249f5509f415db12c..76e795b7601f0734180b216ffbb45dd57e33c60c 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -196,7 +196,7 @@ class TranslationUnit: .. attribute:: func_id_to_in_knl_callables_mappers A :class:`frozenset` of functions of the signature ``(target: - TargetBase, function_indentifier: str)`` that returns an instance + TargetBase, function_identifier: str)`` that returns an instance of :class:`loopy.kernel.function_interface.InKernelCallable` or *None*. .. automethod:: executor diff --git a/loopy/types.py b/loopy/types.py index 143715a3964dd31bceaf126b84d384d53fdcf6e0..a837d1c46e2688469c3ad7bfa1dc04361b279cbc 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -165,7 +165,7 @@ class AtomicNumpyType(NumpyType, AtomicType): class OpaqueType(LoopyType): """An opaque data type is truly opaque - it has no allocations, no temporaries of that type, etc. The only thing allowed is to be pass in - through one ValueArg and go out to another. It is introduced to accomodate + through one ValueArg and go out to another. It is introduced to accommodate functional calls to external libraries. """ def __init__(self, name: str) -> None: diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index 7ee30313c20713bc9a5d7732ce36d504934c8783..eb8125cdb7473921f66d212366368caee587394f 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -190,7 +190,7 @@ def test_tim3d(ctx_factory): knl = lp.split_iname(knl, "k", n, inner_tag="l.2") # , slabs=(0, 1)) knl = lp.split_iname(knl, "i", n, inner_tag="l.0") # , slabs=(0, 1)) -# knl = lp.tag_inames(knl, dict(k_nner="unr")) +# knl = lp.tag_inames(knl, dict(k_inner="unr")) knl = lp.tag_inames(knl, dict(o="unr")) knl = lp.tag_inames(knl, dict(m="unr")) diff --git a/pyproject.toml b/pyproject.toml index 6f7b977b686c07bc34771a345f151a361d1ba07d..9dadd57f557f66b42e8044fad9a6114e39184823 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,3 +90,29 @@ module = [ "IPython.*", ] ignore_missing_imports = true + +[tool.typos.default] +extend-ignore-re = [ + "(?Rm)^.*(#|//)\\s*spellchecker:\\s*disable-line$" +] + +[tool.typos.default.extend-words] +# like the numpy function, array range +arange = "arange" +# N-Dimensional +ND = "ND" +# used for 'diff_output' +dout = "dout" +# an element-wise slice of array u +ue = "ue" +# used in an ordering context, "ab" / "ba" +ba = "ba" + +"dependees" = "dependees" + +[tool.typos.files] +extend-exclude = [ + "loopy/target/c/compyte", + "notes/*/*.eps", +] + diff --git a/test/test_apps.py b/test/test_apps.py index 207bc7ee2045bfa0318ffb46ff96ea04a7d401fd..c4cffaee1d7fdc8c700615bfbfcd45fc74b38dcd 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -324,7 +324,7 @@ def test_rob_stroud_bernstein_full(): def test_stencil(ctx_factory): ctx = ctx_factory() - # n=32 causes corner case behavior in size calculations for temprorary (a + # n=32 causes corner case behavior in size calculations for temporary (a # non-unifiable, two-constant-segments PwAff as the base index) n = 256 diff --git a/test/test_c_execution.py b/test/test_c_execution.py index e703d941535f26c57e2dc29c2f279d5dd77c69ec..6208b9aed3fe4fb64e1fb1186bef696a45837546 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -95,17 +95,17 @@ def test_c_target_strides_nonsquare(): from loopy.target.c import ExecutableCTarget def __get_kernel(order="C"): - indicies = ["i", "j", "k"] - sizes = tuple(np.random.randint(1, 11, size=len(indicies))) + indices = ["i", "j", "k"] + sizes = tuple(np.random.randint(1, 11, size=len(indices))) # create domain strings domain_template = "{{ [{iname}]: 0 <= {iname} < {size} }}" domains = [] - for idx, size in zip(indicies, sizes): + for idx, size in zip(indices, sizes): domains.append(domain_template.format( iname=idx, size=size)) statement = "out[{indexed}] = 2 * a[{indexed}]".format( - indexed=", ".join(indicies)) + indexed=", ".join(indices)) return lp.make_kernel( domains, statement, @@ -142,17 +142,17 @@ def test_c_optimizations(): from loopy.target.c import ExecutableCTarget def __get_kernel(order="C"): - indicies = ["i", "j", "k"] - sizes = tuple(np.random.randint(1, 11, size=len(indicies))) + indices = ["i", "j", "k"] + sizes = tuple(np.random.randint(1, 11, size=len(indices))) # create domain strings domain_template = "{{ [{iname}]: 0 <= {iname} < {size} }}" domains = [] - for idx, size in zip(indicies, sizes): + for idx, size in zip(indices, sizes): domains.append(domain_template.format( iname=idx, size=size)) statement = "out[{indexed}] = 2 * a[{indexed}]".format( - indexed=", ".join(indicies)) + indexed=", ".join(indices)) return lp.make_kernel( domains, statement, diff --git a/test/test_callables.py b/test/test_callables.py index d58247a75dc7c0d7eae79f322f853cb79815b150..44a94e43a0717ac575a145afc7caec2d501ae763 100644 --- a/test/test_callables.py +++ b/test/test_callables.py @@ -1397,8 +1397,8 @@ def test_inline_deps(ctx_factory): prg = lp.merge([parent_knl, child_knl]) inlined = lp.inline_callable_kernel(prg, "func") - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - apply_single_writer_depencency_heuristic(inlined, error_if_used=True) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + apply_single_writer_dependency_heuristic(inlined, error_if_used=True) _evt, (a_dev,) = inlined(cq) diff --git a/test/test_loopy.py b/test/test_loopy.py index e9aa47ef4bfeffadd948ede094b4d817422d897e..34310171fc7823f5dc52666527a32e93a3148d35 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2306,7 +2306,7 @@ def test_barrier_in_overridden_get_grid_size_expanded_kernel(): from testlib import GridOverride - # artifically expand via overridden_get_grid_sizes_for_insn_ids + # artificially expand via overridden_get_grid_sizes_for_insn_ids knl = prog["loopy_kernel"] knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=GridOverride( knl.copy(), vecsize))