diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 32707fe869e7f08e3012a7cb2b57954822bf62ac..2c6c77e7783afd993ff26d71e60daab3c760d5bd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,6 +20,13 @@ jobs:
                 pipx install ruff
                 ruff check
 
+    typos:
+        name: Typos
+        runs-on: ubuntu-latest
+        steps:
+        -   uses: actions/checkout@v4
+        -   uses: crate-ci/typos@master
+
     pylint:
         name: Pylint
         runs-on: ubuntu-latest
diff --git a/MEMO b/MEMO
index f4e5c34e48e62d5c951d01fcb212a9117e361def..5a9438811e271741e58ea52b8958ebcb2b985afd 100644
--- a/MEMO
+++ b/MEMO
@@ -7,7 +7,7 @@ Documentation Notes
 Things to consider
 ^^^^^^^^^^^^^^^^^^
 
-- Depedencies are pointwise for shared loop dimensions
+- Dependencies are pointwise for shared loop dimensions
   and global over non-shared ones (between dependent and ancestor)
 
 - multiple insns could fight over which iname gets local axis 0
diff --git a/contrib/mem-pattern-explorer/pattern_vis.py b/contrib/mem-pattern-explorer/pattern_vis.py
index 82a2b96029d1a9005724557a587f0359bcfb6c9b..bbde231740fef0d2dd3f5942ad4ec24cd641795b 100644
--- a/contrib/mem-pattern-explorer/pattern_vis.py
+++ b/contrib/mem-pattern-explorer/pattern_vis.py
@@ -76,7 +76,7 @@ class ArrayAccessPatternContext:
 class Array:
     def __init__(self, ctx, name, shape, strides, elements_per_row=None):
         # Each array element stores a tuple:
-        # (timestamp, subgroup, g0, g1, g2, ) of last acccess
+        # (timestamp, subgroup, g0, g1, g2, ) of last access
 
         assert len(shape) == len(strides)
 
diff --git a/doc/misc.rst b/doc/misc.rst
index 3fea6fdd44e11f88ae1249cde3b07c97f72f7dae..be1c964cdec45603961f4e6f84dce24e9b5015ab 100644
--- a/doc/misc.rst
+++ b/doc/misc.rst
@@ -158,7 +158,7 @@ In the meantime, you can generate code simply by saying::
     print(cg_result.host_code())
     print(cg_result.device_code())
 
-Additionally, for C-based languages, header defintions are available via::
+Additionally, for C-based languages, header definitions are available via::
 
     loopy.generate_header(knl)
 
@@ -338,8 +338,8 @@ This list is always growing, but here are a few pointers:
 
   Use :func:`loopy.join_inames`.
 
-In what sense does Loopy suport vectorization?
-----------------------------------------------
+In what sense does Loopy support vectorization?
+-----------------------------------------------
 
 There are really two ways in which the OpenCL/CUDA model of computation exposes
 vectorization:
@@ -352,7 +352,7 @@ vectorization:
   e.g. ``float4``, which support arithmetic with implicit vector semantics
   as well as a number of 'intrinsic' functions.
 
-Loopy suports both. The first one, SIMT, is accessible by tagging inames with,
+Loopy supports both. The first one, SIMT, is accessible by tagging inames with,
-e.g., ``l.0```. Accessing the second one requires using both execution- and
+e.g., ``l.0``. Accessing the second one requires using both execution- and
 data-reshaping capabilities in loopy. To start with, you need an array that
 has an axis with the length of the desired vector. If that's not yet available,
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 275d4f26e90c0b160cc7c39172c048f34180a6b4..1eebb82237e6258fc26b15692c312816eabf075e 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -563,18 +563,18 @@ def make_copy_kernel(new_dim_tags, old_dim_tags=None):
 
     indices = ["i%d" % i for i in range(rank)]
     shape = ["n%d" % i for i in range(rank)]
-    commad_indices = ", ".join(indices)
+    comma_sep_indices = ", ".join(indices)
     bounds = " and ".join(
             f"0<={ind}<{shape_i}"
             for ind, shape_i in zip(indices, shape))
 
     set_str = "{{[{}]: {} }}".format(
-                commad_indices,
+                comma_sep_indices,
                 bounds
                 )
     result = make_kernel(set_str,
             "output[%s] = input[%s]"
-            % (commad_indices, commad_indices),
+            % (comma_sep_indices, comma_sep_indices),
             lang_version=MOST_RECENT_LANGUAGE_VERSION,
             default_offset=auto)
 
diff --git a/loopy/check.py b/loopy/check.py
index 17887cff5f3ffa5adcb2ee99e88c6f25b43527ac..c2b3d8cd3e7ef2de034e5e51c9976c675a0798c5 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -1123,7 +1123,7 @@ def _check_variable_access_ordered_inner(kernel):
             # for each *pred*, we will calculate all the direct/indirect
             # instructions that can be reached.
             seen_successors = set()
-            # first let us start with direct sucessors
+            # first let us start with direct successors
             to_check = edges[pred].copy()
             while to_check:
                 successor = to_check.pop()
@@ -1219,7 +1219,7 @@ def check_variable_access_ordered(kernel):
     """Checks that between each write to a variable and all other accesses to
     the variable there is either:
 
-    * a direct/indirect depdendency edge, or
+    * a direct/indirect dependency edge, or
     * an explicit statement that no ordering is necessary (expressed
       through a bi-directional :attr:`loopy.InstructionBase.no_sync_with`)
     """
diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py
index 530e92678e1a568e9e31f02392c625ea734e07e7..fc9eace87512285abd8288c167f90d82a6b23e63 100644
--- a/loopy/frontend/fortran/translator.py
+++ b/loopy/frontend/fortran/translator.py
@@ -200,7 +200,7 @@ class Scope:
                     return None
 
                 raise TranslationError(
-                        "no type for '%s' found in 'implict none' routine"
+                        "no type for '%s' found in 'implicit none' routine"
                         % name) from None
 
             return self.implicit_types.get(name[0], np.dtype(np.int32))
@@ -426,7 +426,7 @@ class F2LoopyTranslator(FTreeWalkerBase):
             scope.implicit_types = None
 
         for stmt, specs in node.items:
-            if scope.implict_types is None:
+            if scope.implicit_types is None:
                 raise TranslationError("implicit decl not allowed after "
                         "'implicit none'")
             tp = self.dtype_from_stmt(stmt)
diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py
index 160b6415b2d3743ae3c87a2591b09d39d780fdea..28aa3be30562f3ce690da00de954706471f2ce91 100644
--- a/loopy/isl_helpers.py
+++ b/loopy/isl_helpers.py
@@ -186,7 +186,7 @@ def simplify_pw_aff(pw_aff, context=None):
                     continue
 
                 if aff_i.gist(dom_j).is_equal(aff_j):
-                    # aff_i is sufficient to conver aff_j, eliminate aff_j
+                    # aff_i is sufficient to cover aff_j, eliminate aff_j
                     new_pieces = pieces[:]
                     if i < j:
                         new_pieces.pop(j)
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index a9b3bb07ef341fcf5e707c1783aa4cab2f052b4b..7bf4cb845d9a9f719f6ac0af644f7006b632da7f 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -164,7 +164,7 @@ class LoopKernel(Taggable):
     .. attribute:: loop_priority
 
         A frozenset of priority constraints to the kernel. Each such constraint
-        is a tuple of inames. Inames occuring in such a tuple will be scheduled
+        is a tuple of inames. Inames occurring in such a tuple will be scheduled
         earlier than any iname following in the tuple. This applies only to inames
         with non-parallel implementation tags.
 
@@ -515,7 +515,7 @@ class LoopKernel(Taggable):
         for iname in inames:
             home_domain_index = hdm[iname]
             if home_domain_index in domain_indices:
-                # nothin' new
+                # nothing new
                 continue
 
             domain_path_to_root = [home_domain_index] + ppd[home_domain_index]
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index c4cc880a0db607fcea6fb3f89968a2b8e99b9da1..f359eec3394a2ec2007bb98ddb44119f340c2523 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -1884,7 +1884,7 @@ def add_inferred_inames(knl):
 # {{{ apply single-writer heuristic
 
 @for_each_kernel
-def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True,
+def apply_single_writer_dependency_heuristic(kernel, warn_if_used=True,
         error_if_used=False):
     logger.debug("%s: default deps" % kernel.name)
 
@@ -2023,7 +2023,7 @@ class SliceToInameReplacer(IdentityMapper):
     .. attribute:: subarray_ref_bounds
 
         A :class:`list` (one entry for each :class:`SubArrayRef` to be created)
-        of :class:`dict` instances to store the slices enountered in the
+        of :class:`dict` instances to store the slices encountered in the
         expressions as a mapping from ``iname`` to a tuple of ``(start, stop,
         step)``, which describes the boxy (i.e. affine) constraints imposed on
-        the ``iname`` by the corresponding slice notation its intended to
+        the ``iname`` by the corresponding slice notation it's intended to
@@ -2574,7 +2574,7 @@ def make_function(domains, instructions, kernel_data=None, **kwargs):
     knl = guess_arg_shape_if_requested(knl, default_order)
     knl = apply_default_order_to_args(knl, default_order)
     knl = resolve_dependencies(knl)
-    knl = apply_single_writer_depencency_heuristic(knl, warn_if_used=False)
+    knl = apply_single_writer_dependency_heuristic(knl, warn_if_used=False)
 
     # -------------------------------------------------------------------------
     # Ordering dependency:
diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py
index e3fcf108af40f21396011aa155e5048f2058387a..e81e4dafc535328b7436fd2d3751927a2cb42e5f 100644
--- a/loopy/kernel/function_interface.py
+++ b/loopy/kernel/function_interface.py
@@ -672,7 +672,7 @@ class ScalarCallable(InKernelCallable):
 
 class CallableKernel(InKernelCallable):
     """
-    Records informations about a callee kernel. Also provides interface through
+    Records information about a callee kernel. Also provides interface through
     member methods to make the callee kernel compatible to be called from a
     caller kernel.
 
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index 198b7c03f3d450a1d4afcca8d47f9862d88c6a76..b9b86b53b1c7ae51d161bd1c59cd0da964b9ac6f 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -934,7 +934,7 @@ class CallInstruction(MultiAssignmentBase):
 
-        A tuple of `:class:loopy.Optional`. If an entry is not empty, it
+        A tuple of :class:`loopy.Optional`. If an entry is not empty, it
         contains the type that will be assigned to the new temporary variable
-        created from the assigment.
+        created from the assignment.
 
     .. automethod:: __init__
     """
@@ -1099,7 +1099,7 @@ def is_array_call(assignees, expression):
-    Returns *True* is the instruction is an array call.
+    Returns *True* if the instruction is an array call.
 
     An array call is a function call applied to array type objects. If any of
-    the arguemnts or assignees to the function is an array,
+    the arguments or assignees to the function is an array,
     :meth:`is_array_call` will return *True*.
     """
     from pymbolic.primitives import Call, Subscript
@@ -1460,7 +1460,7 @@ class BarrierInstruction(_DataObliviousInstruction):
     .. attribute:: mem_kind
 
         A string, ``"global"`` or ``"local"``. Chooses which memory type to
-        sychronize, for targets that require this (e.g. OpenCL)
+        synchronize, for targets that require this (e.g. OpenCL)
 
     The textual syntax in a :mod:`loopy` kernel is::
 
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index 5ed9b2ad3511ae4f1f7875ccced1b2ae2eff0ea3..0826ed010ee3f1c57b155492181bd09e30760411 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -263,7 +263,7 @@ def find_all_insn_inames(kernel):
             if insn.within_inames_is_final:
                 continue
 
-            # {{{ depdency-based propagation
+            # {{{ dependency-based propagation
 
             inames_old = insn_id_to_inames[insn.id]
             inames_new = inames_old | guess_iname_deps_based_on_var_use(
@@ -513,8 +513,8 @@ def get_dot_dependency_graph(kernel, callables_table, iname_cluster=True,
     """
 
     # make sure all automatically added stuff shows up
-    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
-    kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=False)
+    from loopy.kernel.creation import apply_single_writer_dependency_heuristic
+    kernel = apply_single_writer_dependency_heuristic(kernel, warn_if_used=False)
 
     if iname_cluster and not kernel.linearization:
         try:
@@ -1252,9 +1252,9 @@ def find_recursive_dependencies(kernel, insn_ids):
 
         for insn_id in queue:
             insn = kernel.id_to_insn[insn_id]
-            additionals = insn.depends_on - result
-            result.update(additionals)
-            new_queue.extend(additionals)
+            additional = insn.depends_on - result
+            result.update(additional)
+            new_queue.extend(additional)
 
         queue = new_queue
 
@@ -1735,7 +1735,7 @@ def get_global_barrier_order(kernel):
 
 @memoize_on_first_arg
 def find_most_recent_global_barrier(kernel, insn_id):
-    """Return the id of the latest occuring global barrier which the
+    """Return the id of the latest occurring global barrier which the
     given instruction (indirectly or directly) depends on, or *None* if this
     instruction does not depend on a global barrier.
 
@@ -1995,7 +1995,7 @@ def infer_args_are_input_output(kernel):
         elif isinstance(arg, (ConstantArg, ImageArg, ValueArg)):
             pass
         else:
-            raise NotImplementedError("Unkonwn argument type %s." % type(arg))
+            raise NotImplementedError("Unknown argument type %s." % type(arg))
 
         if not (arg.is_input or arg.is_output):
             raise LoopyError("Kernel argument must be either input or output."
diff --git a/loopy/match.py b/loopy/match.py
index 889f4e74fab1415e8edf6286fa5952089ae30628..5e409791bf0df92e5ac6beaa499a52c4adb87656 100644
--- a/loopy/match.py
+++ b/loopy/match.py
@@ -1,4 +1,4 @@
-"""Matching functionality for instruction ids and subsitution
+"""Matching functionality for instruction ids and substitution
 rule invocations stacks."""
 
 
diff --git a/loopy/options.py b/loopy/options.py
index 9c4fa0fb4076c6b9e4381043f328d15b347169b0..d58421e3e62ef07ebe39eca0cb6312360468786e 100644
--- a/loopy/options.py
+++ b/loopy/options.py
@@ -118,7 +118,7 @@ class Options(ImmutableRecord):
 
     .. attribute:: cl_exec_manage_array_events
 
-        Within the PyOpenCL executor, respect and udpate
+        Within the PyOpenCL executor, respect and update
         :attr:`pyopencl.array.Array.events`.
 
         Defaults to *True*.
@@ -156,7 +156,7 @@ class Options(ImmutableRecord):
 
         Allow re-ordering of floating point arithmetic. Re-ordering may
         give different results as floating point arithmetic is not
-        associative in addition and mulitplication. Default is *True*.
+        associative in addition and multiplication. Default is *True*.
         Note that the implementation of this option is currently incomplete.
 
     .. attribute:: build_options
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index d24e14cc270bbbf165c4d5b90cc51c89f6165993..7176d9d15d21da213f7f83b0b5a914ea020d6915 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -825,8 +825,8 @@ def preprocess_program(t_unit: TranslationUnit) -> TranslationUnit:
     from loopy.transform.subst import expand_subst
     t_unit = expand_subst(t_unit)
 
-    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
-    t_unit = apply_single_writer_depencency_heuristic(t_unit)
+    from loopy.kernel.creation import apply_single_writer_dependency_heuristic
+    t_unit = apply_single_writer_dependency_heuristic(t_unit)
 
     # Ordering restrictions:
     #
diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index ca45521e3294ba12d3ed1bbf11febf92ab2ef11f..6249b36bae5b006daa04abf815b38466ce509e2d 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -718,7 +718,7 @@ def get_insns_in_topologically_sorted_order(
         for dep in insn.depends_on:
             rev_dep_map[dep].add(insn.id)
 
-    # For breaking ties, we compare the features of an intruction
+    # For breaking ties, we compare the features of an instruction
     # so that instructions with the same set of features are lumped
     # together. This helps in :method:`schedule_as_many_run_insns_as_possible`
     # which bails after 5 insns that don't have the same feature.
@@ -1196,7 +1196,7 @@ def _generate_loop_schedules_internal(
                                 print(
                                     "%(warn)swarning:%(reset_all)s '%(iname)s', "
                                     "which the schedule is "
-                                    "currently stuck inside of, seems mis-nested. "
+                                    "currently stuck inside of, seems misnested. "
                                     "'%(subdep)s' must occur " "before '%(dep)s', "
                                     "but '%(subdep)s must be outside "
                                     "'%(iname)s', whereas '%(dep)s' must be back "
@@ -1404,7 +1404,7 @@ def _generate_loop_schedules_internal(
                     get_priority_tiers(wanted, sched_state.kernel.loop_priority))
 
             # Update the loop priority set, because some constraints may have
-            # have been contradictary.
+            # been contradictory.
             loop_priority_set = set().union(*[set(t) for t in priority_tiers])
 
             priority_tiers.append(
diff --git a/loopy/statistics.py b/loopy/statistics.py
index c9cf9d93860fd9d2bc59e6ad3be1583db600b832..0bd1340c1161053acf81c726146b2dcfa9f3cad6 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -629,7 +629,7 @@ class Op(ImmutableRecord):
        work-group executes on a single compute unit with all work-items within
        the work-group sharing local memory. A sub-group is an
        implementation-dependent grouping of work-items within a work-group,
-       analagous to an NVIDIA CUDA warp.
+       analogous to an NVIDIA CUDA warp.
 
     .. attribute:: kernel_name
 
@@ -723,7 +723,7 @@ class MemAccess(ImmutableRecord):
        work-group executes on a single compute unit with all work-items within
        the work-group sharing local memory. A sub-group is an
        implementation-dependent grouping of work-items within a work-group,
-       analagous to an NVIDIA CUDA warp.
+       analogous to an NVIDIA CUDA warp.
 
     .. attribute:: kernel_name
 
@@ -1109,7 +1109,7 @@ def _get_lid_and_gid_strides(knl, array, index):
 
     # create lid_strides and gid_strides dicts
 
-    # strides are coefficents in flattened index, i.e., we want
+    # strides are coefficients in flattened index, i.e., we want
     # lid_strides = {0:l0, 1:l1, 2:l2, ...} and
     # gid_strides = {0:g0, 1:g1, 2:g2, ...},
     # where l0, l1, l2, g0, g1, and g2 come from flattened index
@@ -1723,7 +1723,7 @@ def get_op_map(program, count_redundant_work=False,
     :arg subgroup_size: (currently unused) An :class:`int`, :class:`str`
         ``"guess"``, or *None* that specifies the sub-group size. An OpenCL
         sub-group is an implementation-dependent grouping of work-items within
-        a work-group, analagous to an NVIDIA CUDA warp. subgroup_size is used,
+        a work-group, analogous to an NVIDIA CUDA warp. subgroup_size is used,
         e.g., when counting a :class:`MemAccess` whose count_granularity
         specifies that it should only be counted once per sub-group. If set to
         *None* an attempt to find the sub-group size using the device will be
@@ -1921,7 +1921,7 @@ def get_mem_access_map(program, count_redundant_work=False,
     :arg subgroup_size: An :class:`int`, :class:`str` ``"guess"``, or
         *None* that specifies the sub-group size. An OpenCL sub-group is an
         implementation-dependent grouping of work-items within a work-group,
-        analagous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when
+        analogous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when
         counting a :class:`MemAccess` whose count_granularity specifies that it
         should only be counted once per sub-group. If set to *None* an attempt
         to find the sub-group size using the device will be made, if this fails
@@ -2085,7 +2085,7 @@ def get_synchronization_map(program, subgroup_size=None, entrypoint=None):
     :arg subgroup_size: (currently unused) An :class:`int`, :class:`str`
         ``"guess"``, or *None* that specifies the sub-group size. An OpenCL
         sub-group is an implementation-dependent grouping of work-items within
-        a work-group, analagous to an NVIDIA CUDA warp. subgroup_size is used,
+        a work-group, analogous to an NVIDIA CUDA warp. subgroup_size is used,
         e.g., when counting a :class:`MemAccess` whose count_granularity
         specifies that it should only be counted once per sub-group. If set to
         *None* an attempt to find the sub-group size using the device will be
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index d56b54e79a3c52b78ebd408dc9b33ac0db747a42..2a1b140cc654fbdb2267be54f2dcf15393bfa29e 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -683,7 +683,7 @@ class TaggedVariable(LoopyExpressionBase, p.Variable, Taggable):
         A :class:`frozenset` of subclasses of :class:`pytools.tag.Tag` used to
         provide metadata on this object. Legacy string tags are converted to
         :class:`~loopy.LegacyStringInstructionTag` or, if they used to carry
-        a functional meaning, the tag carrying that same fucntional meaning
+        a functional meaning, the tag carrying that same functional meaning
         (e.g. :class:`~loopy.UseStreamingStoreTag`).
 
     Inherits from :class:`pymbolic.primitives.Variable`
@@ -737,7 +737,7 @@ class Reduction(LoopyExpressionBase):
     .. attribute:: allow_simultaneous
 
         A :class:`bool`. If not *True*, an iname is allowed to be used
-        in precisely one reduction, to avoid mis-nesting errors.
+        in precisely one reduction, to avoid misnesting errors.
     """
 
     init_arg_names = ("operation", "inames", "expr", "allow_simultaneous")
diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py
index fc3238e92a29f492476d28f84edef610e698ace9..9cde501a7e242481ac24519d2888f56656b43f62 100644
--- a/loopy/target/c/c_execution.py
+++ b/loopy/target/c/c_execution.py
@@ -94,21 +94,21 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
             return f"_lpy_np.dtype(_lpy_np.{name})"
         raise Exception(f"dtype: {dtype} not recognized")
 
-    # {{{ handle non numpy arguements
+    # {{{ handle non numpy arguments
 
     def handle_non_numpy_arg(self, gen, arg):
         pass
 
     # }}}
 
-    # {{{ handle allocation of unspecified arguements
+    # {{{ handle allocation of unspecified arguments
 
     def handle_alloc(
             self, gen: CodeGenerator, arg: ArrayArg,
             strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str],
             skip_arg_checks: bool) -> None:
         """
-        Handle allocation of non-specified arguements for C-execution
+        Handle allocation of non-specified arguments for C-execution
         """
         from pymbolic import var
 
@@ -181,7 +181,7 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase):
 
     def initialize_system_args(self, gen):
         """
-        Initializes possibly empty system arguements
+        Initializes possibly empty system arguments
         """
         pass
 
@@ -238,7 +238,7 @@ class CCompiler:
     The general strategy here is as follows:
 
     1.  A :class:`codepy.Toolchain` is guessed from distutils.
-        The user may override any flags obtained therein by passing in arguements
+        The user may override any flags obtained therein by passing in arguments
         to cc, cflags, etc.
 
-    2.  The kernel source is built into and object first, then made into a shared
+    2.  The kernel source is built into an object first, then made into a shared
diff --git a/loopy/target/execution.py b/loopy/target/execution.py
index cb081a3e582b2cf445fd394a499eaac9571f5f4b..21600c7343a6b0e4dd50d5cdf7650f83f2a08fec 100644
--- a/loopy/target/execution.py
+++ b/loopy/target/execution.py
@@ -377,21 +377,21 @@ class ExecutionWrapperGeneratorBase(ABC):
 
     # }}}
 
-    # {{{ handle non numpy arguements
+    # {{{ handle non numpy arguments
 
     def handle_non_numpy_arg(self, gen: CodeGenerator, arg):
         raise NotImplementedError()
 
     # }}}
 
-    # {{{ handle allocation of unspecified arguements
+    # {{{ handle allocation of unspecified arguments
 
     def handle_alloc(
             self, gen: CodeGenerator, arg: ArrayArg,
             strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str],
             skip_arg_checks: bool) -> None:
         """
-        Handle allocation of non-specified arguements for C-execution
+        Handle allocation of non-specified arguments for C-execution
         """
         raise NotImplementedError()
 
@@ -647,7 +647,7 @@ class ExecutionWrapperGeneratorBase(ABC):
 
     def initialize_system_args(self, gen):
         """
-        Override to intialize any default system args
+        Override to initialize any default system args
         """
         raise NotImplementedError()
 
@@ -674,7 +674,7 @@ class ExecutionWrapperGeneratorBase(ABC):
         """
         Generates the wrapping python invoker for this execution target
 
-        :arg kernel: the loopy :class:`LoopKernel`(s) to be executued
+        :arg kernel: the loopy :class:`LoopKernel`(s) to be executed
         :codegen_result: the loopy :class:`CodeGenerationResult` created
         by code generation
 
@@ -944,7 +944,7 @@ class ExecutorBase:
 
 # }}}
 
-# {{{ code highlighers
+# {{{ code highlighters
 
 
 def get_highlighted_code(text, python=False):
diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py
index ce2a150b0aea261b31ef1258df649f7072b720d7..31d1cfd2dc6142e383cfab416cd96b8b864e0790 100644
--- a/loopy/target/ispc.py
+++ b/loopy/target/ispc.py
@@ -112,7 +112,7 @@ class ExprToISPCExprMapper(ExpressionToCExpressionMapper):
 
         if (isinstance(ary, TemporaryVariable)
                 and ary.address_space == AddressSpace.PRIVATE):
-            # generate access code for acccess to private-index temporaries
+            # generate access code for access to private-index temporaries
 
             gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs()
             if lsize:
diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index e2f3ecda2709e413314cfa0036af861ab1f8dbfd..14383e54f21a7d0229701226aa9e23d4d827d172 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -642,7 +642,7 @@ class OpenCLCASTBuilder(CFamilyASTBuilder):
         from loopy.target.c import FunctionDeclarationWrapper
         assert isinstance(fdecl, FunctionDeclarationWrapper)
         if not codegen_state.is_entrypoint:
-            # auxiliary kernels need not mention opencl speicific qualifiers
+            # auxiliary kernels need not mention OpenCL-specific qualifiers
-            # for a functions signature
+            # for a function's signature
             return preambles, fdecl
 
@@ -908,7 +908,7 @@ class OpenCLCASTBuilder(CFamilyASTBuilder):
 # }}}
 
 
-# {{{ volatile mem acccess target
+# {{{ volatile mem access target
 
 class VolatileMemExpressionToOpenCLCExpressionMapper(
         ExpressionToOpenCLCExpressionMapper):
diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index ec702e39be7b1de64bda9c08be4788c5c334d409..ecaea9b57b27895eb30e175025fb7a19a08593eb 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -324,7 +324,7 @@ class ExpressionToPyOpenCLCExpressionMapper(ExpressionToOpenCLCExpressionMapper)
                 # -ffp-contract=fast which is the default for PTX codegen, but
                 # for some unknown reason, clang fails to see the FMAs.
                 #
-                # We need to do this only for complex as we haev temporaries
+                # We need to do this only for complex as we have temporaries
                 # only in complex. For reals, the code generated looks like
                 #
                 #    res = c + a * b
@@ -879,7 +879,7 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase):
 
         value_arg_code = generate_value_arg_setup(
                 codegen_state.kernel, regular_arg_names)
-        arry_arg_code = generate_array_arg_setup(
+        array_arg_code = generate_array_arg_setup(
                 codegen_state.kernel, regular_arg_names)
 
         if struct_overflow_arg_names:
@@ -952,7 +952,7 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase):
                    "argument count of the kernel ({_lpy_knl.num_args}).'"),
             Line(),
             value_arg_code,
-            arry_arg_code,
+            array_arg_code,
             overflow_args_code,
             Assign("_lpy_evt",
                    f"{self.target.pyopencl_module_name}.enqueue_nd_range_kernel("
@@ -1207,7 +1207,7 @@ class PyOpenCLCASTBuilder(OpenCLCASTBuilder):
 # }}}
 
 
-# {{{ volatile mem acccess target
+# {{{ volatile mem access target
 
 class VolatileMemPyOpenCLCASTBuilder(PyOpenCLCASTBuilder):
     def get_expression_to_c_expression_mapper(self, codegen_state):
diff --git a/loopy/transform/array_buffer_map.py b/loopy/transform/array_buffer_map.py
index ec3737233ceff1266baf003e3c5e2278be13b682..7e7b6459ca6a3467fe221fdf673180afb2e89585 100644
--- a/loopy/transform/array_buffer_map.py
+++ b/loopy/transform/array_buffer_map.py
@@ -413,17 +413,17 @@ class ArrayToBufferMap(ArrayToBufferMapBase):
                 except_inames=frozenset(self.primed_sweep_inames))
 
         s2s_domain = stor2sweep.domain()
-        s2s_domain, aligned_g_s2s_parm_dom = isl.align_two(
+        s2s_domain, aligned_g_s2s_param_dom = isl.align_two(
                 s2s_domain, global_s2s_par_dom)
 
         arg_restrictions = (
-                aligned_g_s2s_parm_dom
+                aligned_g_s2s_param_dom
                 .eliminate(dim_type.set, 0,
-                    aligned_g_s2s_parm_dom.dim(dim_type.set))
+                    aligned_g_s2s_param_dom.dim(dim_type.set))
                 .remove_divs())
 
         return (arg_restrictions & s2s_domain).is_subset(
-                aligned_g_s2s_parm_dom)
+                aligned_g_s2s_param_dom)
 
 
 class NoOpArrayToBufferMap(ArrayToBufferMapBase):
diff --git a/loopy/transform/callable.py b/loopy/transform/callable.py
index 1fe40a37034ce0a3c95e9dcf86e42fd9e8ae9d1c..d683cbd29b2aff05434e09e01d7e37d6c7617476 100644
--- a/loopy/transform/callable.py
+++ b/loopy/transform/callable.py
@@ -102,7 +102,7 @@ def merge(translation_units):
                 if (prg_i.callables_table[clbl_name]
                         != prg_j.callables_table[clbl_name]):
                     # TODO: generate unique names + rename for the colliding
-                    # callables (if entrypoints are colliding that shuold still
+                    # callables (if entrypoints are colliding that should still
                     # be an error)
                     raise NotImplementedError("Translation units to be merged"
                                               " must have different callable names"
diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index 088d896431f4ceaec28f01bdbfff75693c9dae9c..ddfc9b5e8241ca35b1d5d3c616ed667aa975a4a6 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -252,7 +252,7 @@ def add_prefetch_for_single_kernel(kernel, callables_table, var_name,
                     footprint_subscripts, var_descr)
 
     # Our _not_provided is actually a different object from the one in the
-    # precompute module, but precompute acutally uses that to adjust its
+    # precompute module, but precompute actually uses that to adjust its
     # warning message.
 
     from loopy.transform.precompute import precompute_for_single_kernel
@@ -653,7 +653,7 @@ def set_argument_order(kernel, arg_names):
     :arg arg_names: A list (or comma-separated string) or argument
         names. All arguments must be in this list.
     """
-    # FIXME: @inducer -- shoulld this only affect the root kernel, or should it
+    # FIXME: @inducer -- should this only affect the root kernel, or should it
     # take a within?
 
     if isinstance(arg_names, str):
diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py
index bb828221ffef9cbc58e8cb2a900530e5cab0a88f..6c2688d9015d47582a8aed73cc5033103970be04 100644
--- a/loopy/transform/diff.py
+++ b/loopy/transform/diff.py
@@ -154,7 +154,7 @@ class LoopyDiffMapper(DifferentiationMapper, RuleAwareIdentityMapper):
         dc = self.diff_context
 
         if expr.function.name in dc.kernel.substitutions:
-            # FIXME: Deal with subsitution rules
+            # FIXME: Deal with substitution rules
             # Need to use chain rule here, too.
             raise NotImplementedError("substitution rules in differentiation")
         else:
@@ -382,8 +382,8 @@ def diff_kernel(kernel, diff_outputs, by, diff_iname_prefix="diff_i",
 
     assert isinstance(kernel, LoopKernel)
 
-    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
-    kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=True)
+    from loopy.kernel.creation import apply_single_writer_dependency_heuristic
+    kernel = apply_single_writer_dependency_heuristic(kernel, warn_if_used=True)
 
     if isinstance(diff_outputs, str):
         diff_outputs = [
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index 18df3dae48dbc05b151046380de6b720456714c4..b835373da9d2b67894eea4e49f4f4d8b5067c580 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -1540,7 +1540,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None):
                 break
 
         if not found:
-            raise LoopyError("invlaid tag kind: %s" % kind)
+            raise LoopyError("invalid tag kind: %s" % kind)
 
     from loopy.match import parse_match
     match = parse_match(insn_match)
@@ -2265,7 +2265,7 @@ def add_inames_for_unused_hw_axes(kernel, within=None):
     Current limitations:
 
     * Only one iname in the kernel may be tagged with each of the unused hw axes.
-    * Occurence of an ``l.auto`` tag when an instruction is missing one of the
+    * Occurrence of an ``l.auto`` tag when an instruction is missing one of the
       local hw axes.
 
     :arg within: An instruction match as understood by
diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py
index 62991662861050294560ce8a2110e3e2fd078325..374587da57bc4857d498a730727510f711aab382 100644
--- a/loopy/transform/instruction.py
+++ b/loopy/transform/instruction.py
@@ -425,7 +425,7 @@ def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False,
     if not nosync_to_add and not empty_ok:
         raise LoopyError("No nosync annotations were added as a result "
                 "of this call. add_nosync will (by default) only add them to "
-                "accompany existing depencies or group exclusions. Maybe you want "
+                "accompany existing dependencies or group exclusions. Maybe you want "
                 "to pass force=True?")
 
     new_instructions = list(kernel.instructions)
diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py
index b5c7aa7a164a7013fff86d3892cc4fb212d76e96..422d22568287b46afa5f27dfe35288c0df0068a9 100644
--- a/loopy/transform/subst.py
+++ b/loopy/transform/subst.py
@@ -327,8 +327,8 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
     # {{{ establish the relevant definition of lhs_name for each usage site
 
     dep_kernel = expand_subst(kernel)
-    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
-    dep_kernel = apply_single_writer_depencency_heuristic(dep_kernel)
+    from loopy.kernel.creation import apply_single_writer_dependency_heuristic
+    dep_kernel = apply_single_writer_dependency_heuristic(dep_kernel)
     assigning_insn_ids = {insn.id
                           for insn in dep_kernel.instructions
                           if lhs_name in insn.assignee_var_names()}
@@ -354,7 +354,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
 
         if len(rel_def_ids) > 1:
             raise LoopyError("more than one write to '%s' found in "
-                    "depdendencies of '%s'--definition cannot be resolved "
+                    "dependencies of '%s'--definition cannot be resolved "
                     "(writer instructions ids: %s)"
                     % (lhs_name, usage_insn_id, ", ".join(rel_def_ids)))
 
@@ -433,7 +433,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
         for i in indices:
             if not isinstance(i, Variable):
                 raise LoopyError("In defining instruction '%s': "
-                        "asignee index '%s' is not a plain variable. "
+                        "assignee index '%s' is not a plain variable. "
                         "Perhaps use loopy.affine_map_inames() "
                         "to perform substitution." % (def_id, i))
 
diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py
index c0d1b0b0545436b8dfc7b45249f5509f415db12c..76e795b7601f0734180b216ffbb45dd57e33c60c 100644
--- a/loopy/translation_unit.py
+++ b/loopy/translation_unit.py
@@ -196,7 +196,7 @@ class TranslationUnit:
     .. attribute:: func_id_to_in_knl_callables_mappers
 
         A :class:`frozenset` of functions of the signature ``(target:
-        TargetBase, function_indentifier: str)`` that returns an instance
+        TargetBase, function_identifier: str)`` that returns an instance
         of :class:`loopy.kernel.function_interface.InKernelCallable` or *None*.
 
     .. automethod:: executor
diff --git a/loopy/types.py b/loopy/types.py
index 143715a3964dd31bceaf126b84d384d53fdcf6e0..a837d1c46e2688469c3ad7bfa1dc04361b279cbc 100644
--- a/loopy/types.py
+++ b/loopy/types.py
@@ -165,7 +165,7 @@ class AtomicNumpyType(NumpyType, AtomicType):
 class OpaqueType(LoopyType):
     """An opaque data type is truly opaque - it has no allocations, no
     temporaries of that type, etc. The only thing allowed is to be pass in
-    through one ValueArg and go out to another. It is introduced to accomodate
+    through one ValueArg and go out to another. It is introduced to accommodate
     functional calls to external libraries.
     """
     def __init__(self, name: str) -> None:
diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py
index 7ee30313c20713bc9a5d7732ce36d504934c8783..eb8125cdb7473921f66d212366368caee587394f 100644
--- a/proto-tests/test_tim.py
+++ b/proto-tests/test_tim.py
@@ -190,7 +190,7 @@ def test_tim3d(ctx_factory):
     knl = lp.split_iname(knl, "k", n, inner_tag="l.2")  # , slabs=(0, 1))
     knl = lp.split_iname(knl, "i", n, inner_tag="l.0")  # , slabs=(0, 1))
 
-#    knl = lp.tag_inames(knl, dict(k_nner="unr"))
+#    knl = lp.tag_inames(knl, dict(k_inner="unr"))
 
     knl = lp.tag_inames(knl, dict(o="unr"))
     knl = lp.tag_inames(knl, dict(m="unr"))
diff --git a/pyproject.toml b/pyproject.toml
index 6f7b977b686c07bc34771a345f151a361d1ba07d..9dadd57f557f66b42e8044fad9a6114e39184823 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -90,3 +90,29 @@ module = [
     "IPython.*",
 ]
 ignore_missing_imports = true
+
+[tool.typos.default]
+extend-ignore-re = [
+  "(?Rm)^.*(#|//)\\s*spellchecker:\\s*disable-line$"
+]
+
+[tool.typos.default.extend-words]
+# like the numpy function, array range
+arange = "arange"
+# N-Dimensional
+ND = "ND"
+# used for 'diff_output'
+dout = "dout"
+# an element-wise slice of array u
+ue = "ue"
+# used in an ordering context, "ab" / "ba"
+ba = "ba"
+# plural of "dependee": the thing that is being depended upon
+"dependees" = "dependees"
+
+[tool.typos.files]
+extend-exclude = [
+  "loopy/target/c/compyte",
+  "notes/*/*.eps",
+]
+
diff --git a/test/test_apps.py b/test/test_apps.py
index 207bc7ee2045bfa0318ffb46ff96ea04a7d401fd..c4cffaee1d7fdc8c700615bfbfcd45fc74b38dcd 100644
--- a/test/test_apps.py
+++ b/test/test_apps.py
@@ -324,7 +324,7 @@ def test_rob_stroud_bernstein_full():
 def test_stencil(ctx_factory):
     ctx = ctx_factory()
 
-    # n=32 causes corner case behavior in size calculations for temprorary (a
+    # n=32 causes corner case behavior in size calculations for temporary (a
     # non-unifiable, two-constant-segments PwAff as the base index)
 
     n = 256
diff --git a/test/test_c_execution.py b/test/test_c_execution.py
index e703d941535f26c57e2dc29c2f279d5dd77c69ec..6208b9aed3fe4fb64e1fb1186bef696a45837546 100644
--- a/test/test_c_execution.py
+++ b/test/test_c_execution.py
@@ -95,17 +95,17 @@ def test_c_target_strides_nonsquare():
     from loopy.target.c import ExecutableCTarget
 
     def __get_kernel(order="C"):
-        indicies = ["i", "j", "k"]
-        sizes = tuple(np.random.randint(1, 11, size=len(indicies)))
+        indices = ["i", "j", "k"]
+        sizes = tuple(np.random.randint(1, 11, size=len(indices)))
         # create domain strings
         domain_template = "{{ [{iname}]: 0 <= {iname} < {size} }}"
         domains = []
-        for idx, size in zip(indicies, sizes):
+        for idx, size in zip(indices, sizes):
             domains.append(domain_template.format(
                 iname=idx,
                 size=size))
         statement = "out[{indexed}] = 2 * a[{indexed}]".format(
-            indexed=", ".join(indicies))
+            indexed=", ".join(indices))
         return lp.make_kernel(
                 domains,
                 statement,
@@ -142,17 +142,17 @@ def test_c_optimizations():
     from loopy.target.c import ExecutableCTarget
 
     def __get_kernel(order="C"):
-        indicies = ["i", "j", "k"]
-        sizes = tuple(np.random.randint(1, 11, size=len(indicies)))
+        indices = ["i", "j", "k"]
+        sizes = tuple(np.random.randint(1, 11, size=len(indices)))
         # create domain strings
         domain_template = "{{ [{iname}]: 0 <= {iname} < {size} }}"
         domains = []
-        for idx, size in zip(indicies, sizes):
+        for idx, size in zip(indices, sizes):
             domains.append(domain_template.format(
                 iname=idx,
                 size=size))
         statement = "out[{indexed}] = 2 * a[{indexed}]".format(
-            indexed=", ".join(indicies))
+            indexed=", ".join(indices))
         return lp.make_kernel(
                 domains,
                 statement,
diff --git a/test/test_callables.py b/test/test_callables.py
index d58247a75dc7c0d7eae79f322f853cb79815b150..44a94e43a0717ac575a145afc7caec2d501ae763 100644
--- a/test/test_callables.py
+++ b/test/test_callables.py
@@ -1397,8 +1397,8 @@ def test_inline_deps(ctx_factory):
     prg = lp.merge([parent_knl, child_knl])
     inlined = lp.inline_callable_kernel(prg, "func")
 
-    from loopy.kernel.creation import apply_single_writer_depencency_heuristic
-    apply_single_writer_depencency_heuristic(inlined, error_if_used=True)
+    from loopy.kernel.creation import apply_single_writer_dependency_heuristic
+    apply_single_writer_dependency_heuristic(inlined, error_if_used=True)
 
     _evt, (a_dev,) = inlined(cq)
 
diff --git a/test/test_loopy.py b/test/test_loopy.py
index e9aa47ef4bfeffadd948ede094b4d817422d897e..34310171fc7823f5dc52666527a32e93a3148d35 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2306,7 +2306,7 @@ def test_barrier_in_overridden_get_grid_size_expanded_kernel():
 
     from testlib import GridOverride
 
-    # artifically expand via overridden_get_grid_sizes_for_insn_ids
+    # artificially expand via overridden_get_grid_sizes_for_insn_ids
     knl = prog["loopy_kernel"]
     knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=GridOverride(
         knl.copy(), vecsize))