diff --git a/MEMO b/MEMO
index f4e5c34e48e62d5c951d01fcb212a9117e361def..340a3da17a59c67047f5386764a62bd3122d37e7 100644
--- a/MEMO
+++ b/MEMO
@@ -10,7 +10,7 @@ Things to consider
- Depedencies are pointwise for shared loop dimensions
and global over non-shared ones (between dependent and ancestor)
-- multiple insns could fight over which iname gets local axis 0
+- multiple stmts could fight over which iname gets local axis 0
-> complicated optimization problem
- Every loop in loopy is opened at most once.
@@ -35,7 +35,7 @@ Things to consider
- Loopy as a data model for implementing custom rewritings
- We won't generate WAW barrier-needing dependencies
- from one instruction to itself.
+ from one statement to itself.
- Loopy is semi-interactive.
@@ -45,7 +45,7 @@ Things to consider
- Dependency on non-local global writes is ill-formed
-- No substitution rules allowed on lhs of insns
+- No substitution rules allowed on lhs of stmts
To-do
^^^^^
@@ -78,7 +78,7 @@ Fixes:
old inames may still be around, so the rewrite may or may not have to be
applied.
-- Group instructions by dependency/inames for scheduling, to
+- Group statements by dependency/inames for scheduling, to
increase sched. scalability
- What if no universally valid precompute base index expression is found?
@@ -109,7 +109,7 @@ Future ideas
- Check for unordered (no-dependency) writes to the same location
-- Vanilla C string instructions?
+- Vanilla C string statements?
- Barriers for data exchanged via global vars?
@@ -183,7 +183,7 @@ Dealt with
- Add dependencies after the fact
-- Scalar insn priority
+- Scalar stmt priority
- ScalarArg is a bad name
-> renamed to ValueArg
@@ -209,8 +209,8 @@ Dealt with
-> pending better prefetch spec
- Prefetch by sample access
-- How is intra-instruction ordering of ILP loops going to be determined?
- (taking into account that it could vary even per-instruction?)
+- How is intra-statement ordering of ILP loops going to be determined?
+ (taking into account that it could vary even per-statement?)
- Sharing of checks across ILP instances
@@ -257,7 +257,7 @@ Dealt with
property.
- Just touching a variable written to by a non-idempotent
- instruction makes that instruction also not idempotent
+ statement makes that statement also not idempotent
-> Idempotent renamed to boostable.
-> Done.
@@ -274,7 +274,7 @@ Dealt with
- Slab decomposition for ILP
-> I don't think that's possible.
-- It is hard to understand error messages that referred to instructions that
+- It is hard to understand error messages that referred to statements that
are generated during preprocessing.
-> Expose preprocessing to the user so she can inspect the preprocessed
@@ -314,7 +314,7 @@ Dealt with
- Make syntax for iname dependencies
-- make syntax for insn dependencies
+- make syntax for stmt dependencies
- Implement get_problems()
diff --git a/README.rst b/README.rst
index 0e551fbede0460a2e7c76167b54d672afdf81286..f58a75de11cf33c71eab293b60d014b12b233b5a 100644
--- a/README.rst
+++ b/README.rst
@@ -28,7 +28,7 @@ It can capture the following types of optimizations:
* Loopy Unrolling
* Loop tiling with efficient handling of boundary cases
* Prefetching/copy optimizations
-* Instruction level parallelism
+* Statement level parallelism
* and many more
Loopy targets array-type computations, such as the following:
diff --git a/doc/images/dep-graph-correct.svg b/doc/images/dep-graph-correct.svg
index 397cb2d101792ac07b6e5ae8897d41cd0a457f54..0bd743391a7304c3ba5c89559f297acdb43b865a 100644
--- a/doc/images/dep-graph-correct.svg
+++ b/doc/images/dep-graph-correct.svg
@@ -32,15 +32,15 @@
out[(j, i)] <- a[(i, j)]
-
-insn
-
+
+stmt
+
out[(ii, jj)] <- 2*out[(ii, jj)]
-
-transpose->insn
+
+transpose->stmt
diff --git a/doc/images/dep-graph-incorrect.svg b/doc/images/dep-graph-incorrect.svg
index 363080aef591a2d0ccdb4d0a6ac63520f5bf43df..d072248afc0f1b17ced03f470403feacb1e2d41f 100644
--- a/doc/images/dep-graph-incorrect.svg
+++ b/doc/images/dep-graph-incorrect.svg
@@ -24,15 +24,15 @@
out[(j, i)] <- a[(i, j)]
-
-insn
-
+
+stmt
+
out[(i, j)] <- 2*out[(i, j)]
-
-transpose->insn
+
+transpose->stmt
diff --git a/doc/images/dep-graph-nesting.svg b/doc/images/dep-graph-nesting.svg
index 72cb9c4632eb5951b77d4ac1e21c59b6294af150..a50ca0509604518dc19130dcccd7e73bd24a883d 100644
--- a/doc/images/dep-graph-nesting.svg
+++ b/doc/images/dep-graph-nesting.svg
@@ -17,9 +17,9 @@
i
-
-insn
-
+
+stmt
+
a[(i, j)] <- 0
diff --git a/doc/misc.rst b/doc/misc.rst
index 9db3b85a7d96c9ccf56592bcefb2b8639984f4f8..4dba9c7c1c03febf6fde12a4dedc899dcc15a1f9 100644
--- a/doc/misc.rst
+++ b/doc/misc.rst
@@ -138,11 +138,11 @@ This example is included in the :mod:`loopy` distribution as
What this does is find nearby "centers" satisfying some criteria
for an array of points ("targets").
-Specifying dependencies for groups of instructions is cumbersome. Help?
+Specifying dependencies for groups of statements is cumbersome. Help?
-----------------------------------------------------------------------
-You can now specify instruction ID prefixes and dependencies for groups
-of instructions, like this::
+You can now specify statement ID prefixes and dependencies for groups
+of statements, like this::
with {id_prefix=init_m}
<> m[0] = ...
@@ -253,7 +253,7 @@ This list is always growing, but here are a few pointers:
Separated array axes must have a fixed size. (See either
:func:`loopy.split_array_axis`.)
-* Realization of Instruction-level parallelism
+* Realization of Statement-level parallelism
Use :func:`loopy.tag_inames` with the ``"ilp"`` tag.
ILP loops must have a fixed size. (See either
@@ -284,7 +284,7 @@ This list is always growing, but here are a few pointers:
Uh-oh. I got a scheduling error. Any hints?
-------------------------------------------
-* Make sure that dependencies between instructions are as
+* Make sure that dependencies between statements are as
you intend.
Use :func:`loopy.show_dependency_graph` to check.
@@ -304,7 +304,7 @@ Uh-oh. I got a scheduling error. Any hints?
* Make sure that your loops are correctly nested.
- Print the kernel to make sure all instructions are within
+ Print the kernel to make sure all statements are within
the set of inames you intend them to be in.
* One iname is one for loop.
diff --git a/doc/ref_creation.rst b/doc/ref_creation.rst
index 92eff09c9e3ecacfd8bb9030a9e4b9f002fefc71..9cc02be742d3618df3f3190dafdde3a2c9678076 100644
--- a/doc/ref_creation.rst
+++ b/doc/ref_creation.rst
@@ -6,7 +6,7 @@
Reference: Creating Kernels
===========================
-From Loop Domains and Instructions
+From Loop Domains and Statements
----------------------------------
.. autofunction:: make_kernel
diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst
index 3f01b0764f71e9ce2de86a66cc71f56473a7dc9f..cbf881354d410ae073ea30674b6456d0c684702d 100644
--- a/doc/ref_kernel.rst
+++ b/doc/ref_kernel.rst
@@ -17,7 +17,7 @@ Example::
A kernel's iteration domain is given by a list of :class:`islpy.BasicSet`
instances (which parametrically represent multi-dimensional sets of
tuples of integers). They define the integer values of the loop variables
-for which instructions (see below) will be executed.
+for which statements (see below) will be executed.
It is written in :ref:`isl-syntax`. :mod:`loopy` calls the loop variables
*inames*. In this case, *i* is the sole iname. The loop
domain is given as a conjunction of affine equality
@@ -46,7 +46,7 @@ inside of the 'l' loop.
The idea is that domains form a forest (a collection of trees), and a
"sub-forest" is extracted that covers all the inames for each
-instruction. Each individual sub-tree is then checked for branching,
+statement. Each individual sub-tree is then checked for branching,
which is ill-formed. It is declared ill-formed because intersecting, in
the above case, the l, i, and j domains could result in restrictions from the
i domain affecting the j domain by way of how i affects l--which would
@@ -59,7 +59,7 @@ Inames
Loops are (by default) entered exactly once. This is necessary to preserve
dependency semantics--otherwise e.g. a fetch could happen inside one loop nest,
-and then the instruction using that fetch could be inside a wholly different
+and then the statement using that fetch could be inside a wholly different
loop nest.
.. _isl-syntax:
@@ -134,7 +134,7 @@ Tag Meaning
``"l.N"`` Local (intra-group) axis N ("local")
``"g.N"`` Group-number axis N ("group")
``"unr"`` Unroll
-``"ilp"`` | ``"ilp.unr"`` Unroll using instruction-level parallelism
+``"ilp"`` | ``"ilp.unr"`` Unroll using statement-level parallelism
``"ilp.seq"`` Realize parallel iname as innermost loop
``"like.INAME"`` Can be used when tagging inames to tag like another
``"unused.g"`` | ``"unused.l"`` Can be to tag as the next unused group/local axis
@@ -147,18 +147,18 @@ Tag Meaning
* Restricts loops to be innermost
* Duplicates reduction storage for any reductions nested around ILP usage
* Causes a loop (unrolled or not) to be opened/generated for each
- involved instruction
+ involved statement
.. }}}
-.. _instructions:
+.. _statements:
-Instructions
+Statements
------------
.. {{{
-.. autoclass:: InstructionBase
+.. autoclass:: StatementBase
.. _assignments:
@@ -172,18 +172,18 @@ Assignment objects
Textual Assignment Syntax
^^^^^^^^^^^^^^^^^^^^^^^^^
-The general syntax of an instruction is a simple assignment::
+The general syntax of a statement is a simple assignment::
LHS[i,j,k] = EXPRESSION
Several extensions of this syntax are defined, as discussed below. They
may be combined freely.
-You can also use an instruction to declare a new temporary variable. (See
+You can also use a statement to declare a new temporary variable. (See
:ref:`temporaries`.) See :ref:`types` for what types are acceptable. If the
``LHS`` has a subscript, bounds on the indices are inferred (which must be
constants at the time of kernel creation) and the declared temporary is
-created as an array. Instructions declaring temporaries have the following
+created as an array. Statements declaring temporaries have the following
form::
LHS[i,j,k] = EXPRESSION
@@ -193,31 +193,31 @@ automatically. This uses the following syntax::
<> LHS[i,j,k] = EXPRESSION
-Lastly, each instruction may optionally have a number of attributes
+Lastly, each statement may optionally have a number of attributes
specified, using the following format::
LHS[i,j,k] = EXPRESSION {attr1,attr2=value1:value2}
These are usually key-value pairs. The following attributes are recognized:
-* ``id=value`` sets the instruction's identifier to ``value``. ``value``
+* ``id=value`` sets the statement's identifier to ``value``. ``value``
must be unique within the kernel. This identifier is used to refer to the
- instruction after it has been created, such as from ``dep`` attributes
+ statement after it has been created, such as from ``dep`` attributes
(see below) or from :mod:`context matches `.
-* ``id_prefix=value`` also sets the instruction's identifier, however
+* ``id_prefix=value`` also sets the statement's identifier, however
uniqueness is ensured by loopy itself, by appending further components
(often numbers) to the given ``id_prefix``.
-* ``inames=i:j:k`` forces the instruction to reside within the loops over
+* ``inames=i:j:k`` forces the statement to reside within the loops over
:ref:`inames` ``i``, ``j`` and ``k`` (and only those).
.. note::
- The default for the inames that the instruction depends on is
- the inames used in the instruction itself plus the common
+ The default for the inames that the statement depends on is
+ the inames used in the statement itself plus the common
subset of inames shared by writers of all variables read by the
- instruction.
+ statement.
You can add a plus sign ("``+``") to the front of this option
value to indicate that you would like the inames you specify here
@@ -232,9 +232,9 @@ These are usually key-value pairs. The following attributes are recognized:
This is a shortcut for calling :func:`loopy.duplicate_inames` later
(once the kernel is created).
-* ``dep=id1:id2`` creates a dependency of this instruction on the
- instructions with identifiers ``id1`` and ``id2``. The meaning of this
- dependency is that the code generated for this instruction is required to
+* ``dep=id1:id2`` creates a dependency of this statement on the
+ statements with identifiers ``id1`` and ``id2``. The meaning of this
+ dependency is that the code generated for this statement is required to
appear textually after all of these dependees' generated code.
Identifiers here are allowed to be wildcards as defined by the Python
@@ -246,14 +246,14 @@ These are usually key-value pairs. The following attributes are recognized:
Since specifying all possible dependencies is cumbersome and
error-prone, :mod:`loopy` employs a heuristic to automatically find
dependencies. Specifically, :mod:`loopy` will automatically add
- a dependency to an instruction reading a variable if there is
- exactly one instruction writing that variable. ("Variable" here may
+ a dependency to a statement reading a variable if there is
+ exactly one statement writing that variable. ("Variable" here may
mean either temporary variable or kernel argument.)
If each variable in a kernel is only written once, then this
heuristic should be able to compute all required dependencies.
- Conversely, if a variable is written by two different instructions,
+ Conversely, if a variable is written by two different statements,
all ordering around that variable needs to be specified explicitly.
It is recommended to use :func:`get_dot_dependency_graph` to
visualize the dependency graph of possible orderings.
@@ -262,14 +262,14 @@ These are usually key-value pairs. The following attributes are recognized:
heuristic and indicate that the specified list of dependencies is
exhaustive.
-* ``dep_query=...`` provides an alternative way of specifying instruction
+* ``dep_query=...`` provides an alternative way of specifying statement
dependencies. The given string is parsed as a match expression object by
:func:`loopy.match.parse_match`. Upon kernel generation, this match
- expression is used to match instructions in the kernel and add them as
+ expression is used to match statements in the kernel and add them as
dependencies.
* ``nosync=id1:id2`` prescribes that no barrier synchronization is necessary
- for the instructions with identifiers ``id1`` and ``id2``, even if a
+ for the statements with identifiers ``id1`` and ``id2``, even if a
dependency chain exists and variables are accessed in an apparently racy
way.
@@ -287,8 +287,8 @@ These are usually key-value pairs. The following attributes are recognized:
* `any`
As an example, ``nosync=id1@local:id2@global`` prescribes that no local
- synchronization is needed with instruction ``id1`` and no global
- synchronization is needed with instruction ``id2``.
+ synchronization is needed with statement ``id1`` and no global
+ synchronization is needed with statement ``id2``.
``nosync=id1@any`` has the same effect as ``nosync=id1``.
@@ -296,25 +296,25 @@ These are usually key-value pairs. The following attributes are recognized:
just like ``dep_query`` and ``dep``. As with ``nosync``, ``nosync_query``
accepts an optional `@scope` suffix.
-* ``priority=integer`` sets the instructions priority to the value
- ``integer``. Instructions with higher priority will be scheduled sooner,
+* ``priority=integer`` sets the statement's priority to the value
+ ``integer``. Statements with higher priority will be scheduled sooner,
if possible. Note that the scheduler may still schedule a lower-priority
- instruction ahead of a higher-priority one if loop orders or dependencies
+ statement ahead of a higher-priority one if loop orders or dependencies
require it.
-* ``if=variable1:variable2`` Only execute this instruction if all condition
+* ``if=variable1:variable2`` Only execute this statement if all condition
variables (which must be scalar variables) evaluate to ``true`` (as
defined by C).
-* ``tags=tag1:tag2`` Apply tags to this instruction that can then be used
+* ``tags=tag1:tag2`` Apply tags to this statement that can then be used
for :ref:`context-matching`.
-* ``groups=group1:group2`` Make this instruction part of the given
- instruction groups. See :class:`InstructionBase.groups`.
+* ``groups=group1:group2`` Make this statement part of the given
+ statement groups. See :class:`StatementBase.groups`.
-* ``conflicts_grp=group1:group2`` Make this instruction conflict with the
- given instruction groups. See
- :class:`InstructionBase.conflicts_with_groups`.
+* ``conflicts_grp=group1:group2`` Make this statement conflict with the
+ given statement groups. See
+ :class:`StatementBase.conflicts_with_groups`.
* ``atomic`` The update embodied by the assignment is carried out
atomically. See :attr:`Assignment.atomicity` for precise semantics.
@@ -340,15 +340,15 @@ Loopy's expressions are a slight superset of the expressions supported by
TODO: Functions
TODO: Reductions
-Function Call Instructions
+Function Call Statements
^^^^^^^^^^^^^^^^^^^^^^^^^^
-.. autoclass:: CallInstruction
+.. autoclass:: CallStatement
-C Block Instructions
+C Block Statements
^^^^^^^^^^^^^^^^^^^^
-.. autoclass:: CInstruction
+.. autoclass:: CStatement
Atomic Operations
^^^^^^^^^^^^^^^^^
@@ -363,15 +363,15 @@ Atomic Operations
.. autoclass:: AtomicUpdate
-No-Op Instruction
+No-Op Statement
^^^^^^^^^^^^^^^^^
-.. autoclass:: NoOpInstruction
+.. autoclass:: NoOpStatement
-Barrier Instructions
+Barrier Statements
^^^^^^^^^^^^^^^^^^^^
-.. autoclass:: BarrierInstruction
+.. autoclass:: BarrierStatement
.. }}}
diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst
index d293e3ebe998a632bd547f94a67e675ff0592bfb..a3e43c1fb2429248b01fc230f07c4a090faa6418 100644
--- a/doc/ref_transform.rst
+++ b/doc/ref_transform.rst
@@ -59,18 +59,18 @@ Padding Data
.. autofunction:: add_padding
-Manipulating Instructions
+Manipulating Statements
-------------------------
-.. autofunction:: set_instruction_priority
+.. autofunction:: set_statement_priority
.. autofunction:: add_dependency
-.. autofunction:: remove_instructions
+.. autofunction:: remove_statements
-.. autofunction:: replace_instruction_ids
+.. autofunction:: replace_statement_ids
-.. autofunction:: tag_instructions
+.. autofunction:: tag_statements
.. autofunction:: add_nosync
@@ -135,7 +135,7 @@ Setting options
Matching contexts
-----------------
-TODO: Matching instruction tags
+TODO: Matching statement tags
.. automodule:: loopy.match
diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index 8b85387259228777f028fb70b1c0cf2efcc2d2ef..20b3610e0dd0522a552f4e762ae8abbeec15b3cd 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -88,7 +88,7 @@ The parts that you see here are the two main components of a loopy kernel:
passed to the kernel by the user that, in this case, determines the
length of the vector being multiplied.
-* The **instructions** to be executed. These are generally scalar
+* The **statements** to be executed. These are generally scalar
assignments between array elements, consisting of a left hand
side and a right hand side. See :ref:`assignments` for the
full syntax of an assignment.
@@ -121,9 +121,9 @@ always see loopy's view of a kernel by printing it.
INAME IMPLEMENTATION TAGS:
i: None
---------------------------------------------------------------------------
- INSTRUCTIONS:
+ STATEMENTS:
for i
- out[i] = 2*a[i] {id=insn}
+ out[i] = 2*a[i] {id=stmt}
end i
---------------------------------------------------------------------------
@@ -132,7 +132,7 @@ than there was in the input. Most of this comes from default values that
loopy assumes to cover common use cases. These defaults can all be
overridden.
-We've seen the domain and the instructions above, and we'll discuss the
+We've seen the domain and the statements above, and we'll discuss the
'iname-to-tag-map' in :ref:`implementing-inames`. The remaining big chunk
of added information is in the 'arguments' section, where we observe the
following:
@@ -307,13 +307,13 @@ that:
``i==17``. Your program is only correct if it produces a valid result
irrespective of this ordering.
-* In addition, there is (by default) no ordering between instructions
- either. In other words, loopy is free to execute the instructions above
+* In addition, there is (by default) no ordering between statements
+ either. In other words, loopy is free to execute the statements above
in any order whatsoever.
Reading the above two rules, you'll notice that our transpose-and-multiply
kernel is incorrect, because it only computes the desired result if the
-first instruction completes before the second one. To fix this, we declare
+first statement completes before the second one. To fix this, we declare
an explicit dependency:
.. doctest::
@@ -327,8 +327,8 @@ an explicit dependency:
... """)
``{id=transpose}`` assigns the identifier *transpose* to the first
-instruction, and ``{dep=transpose}`` declares a dependency of the second
-instruction on the first. Looking at loopy's view of this kernel, we see
+statement, and ``{dep=transpose}`` declares a dependency of the second
+statement on the first. Looking at loopy's view of this kernel, we see
that these dependencies show up there, too:
.. doctest::
@@ -340,14 +340,14 @@ that these dependencies show up there, too:
...
---------------------------------------------------------------------------
DEPENDENCIES: (use loopy.show_dependency_graph to visualize)
- insn : transpose
+ stmt : transpose
---------------------------------------------------------------------------
These dependencies are in a ``dependent : prerequisite`` format that should
be familiar if you have previously dealt with Makefiles. For larger
kernels, these dependency lists can become quite verbose, and there is an
increasing risk that required dependencies are missed. To help catch these,
-loopy can also show an instruction dependency graph, using
+loopy can also show a statement dependency graph, using
:func:`loopy.show_dependency_graph`:
.. image:: images/dep-graph-incorrect.svg
@@ -360,16 +360,16 @@ graph will open in a browser window.
Since manually notating lots of dependencies is cumbersome, loopy has
a heuristic:
- If a variable is written by exactly one instruction, then all
- instructions reading that variable will automatically depend on the
- writing instruction.
+ If a variable is written by exactly one statement, then all
+ statements reading that variable will automatically depend on the
+ writing statement.
The intent of this heuristic is to cover the common case of a
precomputed result being stored and used many times. Generally, these
dependencies are *in addition* to any manual dependencies added via
``{dep=...}``. It is possible (but rare) that the heuristic adds undesired
dependencies. In this case, ``{dep=*...}`` (i.e. a leading asterisk) to
-prevent the heuristic from adding dependencies for this instruction.
+prevent the heuristic from adding dependencies for this statement.
Loops and dependencies
~~~~~~~~~~~~~~~~~~~~~~
@@ -395,7 +395,7 @@ Let us take a look at the generated code for the above kernel:
}
}
-While our requested instruction ordering has been obeyed, something is
+While our requested statement ordering has been obeyed, something is
still not right:
.. doctest::
@@ -404,7 +404,7 @@ still not right:
False
For the kernel to perform the desired computation, *all
-instances* (loop iterations) of the first instruction need to be completed,
+instances* (loop iterations) of the first statement need to be completed,
not just the one for the current values of *(i, j)*.
Dependencies in loopy act *within* the largest common set of shared
@@ -960,7 +960,7 @@ Consider the following example:
a_temp[lid(0)] = a[16 * gid(0) + lid(0)];
acc_k = 0.0f;
}
- barrier(CLK_LOCAL_MEM_FENCE) /* for a_temp (insn_0_k_update depends on insn) */;
+ barrier(CLK_LOCAL_MEM_FENCE) /* for a_temp (stmt_0_k_update depends on stmt) */;
if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
{
for (int k = 0; k <= 15; ++k)
@@ -971,8 +971,8 @@ Consider the following example:
Observe that *a_temp* was automatically placed in local memory, because
it is written in parallel across values of the group-local iname
-*i_inner*. In addition, :mod:`loopy` has emitted a barrier instruction to
-achieve the :ref:`ordering` specified by the instruction dependencies.
+*i_inner*. In addition, :mod:`loopy` has emitted a barrier statement to
+achieve the :ref:`ordering` specified by the statement dependencies.
(The ``priority=10`` attribute was added to make the output of the test
deterministic.)
@@ -1045,7 +1045,7 @@ earlier:
acc_k = 0.0f;
if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
a_fetch[lid(0)] = a[16 * gid(0) + lid(0)];
- barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (insn_k_update depends on a_fetch_rule) */;
+ barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (stmt_k_update depends on a_fetch_rule) */;
if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
{
for (int k = 0; k <= 15; ++k)
@@ -1108,7 +1108,7 @@ work item:
:mod:`loopy` supports two kinds of barriers:
* *Local barriers* ensure consistency of local memory accesses to items within
- *the same* work group. This synchronizes with all instructions in the work
+ *the same* work group. This synchronizes with all statements in the work
group.
* *Global barriers* ensure consistency of global memory accesses
@@ -1123,7 +1123,7 @@ all work items reach the same barrier, the kernel will hang during execution.
Barrier insertion
~~~~~~~~~~~~~~~~~
-By default, :mod:`loopy` inserts local barriers between two instructions when it
+By default, :mod:`loopy` inserts local barriers between two statements when it
detects that a dependency involving local memory may occur across work items. To
see this in action, take a look at the section on :ref:`local_temporaries`.
@@ -1156,11 +1156,11 @@ this, :mod:`loopy` will complain that global barrier needs to be inserted:
>>> cgr = lp.generate_code_v2(knl)
Traceback (most recent call last):
...
- MissingBarrierError: Dependency 'rotate depends on maketmp' (for variable 'arr') requires synchronization by a global barrier (add a 'no_sync_with' instruction option to state that no synchronization is needed)
+ MissingBarrierError: Dependency 'rotate depends on maketmp' (for variable 'arr') requires synchronization by a global barrier (add a 'no_sync_with' statement option to state that no synchronization is needed)
-The syntax for a inserting a global barrier instruction is
+The syntax for inserting a global barrier statement is
``... gbarrier``. :mod:`loopy` also supports manually inserting local
-barriers. The syntax for a local barrier instruction is ``... lbarrier``.
+barriers. The syntax for a local barrier statement is ``... lbarrier``.
Saving temporaries across global barriers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1173,7 +1173,7 @@ of how to use :func:`loopy.save_and_reload_temporaries` which is helpful for
that purpose.
Let us start with an example. Consider the kernel from above with a
-``... gbarrier`` instruction that has already been inserted.
+``... gbarrier`` statement that has already been inserted.
.. doctest::
@@ -1202,7 +1202,7 @@ Here is what happens when we try to generate code for the kernel:
MissingDefinitionError: temporary variable 'tmp' gets used in subkernel 'rotate_v2_0' without a definition (maybe you forgot to call loopy.save_and_reload_temporaries?)
This happens due to the kernel splitting done by :mod:`loopy`. The splitting
-happens when the instruction schedule is generated. To see the schedule, we
+happens when the statement schedule is generated. To see the schedule, we
should call :func:`loopy.get_one_scheduled_kernel`:
>>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))
@@ -1222,7 +1222,7 @@ should call :func:`loopy.get_one_scheduled_kernel`:
6: RETURN FROM KERNEL rotate_v2_0
---------------------------------------------------------------------------
-As the error message suggests, taking a look at the generated instruction
+As the error message suggests, taking a look at the generated statement
schedule will show that while ``tmp`` is assigned in the first kernel, the
assignment to ``tmp`` is not seen by the second kernel. Because the temporary is
in private memory, it does not persist across calls to device kernels (the same
@@ -1231,13 +1231,13 @@ goes for local temporaries).
:mod:`loopy` provides a function called
:func:`loopy.save_and_reload_temporaries` for the purpose of handling the
task of saving and restoring temporary values across global barriers. This
-function adds instructions to the kernel without scheduling them. That means
+function adds statements to the kernel without scheduling them. That means
that :func:`loopy.get_one_scheduled_kernel` needs to be called one more time to
-put those instructions into the schedule.
+put those statements into the schedule.
>>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))
>>> knl = lp.save_and_reload_temporaries(knl)
- >>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added instructions
+ >>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added statements
>>> print(knl)
---------------------------------------------------------------------------
KERNEL: rotate_v2
@@ -1461,7 +1461,7 @@ sign that something is amiss:
>>> evt, (out,) = knl(queue, a=a_mat_dev)
Traceback (most recent call last):
...
- WriteRaceConditionWarning: in kernel transpose: instruction 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel attribute to disable)
+ WriteRaceConditionWarning: in kernel transpose: statement 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel attribute to disable)
When we ask to see the code, the issue becomes apparent:
@@ -1495,7 +1495,7 @@ Barriers
~~~~~~~~
:mod:`loopy` may infer the need for a barrier when it is not necessary. The
-``no_sync_with`` instruction attribute can be used to resolve this.
+``no_sync_with`` statement attribute can be used to resolve this.
See also :func:`loopy.add_nosync`.
@@ -1868,16 +1868,16 @@ Now to make things more interesting, we'll create a kernel with barriers:
for (int j = 0; j <= 9; ++j)
for (int i = 0; i <= 49; ++i)
{
- barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn rev-depends on insn_0) */;
+ barrier(CLK_LOCAL_MEM_FENCE) /* for c (stmt rev-depends on stmt_0) */;
c[990 * i + 99 * j + lid(0) + 1] = 2 * a[980 * i + 98 * j + lid(0) + 1];
- barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn_0 depends on insn) */;
+ barrier(CLK_LOCAL_MEM_FENCE) /* for c (stmt_0 depends on stmt) */;
e[980 * i + 98 * j + lid(0) + 1] = c[990 * i + 99 * j + 1 + lid(0) + 1] + c[990 * i + 99 * j + -1 + lid(0) + 1];
}
}
}
-In this kernel, when a thread performs the second instruction it uses data
-produced by *different* threads during the first instruction. Because of this,
+In this kernel, when a thread performs the second statement it uses data
+produced by *different* threads during the first statement. Because of this,
barriers are required for correct execution, so loopy inserts them. Now we'll
count the barriers using :func:`loopy.get_synchronization_map`:
diff --git a/examples/fortran/ipython-integration-demo.ipynb b/examples/fortran/ipython-integration-demo.ipynb
index c2b34f1d13f7b4971c0c2e88a0ae5013ed12ee23..27347bd94aaddb5de50ca4d9a90e42ba75b806a7 100644
--- a/examples/fortran/ipython-integration-demo.ipynb
+++ b/examples/fortran/ipython-integration-demo.ipynb
@@ -82,8 +82,8 @@
"INAME IMPLEMENTATION TAGS:\n",
"i: None\n",
"---------------------------------------------------------------------------\n",
- "INSTRUCTIONS:\n",
- "[i] out[i] <- a # insn0\n",
+ "STATEMENTS:\n",
+ "[i] out[i] <- a # stmt0\n",
"---------------------------------------------------------------------------\n"
]
}
@@ -167,8 +167,8 @@
"i_inner: l.0\n",
"i_outer: g.0\n",
"---------------------------------------------------------------------------\n",
- "INSTRUCTIONS:\n",
- "[i_inner,i_outer] out[i_inner + i_outer*128] <- a # insn0\n",
+ "STATEMENTS:\n",
+ "[i_inner,i_outer] out[i_inner + i_outer*128] <- a # stmt0\n",
"---------------------------------------------------------------------------\n"
]
}
diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py
index fa581d4262e2f06addf81aeaecca5ed2f8f8c8f1..edf49c8aab9d35cb406d7ab65b18b4edb998da63 100644
--- a/examples/python/ispc-stream-harness.py
+++ b/examples/python/ispc-stream-harness.py
@@ -59,10 +59,10 @@ def main():
with open("tasksys.cpp", "r") as ts_file:
tasksys_source = ts_file.read()
- def make_knl(name, insn, vars):
+ def make_knl(name, stmt, vars):
knl = lp.make_kernel(
"{[i]: 0<=i helps find segfaults
- result = Block([printf_insn, result])
+ result = Block([printf_stmt, result])
# }}}
return result
-def generate_call_code(codegen_state, insn):
+def generate_call_code(codegen_state, stmt):
kernel = codegen_state.kernel
# {{{ vectorization handling
if codegen_state.vectorization_info:
- if insn.atomicity:
+ if stmt.atomicity:
raise Unvectorizable("atomic operation")
# }}}
result = codegen_state.ast_builder.emit_multiple_assignment(
- codegen_state, insn)
+ codegen_state, stmt)
# {{{ tracing
@@ -243,11 +243,11 @@ def generate_call_code(codegen_state, insn):
return result
-def generate_c_instruction_code(codegen_state, insn):
+def generate_c_statement_code(codegen_state, stmt):
kernel = codegen_state.kernel
if codegen_state.vectorization_info is not None:
- raise Unvectorizable("C instructions cannot be vectorized")
+ raise Unvectorizable("C statements cannot be vectorized")
body = []
@@ -255,7 +255,7 @@ def generate_c_instruction_code(codegen_state, insn):
from cgen import Initializer, Block, Line
from pymbolic.primitives import Variable
- for name, iname_expr in insn.iname_exprs:
+ for name, iname_expr in stmt.iname_exprs:
if (isinstance(iname_expr, Variable)
and name not in codegen_state.var_subst_map):
# No need, the bare symbol will work
@@ -270,7 +270,7 @@ def generate_c_instruction_code(codegen_state, insn):
if body:
body.append(Line())
- body.extend(Line(l) for l in insn.code.split("\n"))
+ body.extend(Line(l) for l in stmt.code.split("\n"))
return Block(body)
diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py
index 512e4ac8619f33856d0a8ed929de0b574f7da014..1c30de8a35dc215ce05e28138cf1632429133fda 100644
--- a/loopy/diagnostic.py
+++ b/loopy/diagnostic.py
@@ -100,7 +100,7 @@ class MissingDefinitionError(LoopyError):
pass
-class UnscheduledInstructionError(LoopyError):
+class UnscheduledStatementError(LoopyError):
pass
diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py
index e801d09dcf10750ce09af647e0b14f4641fa1fb2..391ec1414e36cb096038195b2c0e11ab33b67dbb 100644
--- a/loopy/frontend/fortran/translator.py
+++ b/loopy/frontend/fortran/translator.py
@@ -113,12 +113,12 @@ class Scope(object):
self.active_loopy_inames = set()
- self.instructions = []
+ self.statements = []
self.temporary_variables = []
self.used_names = set()
- self.previous_instruction_id = None
+ self.previous_statement_id = None
def known_names(self):
return (self.used_names
@@ -205,12 +205,12 @@ class F2LoopyTranslator(FTreeWalkerBase):
self.scope_stack = []
- self.insn_id_counter = 0
+ self.stmt_id_counter = 0
self.condition_id_counter = 0
self.kernels = []
- self.instruction_tags = []
+ self.statement_tags = []
self.conditions = []
self.conditions_data = []
@@ -220,23 +220,23 @@ class F2LoopyTranslator(FTreeWalkerBase):
self.block_nest = []
- def add_expression_instruction(self, lhs, rhs):
+ def add_expression_statement(self, lhs, rhs):
scope = self.scope_stack[-1]
- new_id = intern("insn%d" % self.insn_id_counter)
- self.insn_id_counter += 1
+ new_id = intern("stmt%d" % self.stmt_id_counter)
+ self.stmt_id_counter += 1
from loopy.kernel.data import Assignment
- insn = Assignment(
+ stmt = Assignment(
lhs, rhs,
within_inames=frozenset(
scope.active_loopy_inames),
id=new_id,
predicates=frozenset(self.conditions),
- tags=tuple(self.instruction_tags))
+ tags=tuple(self.statement_tags))
- scope.previous_instruction_id = new_id
- scope.instructions.append(insn)
+ scope.previous_statement_id = new_id
+ scope.statements.append(stmt)
# {{{ map_XXX functions
@@ -413,7 +413,7 @@ class F2LoopyTranslator(FTreeWalkerBase):
rhs = scope.process_expression_for_loopy(self.parse_expr(node, node.expr))
- self.add_expression_instruction(lhs, rhs)
+ self.add_expression_statement(lhs, rhs)
def map_Allocate(self, node):
raise NotImplementedError("allocate")
@@ -464,7 +464,7 @@ class F2LoopyTranslator(FTreeWalkerBase):
from pymbolic import var
cond_var = var(cond_name)
- self.add_expression_instruction(
+ self.add_expression_statement(
cond_var, self.parse_expr(node, node.expr))
cond_expr = cond_var
@@ -646,16 +646,16 @@ class F2LoopyTranslator(FTreeWalkerBase):
if begin_tag_match:
tag = begin_tag_match.group(1)
- if tag in self.instruction_tags:
+ if tag in self.statement_tags:
raise TranslationError("nested begin tag for tag '%s'" % tag)
- self.instruction_tags.append(tag)
+ self.statement_tags.append(tag)
elif end_tag_match:
tag = end_tag_match.group(1)
- if tag not in self.instruction_tags:
+ if tag not in self.statement_tags:
raise TranslationError(
"end tag without begin tag for tag '%s'" % tag)
- self.instruction_tags.remove(tag)
+ self.statement_tags.remove(tag)
elif faulty_loopy_pragma_match is not None:
from warnings import warn
@@ -710,7 +710,7 @@ class F2LoopyTranslator(FTreeWalkerBase):
knl = lp.make_kernel(
sub.index_sets,
- sub.instructions,
+ sub.statements,
kernel_data,
name=sub.subprogram_name,
default_order="F",
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index bdef1133e5e0a37d66e6d427dba91cc42213b3b1..9005fcbfa74e881c108498b9d3196bf1412b0868 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -109,10 +109,10 @@ class LoopKernel(ImmutableRecordWithoutPickling):
a list of :class:`islpy.BasicSet` instances
representing the :ref:`domain-tree`.
- .. attribute:: instructions
+ .. attribute:: statements
- A list of :class:`InstructionBase` instances, e.g.
- :class:`Assignment`. See :ref:`instructions`.
+ A list of :class:`StatementBase` instances, e.g.
+ :class:`Assignment`. See :ref:`statements`.
.. attribute:: args
@@ -186,7 +186,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
# {{{ constructor
- def __init__(self, domains, instructions, args=[], schedule=None,
+ def __init__(self, domains, statements=None, args=[], schedule=None,
name="loopy_kernel",
preambles=[],
preamble_generators=[],
@@ -213,9 +213,14 @@ class LoopKernel(ImmutableRecordWithoutPickling):
state=kernel_state.INITIAL,
target=None,
- overridden_get_grid_sizes_for_insn_ids=None):
+ overridden_get_grid_sizes_for_stmt_ids=None,
+
+            # compat
+            instructions=None,
+            overridden_get_grid_sizes_for_insn_ids=None,
+ ):
"""
- :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get
+ :arg overridden_get_grid_sizes_for_stmt_ids: A callable. When kernels get
intersected in slab decomposition, their grid sizes shouldn't
change. This provides a way to forward sub-kernel grid size requests.
"""
@@ -224,6 +229,23 @@ class LoopKernel(ImmutableRecordWithoutPickling):
from loopy.kernel.tools import SetOperationCacheManager
cache_manager = SetOperationCacheManager()
+        if statements is not None and instructions is not None:
+            raise TypeError("may not specify both instructions and statements")
+        elif statements is None and instructions is None:
+            raise TypeError(
+                    "must specify exactly one of instructions and statements")
+        elif instructions is not None:
+            statements = instructions
+
+        if (overridden_get_grid_sizes_for_stmt_ids is not None
+                and overridden_get_grid_sizes_for_insn_ids is not None):
+            raise TypeError("may not specify both "
+                    "overridden_get_grid_sizes_for_insn_ids "
+                    "and overridden_get_grid_sizes_for_stmt_ids")
+        elif overridden_get_grid_sizes_for_insn_ids is not None:
+            overridden_get_grid_sizes_for_stmt_ids = \
+                    overridden_get_grid_sizes_for_insn_ids
+
# {{{ process assumptions
if assumptions is None:
@@ -266,7 +288,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
ImmutableRecordWithoutPickling.__init__(self,
domains=domains,
- instructions=instructions,
+ statements=statements,
args=args,
schedule=schedule,
name=name,
@@ -288,8 +310,8 @@ class LoopKernel(ImmutableRecordWithoutPickling):
options=options,
state=state,
target=target,
- overridden_get_grid_sizes_for_insn_ids=(
- overridden_get_grid_sizes_for_insn_ids))
+ overridden_get_grid_sizes_for_stmt_ids=(
+ overridden_get_grid_sizes_for_stmt_ids))
self._kernel_executor_cache = {}
@@ -375,17 +397,17 @@ class LoopKernel(ImmutableRecordWithoutPickling):
def get_var_name_generator(self):
return _UniqueVarNameGenerator(self.all_variable_names())
- def get_instruction_id_generator(self, based_on="insn"):
- used_ids = set(insn.id for insn in self.instructions)
+ def get_statement_id_generator(self, based_on="stmt"):
+ used_ids = set(stmt.id for stmt in self.statements)
return UniqueNameGenerator(used_ids)
- def make_unique_instruction_id(self, insns=None, based_on="insn",
+ def make_unique_statement_id(self, stmts=None, based_on="stmt",
extra_used_ids=set()):
- if insns is None:
- insns = self.instructions
+ if stmts is None:
+ stmts = self.statements
- used_ids = set(insn.id for insn in insns) | extra_used_ids
+ used_ids = set(stmt.id for stmt in stmts) | extra_used_ids
for id_str in generate_unique_names(based_on):
if id_str not in used_ids:
@@ -393,9 +415,9 @@ class LoopKernel(ImmutableRecordWithoutPickling):
def all_group_names(self):
result = set()
- for insn in self.instructions:
- result.update(insn.groups)
- result.update(insn.conflicts_with_groups)
+ for stmt in self.statements:
+ result.update(stmt.groups)
+ result.update(stmt.conflicts_with_groups)
return frozenset(result)
@@ -417,8 +439,8 @@ class LoopKernel(ImmutableRecordWithoutPickling):
@property
@memoize_method
- def id_to_insn(self):
- return dict((insn.id, insn) for insn in self.instructions)
+ def id_to_stmt(self):
+ return dict((stmt.id, stmt) for stmt in self.statements)
# }}}
@@ -659,35 +681,35 @@ class LoopKernel(ImmutableRecordWithoutPickling):
return intern_frozenset_of_ids(all_params-all_inames)
@memoize_method
- def all_insn_inames(self):
- """Return a mapping from instruction ids to inames inside which
+ def all_stmt_inames(self):
+ """Return a mapping from statement ids to inames inside which
they should be run.
"""
result = {}
- for insn in self.instructions:
- result[insn.id] = insn.within_inames
+ for stmt in self.statements:
+ result[stmt.id] = stmt.within_inames
return result
@memoize_method
def all_referenced_inames(self):
result = set()
- for inames in six.itervalues(self.all_insn_inames()):
+ for inames in six.itervalues(self.all_stmt_inames()):
result.update(inames)
return result
- def insn_inames(self, insn):
- if isinstance(insn, str):
- insn = self.id_to_insn[insn]
- return insn.within_inames
+ def stmt_inames(self, stmt):
+ if isinstance(stmt, str):
+ stmt = self.id_to_stmt[stmt]
+ return stmt.within_inames
@memoize_method
- def iname_to_insns(self):
+ def iname_to_stmts(self):
result = dict(
(iname, set()) for iname in self.all_inames())
- for insn in self.instructions:
- for iname in self.insn_inames(insn):
- result[iname].add(insn.id)
+ for stmt in self.statements:
+ for iname in self.stmt_inames(stmt):
+ result[iname].add(stmt.id)
return result
@@ -727,31 +749,31 @@ class LoopKernel(ImmutableRecordWithoutPickling):
# {{{ dependency wrangling
@memoize_method
- def recursive_insn_dep_map(self):
- """Returns a :class:`dict` mapping an instruction IDs *a*
- to all instruction IDs it directly or indirectly depends
+ def recursive_stmt_dep_map(self):
+        """Returns a :class:`dict` mapping a statement ID *a*
+ to all statement IDs it directly or indirectly depends
on.
"""
result = {}
- def compute_deps(insn_id):
+ def compute_deps(stmt_id):
try:
- return result[insn_id]
+ return result[stmt_id]
except KeyError:
pass
- insn = self.id_to_insn[insn_id]
- insn_result = set(insn.depends_on)
+ stmt = self.id_to_stmt[stmt_id]
+ stmt_result = set(stmt.depends_on)
- for dep in list(insn.depends_on):
- insn_result.update(compute_deps(dep))
+ for dep in list(stmt.depends_on):
+ stmt_result.update(compute_deps(dep))
- result[insn_id] = frozenset(insn_result)
- return insn_result
+ result[stmt_id] = frozenset(stmt_result)
+ return stmt_result
- for insn in self.instructions:
- compute_deps(insn.id)
+ for stmt in self.statements:
+ compute_deps(stmt.id)
return result
@@ -762,7 +784,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
@memoize_method
def reader_map(self):
"""
- :return: a dict that maps variable names to ids of insns that read that
+ :return: a dict that maps variable names to ids of stmts that read that
variable.
"""
result = {}
@@ -771,39 +793,39 @@ class LoopKernel(ImmutableRecordWithoutPickling):
set(arg.name for arg in self.args)
| set(six.iterkeys(self.temporary_variables)))
- for insn in self.instructions:
- for var_name in insn.read_dependency_names() & admissible_vars:
- result.setdefault(var_name, set()).add(insn.id)
+ for stmt in self.statements:
+ for var_name in stmt.read_dependency_names() & admissible_vars:
+ result.setdefault(var_name, set()).add(stmt.id)
return result
@memoize_method
def writer_map(self):
"""
- :return: a dict that maps variable names to ids of insns that write
+ :return: a dict that maps variable names to ids of stmts that write
to that variable.
"""
result = {}
- for insn in self.instructions:
- for var_name in insn.assignee_var_names():
- result.setdefault(var_name, set()).add(insn.id)
+ for stmt in self.statements:
+ for var_name in stmt.assignee_var_names():
+ result.setdefault(var_name, set()).add(stmt.id)
return result
@memoize_method
def get_read_variables(self):
result = set()
- for insn in self.instructions:
- result.update(insn.read_dependency_names())
+ for stmt in self.statements:
+ result.update(stmt.read_dependency_names())
return result
@memoize_method
def get_written_variables(self):
return frozenset(
var_name
- for insn in self.instructions
- for var_name in insn.assignee_var_names())
+ for stmt in self.statements
+ for var_name in stmt.assignee_var_names())
@memoize_method
def get_temporary_to_base_storage_map(self):
@@ -902,29 +924,29 @@ class LoopKernel(ImmutableRecordWithoutPickling):
constants_only=True)))
@memoize_method
- def get_grid_sizes_for_insn_ids(self, insn_ids, ignore_auto=False):
+ def get_grid_sizes_for_stmt_ids(self, stmt_ids, ignore_auto=False):
"""Return a tuple (global_size, local_size) containing a grid that
- could accommodate execution of all instructions whose IDs are given
- in *insn_ids*.
+ could accommodate execution of all statements whose IDs are given
+ in *stmt_ids*.
- :arg insn_ids: a :class:`frozenset` of instruction IDs
+ :arg stmt_ids: a :class:`frozenset` of statement IDs
*global_size* and *local_size* are :class:`islpy.PwAff` objects.
"""
- if self.overridden_get_grid_sizes_for_insn_ids:
- return self.overridden_get_grid_sizes_for_insn_ids(
- insn_ids,
+ if self.overridden_get_grid_sizes_for_stmt_ids:
+ return self.overridden_get_grid_sizes_for_stmt_ids(
+ stmt_ids,
ignore_auto=ignore_auto)
- all_inames_by_insns = set()
- for insn_id in insn_ids:
- all_inames_by_insns |= self.insn_inames(insn_id)
+ all_inames_by_stmts = set()
+ for stmt_id in stmt_ids:
+ all_inames_by_stmts |= self.stmt_inames(stmt_id)
- if not all_inames_by_insns <= self.all_inames():
- raise RuntimeError("some inames collected from instructions (%s) "
+ if not all_inames_by_stmts <= self.all_inames():
+ raise RuntimeError("some inames collected from statements (%s) "
"are not present in domain (%s)"
- % (", ".join(sorted(all_inames_by_insns)),
+ % (", ".join(sorted(all_inames_by_stmts)),
", ".join(sorted(self.all_inames()))))
global_sizes = {}
@@ -934,7 +956,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
GroupIndexTag, LocalIndexTag,
AutoLocalIndexTagBase)
- for iname in all_inames_by_insns:
+ for iname in all_inames_by_stmts:
tag = self.iname_to_tag.get(iname)
if isinstance(tag, GroupIndexTag):
@@ -995,18 +1017,18 @@ class LoopKernel(ImmutableRecordWithoutPickling):
return (to_dim_tuple(global_sizes, "global"),
to_dim_tuple(local_sizes, "local", forced_sizes=self.local_sizes))
- def get_grid_sizes_for_insn_ids_as_exprs(self, insn_ids, ignore_auto=False):
+ def get_grid_sizes_for_stmt_ids_as_exprs(self, stmt_ids, ignore_auto=False):
"""Return a tuple (global_size, local_size) containing a grid that
- could accommodate execution of all instructions whose IDs are given
- in *insn_ids*.
+ could accommodate execution of all statements whose IDs are given
+ in *stmt_ids*.
- :arg insn_ids: a :class:`frozenset` of instruction IDs
+ :arg stmt_ids: a :class:`frozenset` of statement IDs
*global_size* and *local_size* are :mod:`pymbolic` expressions
"""
- grid_size, group_size = self.get_grid_sizes_for_insn_ids(
- insn_ids, ignore_auto)
+ grid_size, group_size = self.get_grid_sizes_for_stmt_ids(
+ stmt_ids, ignore_auto)
def tup_to_exprs(tup):
from loopy.symbolic import pw_aff_to_expr
@@ -1016,23 +1038,23 @@ class LoopKernel(ImmutableRecordWithoutPickling):
def get_grid_size_upper_bounds(self, ignore_auto=False):
"""Return a tuple (global_size, local_size) containing a grid that
- could accommodate execution of *all* instructions in the kernel.
+ could accommodate execution of *all* statements in the kernel.
*global_size* and *local_size* are :class:`islpy.PwAff` objects.
"""
- return self.get_grid_sizes_for_insn_ids(
- frozenset(insn.id for insn in self.instructions),
+ return self.get_grid_sizes_for_stmt_ids(
+ frozenset(stmt.id for stmt in self.statements),
ignore_auto=ignore_auto)
def get_grid_size_upper_bounds_as_exprs(self, ignore_auto=False):
"""Return a tuple (global_size, local_size) containing a grid that
- could accommodate execution of *all* instructions in the kernel.
+ could accommodate execution of *all* statements in the kernel.
*global_size* and *local_size* are :mod:`pymbolic` expressions
"""
- return self.get_grid_sizes_for_insn_ids_as_exprs(
- frozenset(insn.id for insn in self.instructions),
+ return self.get_grid_sizes_for_stmt_ids_as_exprs(
+ frozenset(stmt.id for stmt in self.statements),
ignore_auto=ignore_auto)
# }}}
@@ -1058,12 +1080,12 @@ class LoopKernel(ImmutableRecordWithoutPickling):
# {{{ nosync sets
@memoize_method
- def get_nosync_set(self, insn_id, scope):
+ def get_nosync_set(self, stmt_id, scope):
assert scope in ("local", "global")
return frozenset(
- insn_id
- for insn_id, nosync_scope in self.id_to_insn[insn_id].no_sync_with
+ stmt_id
+ for stmt_id, nosync_scope in self.id_to_stmt[stmt_id].no_sync_with
if nosync_scope == scope or nosync_scope == "any")
# }}}
@@ -1094,7 +1116,8 @@ class LoopKernel(ImmutableRecordWithoutPickling):
"tags",
"variables",
"rules",
- "instructions",
+ "Statements",
+ "statements",
"Dependencies",
"schedule",
])
@@ -1171,18 +1194,18 @@ class LoopKernel(ImmutableRecordWithoutPickling):
for rule_name in natsorted(six.iterkeys(kernel.substitutions)):
lines.append(str(kernel.substitutions[rule_name]))
- if "instructions" in what:
+ if "Statements" in what or "statements" in what:
lines.extend(sep)
if show_labels:
- lines.append("INSTRUCTIONS:")
+ lines.append("STATEMENTS:")
- from loopy.kernel.tools import stringify_instruction_list
- lines.extend(stringify_instruction_list(kernel))
+ from loopy.kernel.tools import stringify_statement_list
+ lines.extend(stringify_statement_list(kernel))
dep_lines = []
- for insn in kernel.instructions:
- if insn.depends_on:
- dep_lines.append("%s : %s" % (insn.id, ",".join(insn.depends_on)))
+ for stmt in kernel.statements:
+ if stmt.depends_on:
+ dep_lines.append("%s : %s" % (stmt.id, ",".join(stmt.depends_on)))
if "Dependencies" in what and dep_lines:
lines.extend(sep)
@@ -1307,7 +1330,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
hash_fields = (
"domains",
- "instructions",
+ "statements",
"args",
"schedule",
"name",
@@ -1387,6 +1410,21 @@ class LoopKernel(ImmutableRecordWithoutPickling):
# }}}
+    # {{{ "instruction" compat goop
+
+    @property
+    def id_to_insn(self):
+        return self.id_to_stmt
+
+    @property
+    def instructions(self):
+        return self.statements
+
+    def get_instruction_id_generator(self, based_on="insn"):
+        return self.get_statement_id_generator(based_on)
+
+    # }}}
+
# }}}
# vim: foldmethod=marker
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index c6618d62f7543fd2bc5461762ed19714e998fc14..b49a7d9a96077530a3f55d6a001110c6692aa68c 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -31,7 +31,7 @@ from pymbolic.mapper import CSECachingMapperMixin
from loopy.tools import intern_frozenset_of_ids
from loopy.symbolic import IdentityMapper, WalkMapper
from loopy.kernel.data import (
- InstructionBase,
+ StatementBase,
MultiAssignmentBase, Assignment,
SubstitutionRule)
from loopy.diagnostic import LoopyError, warn_with_kernel
@@ -73,7 +73,7 @@ WORD_RE = re.compile(r"\b([a-zA-Z0-9_]+)\b")
BRACE_RE = re.compile(r"\$\{([a-zA-Z0-9_]+)\}")
-def expand_defines(insn, defines, single_valued=True):
+def expand_defines(stmt, defines, single_valued=True):
replacements = [()]
processed_defines = set()
@@ -83,7 +83,7 @@ def expand_defines(insn, defines, single_valued=True):
(WORD_RE, r"\b%s\b"),
]:
- for match in find_regexp.finditer(insn):
+ for match in find_regexp.finditer(stmt):
define_name = match.group(1)
# {{{ don't process the same define multiple times
@@ -118,7 +118,7 @@ def expand_defines(insn, defines, single_valued=True):
for rep in replacements]
for rep in replacements:
- rep_value = insn
+ rep_value = stmt
for pattern, val in rep:
rep_value = re.sub(pattern, str(val), rep_value)
@@ -147,16 +147,16 @@ def expand_defines_in_expr(expr, defines):
# }}}
-# {{{ instruction options
+# {{{ statement options
-def get_default_insn_options_dict():
+def get_default_stmt_options_dict():
return {
"depends_on": frozenset(),
"depends_on_is_final": False,
"no_sync_with": frozenset(),
"groups": frozenset(),
"conflicts_with_groups": frozenset(),
- "insn_id": None,
+ "stmt_id": None,
"inames_to_dup": [],
"priority": 0,
"within_inames_is_final": False,
@@ -172,7 +172,7 @@ from collections import namedtuple
_NosyncParseResult = namedtuple("_NosyncParseResult", "expr, scope")
-def parse_insn_options(opt_dict, options_str, assignee_names=None):
+def parse_stmt_options(opt_dict, options_str, assignee_names=None):
if options_str is None:
return opt_dict
@@ -212,10 +212,10 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None):
raise LoopyError("'id' option may not be specified "
"in a 'with' block")
- result["insn_id"] = intern(opt_value)
+ result["stmt_id"] = intern(opt_value)
elif opt_key == "id_prefix" and opt_value is not None:
- result["insn_id"] = UniqueName(opt_value)
+ result["stmt_id"] = UniqueName(opt_value)
elif opt_key == "priority" and opt_value is not None:
if is_with_block:
@@ -354,7 +354,7 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None):
else:
raise ValueError(
- "unrecognized instruction option '%s' "
+ "unrecognized statement option '%s' "
"(maybe a missing/extraneous =value?)"
% opt_key)
@@ -363,7 +363,7 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None):
# }}}
-# {{{ parse one instruction
+# {{{ parse one statement
WITH_OPTIONS_RE = re.compile(
r"^"
@@ -420,10 +420,10 @@ SUBST_RE = re.compile(
r"^\s*(?P.+?)\s*:=\s*(?P.+)\s*$")
-def parse_insn(groups, insn_options):
+def parse_stmt(groups, stmt_options):
"""
- :return: a tuple ``(insn, inames_to_dup)``, where insn is a
- :class:`Assignment`, a :class:`CallInstruction`,
+ :return: a tuple ``(stmt, inames_to_dup)``, where stmt is a
+ :class:`Assignment`, a :class:`CallStatement`,
or a :class:`SubstitutionRule`
and *inames_to_dup* is None or a list of tuples `(old, new)`.
"""
@@ -488,22 +488,22 @@ def parse_insn(groups, insn_options):
temp_var_types = tuple(temp_var_types)
del new_lhs
- insn_options = parse_insn_options(
- insn_options.copy(),
+ stmt_options = parse_stmt_options(
+ stmt_options.copy(),
groups["options"],
assignee_names=assignee_names)
- insn_id = insn_options.pop("insn_id", None)
- inames_to_dup = insn_options.pop("inames_to_dup", [])
+ stmt_id = stmt_options.pop("stmt_id", None)
+ inames_to_dup = stmt_options.pop("inames_to_dup", [])
kwargs = dict(
id=(
- intern(insn_id)
- if isinstance(insn_id, str)
- else insn_id),
- **insn_options)
+ intern(stmt_id)
+ if isinstance(stmt_id, str)
+ else stmt_id),
+ **stmt_options)
- from loopy.kernel.instruction import make_assignment
+ from loopy.kernel.statement import make_assignment
return make_assignment(
lhs, rhs, temp_var_types, **kwargs
), inames_to_dup
@@ -556,47 +556,47 @@ def parse_subst_rule(groups):
# }}}
-# {{{ parse_special_insn
+# {{{ parse_special_stmt
-def parse_special_insn(groups, insn_options):
- insn_options = parse_insn_options(
- insn_options.copy(),
+def parse_special_stmt(groups, stmt_options):
+ stmt_options = parse_stmt_options(
+ stmt_options.copy(),
groups["options"],
assignee_names=())
- del insn_options["atomicity"]
+ del stmt_options["atomicity"]
- insn_id = insn_options.pop("insn_id", None)
- inames_to_dup = insn_options.pop("inames_to_dup", [])
+ stmt_id = stmt_options.pop("stmt_id", None)
+ inames_to_dup = stmt_options.pop("inames_to_dup", [])
kwargs = dict(
id=(
- intern(insn_id)
- if isinstance(insn_id, str)
- else insn_id),
- **insn_options)
+ intern(stmt_id)
+ if isinstance(stmt_id, str)
+ else stmt_id),
+ **stmt_options)
- from loopy.kernel.instruction import NoOpInstruction, BarrierInstruction
- special_insn_kind = groups["kind"]
+ from loopy.kernel.statement import NoOpStatement, BarrierStatement
+ special_stmt_kind = groups["kind"]
- if special_insn_kind == "gbarrier":
- cls = BarrierInstruction
+ if special_stmt_kind == "gbarrier":
+ cls = BarrierStatement
kwargs["kind"] = "global"
- elif special_insn_kind == "lbarrier":
- cls = BarrierInstruction
+ elif special_stmt_kind == "lbarrier":
+ cls = BarrierStatement
kwargs["kind"] = "local"
- elif special_insn_kind == "nop":
- cls = NoOpInstruction
+ elif special_stmt_kind == "nop":
+ cls = NoOpStatement
else:
raise LoopyError(
- "invalid kind of special instruction: '%s'" % special_insn_kind)
+ "invalid kind of special statement: '%s'" % special_stmt_kind)
return cls(**kwargs), inames_to_dup
# }}}
-# {{{ parse_instructions
+# {{{ parse_statements
_PAREN_PAIRS = {
"(": (+1, "("),
@@ -619,184 +619,184 @@ def _count_open_paren_symbols(s):
return result
-def parse_instructions(instructions, defines):
- if isinstance(instructions, str):
- instructions = [instructions]
+def parse_statements(statements, defines):
+ if isinstance(statements, str):
+ statements = [statements]
substitutions = {}
- new_instructions = []
+ new_statements = []
# {{{ pass 1: interning, comments, whitespace
- for insn in instructions:
- if isinstance(insn, SubstitutionRule):
- substitutions[insn.name] = insn
+ for stmt in statements:
+ if isinstance(stmt, SubstitutionRule):
+ substitutions[stmt.name] = stmt
continue
- elif isinstance(insn, InstructionBase):
+ elif isinstance(stmt, StatementBase):
def intern_if_str(s):
if isinstance(s, str):
return intern(s)
else:
return s
- new_instructions.append(
- insn.copy(
- id=intern(insn.id) if isinstance(insn.id, str) else insn.id,
+ new_statements.append(
+ stmt.copy(
+ id=intern(stmt.id) if isinstance(stmt.id, str) else stmt.id,
depends_on=frozenset(intern_if_str(dep)
- for dep in insn.depends_on),
- groups=frozenset(intern(grp) for grp in insn.groups),
+ for dep in stmt.depends_on),
+ groups=frozenset(intern(grp) for grp in stmt.groups),
conflicts_with_groups=frozenset(
- intern(grp) for grp in insn.conflicts_with_groups),
+ intern(grp) for grp in stmt.conflicts_with_groups),
within_inames=frozenset(
- intern(iname) for iname in insn.within_inames),
+ intern(iname) for iname in stmt.within_inames),
))
continue
- elif not isinstance(insn, str):
- raise TypeError("Instructions must be either an Instruction "
+ elif not isinstance(stmt, str):
+            raise TypeError("Statements must be either a Statement "
"instance or a parseable string. got '%s' instead."
- % type(insn))
+ % type(stmt))
- for insn in insn.split("\n"):
- comment_start = insn.find("#")
+ for stmt in stmt.split("\n"):
+ comment_start = stmt.find("#")
if comment_start >= 0:
- insn = insn[:comment_start]
+ stmt = stmt[:comment_start]
- insn = insn.strip()
- if not insn:
+ stmt = stmt.strip()
+ if not stmt:
continue
- new_instructions.append(insn)
+ new_statements.append(stmt)
# }}}
- instructions = new_instructions
- new_instructions = []
+ statements = new_statements
+ new_statements = []
# {{{ pass 2: join-by-paren
- insn_buffer = None
+ stmt_buffer = None
- for i, insn in enumerate(instructions):
- if isinstance(insn, InstructionBase):
- if insn_buffer is not None:
- raise LoopyError("cannot join instruction lines "
+ for i, stmt in enumerate(statements):
+ if isinstance(stmt, StatementBase):
+ if stmt_buffer is not None:
+ raise LoopyError("cannot join statement lines "
"by paren-like delimiters "
- "across InstructionBase instance at instructions index %d"
+ "across StatementBase instance at statements index %d"
% i)
- new_instructions.append(insn)
+ new_statements.append(stmt)
else:
- if insn_buffer is not None:
- insn_buffer = insn_buffer + " " + insn
- if _count_open_paren_symbols(insn_buffer) == 0:
- new_instructions.append(insn_buffer)
- insn_buffer = None
+ if stmt_buffer is not None:
+ stmt_buffer = stmt_buffer + " " + stmt
+ if _count_open_paren_symbols(stmt_buffer) == 0:
+ new_statements.append(stmt_buffer)
+ stmt_buffer = None
else:
- if _count_open_paren_symbols(insn) == 0:
- new_instructions.append(insn)
+ if _count_open_paren_symbols(stmt) == 0:
+ new_statements.append(stmt)
else:
- insn_buffer = insn
+ stmt_buffer = stmt
- if insn_buffer is not None:
- raise LoopyError("unclosed paren-like delimiter at end of 'instructions' "
+ if stmt_buffer is not None:
+ raise LoopyError("unclosed paren-like delimiter at end of 'statements' "
"while attempting to join lines by paren-like delimiters")
# }}}
- instructions = new_instructions
- new_instructions = []
+ statements = new_statements
+ new_statements = []
# {{{ pass 3: defines
- for insn in instructions:
- if isinstance(insn, InstructionBase):
- new_instructions.append(insn)
+ for stmt in statements:
+ if isinstance(stmt, StatementBase):
+ new_statements.append(stmt)
else:
- for sub_insn in expand_defines(insn, defines, single_valued=False):
- new_instructions.append(sub_insn)
+ for sub_stmt in expand_defines(stmt, defines, single_valued=False):
+ new_statements.append(sub_stmt)
# }}}
- instructions = new_instructions
- new_instructions = []
+ statements = new_statements
+ new_statements = []
- inames_to_dup = [] # one for each parsed_instruction
+ inames_to_dup = [] # one for each parsed_statement
# {{{ pass 4: parsing
- insn_options_stack = [get_default_insn_options_dict()]
+ stmt_options_stack = [get_default_stmt_options_dict()]
if_predicates_stack = [
{'predicates': frozenset(),
- 'insn_predicates': frozenset()}]
+ 'stmt_predicates': frozenset()}]
- for insn in instructions:
- if isinstance(insn, InstructionBase):
- local_w_inames = insn_options_stack[-1]["within_inames"]
+ for stmt in statements:
+ if isinstance(stmt, StatementBase):
+ local_w_inames = stmt_options_stack[-1]["within_inames"]
- if insn.within_inames_is_final:
+ if stmt.within_inames_is_final:
if not (
- local_w_inames <= insn.within_inames):
- raise LoopyError("non-parsed instruction '%s' without "
+ local_w_inames <= stmt.within_inames):
+ raise LoopyError("non-parsed statement '%s' without "
"inames '%s' (but with final iname dependencies) "
"found inside 'for'/'with' block for inames "
"'%s'"
- % (insn.id,
- ", ".join(local_w_inames - insn.within_inames),
- insn_options_stack[-1].within_inames))
+ % (stmt.id,
+ ", ".join(local_w_inames - stmt.within_inames),
+ stmt_options_stack[-1].within_inames))
else:
# not final, add inames from current scope
kwargs = {}
- if insn.id is None:
- kwargs["id"] = insn_options_stack[-1]["insn_id"]
+ if stmt.id is None:
+ kwargs["id"] = stmt_options_stack[-1]["stmt_id"]
- insn = insn.copy(
- within_inames=insn.within_inames | local_w_inames,
+ stmt = stmt.copy(
+ within_inames=stmt.within_inames | local_w_inames,
within_inames_is_final=(
# If it's inside a for/with block, then it's
# final now.
bool(local_w_inames)),
depends_on=(
- (insn.depends_on
- | insn_options_stack[-1]["depends_on"])
- if insn_options_stack[-1]["depends_on"] is not None
- else insn.depends_on),
+ (stmt.depends_on
+ | stmt_options_stack[-1]["depends_on"])
+ if stmt_options_stack[-1]["depends_on"] is not None
+ else stmt.depends_on),
tags=(
- insn.tags
- | insn_options_stack[-1]["tags"]),
+ stmt.tags
+ | stmt_options_stack[-1]["tags"]),
predicates=(
- insn.predicates
- | insn_options_stack[-1]["predicates"]),
+ stmt.predicates
+ | stmt_options_stack[-1]["predicates"]),
groups=(
- insn.groups
- | insn_options_stack[-1]["groups"]),
+ stmt.groups
+ | stmt_options_stack[-1]["groups"]),
conflicts_with_groups=(
- insn.groups
- | insn_options_stack[-1]["conflicts_with_groups"]),
+ stmt.groups
+ | stmt_options_stack[-1]["conflicts_with_groups"]),
**kwargs)
- new_instructions.append(insn)
+ new_statements.append(stmt)
inames_to_dup.append([])
del local_w_inames
continue
- with_options_match = WITH_OPTIONS_RE.match(insn)
+ with_options_match = WITH_OPTIONS_RE.match(stmt)
if with_options_match is not None:
- insn_options_stack.append(
- parse_insn_options(
- insn_options_stack[-1],
+ stmt_options_stack.append(
+ parse_stmt_options(
+ stmt_options_stack[-1],
with_options_match.group("options")))
continue
- for_match = FOR_RE.match(insn)
+ for_match = FOR_RE.match(stmt)
if for_match is not None:
- options = insn_options_stack[-1].copy()
+ options = stmt_options_stack[-1].copy()
added_inames = frozenset(
iname.strip()
for iname in for_match.group("inames").split(",")
@@ -809,13 +809,13 @@ def parse_instructions(instructions, defines):
| added_inames)
options["within_inames_is_final"] = True
- insn_options_stack.append(options)
+ stmt_options_stack.append(options)
del options
continue
- if_match = IF_RE.match(insn)
+ if_match = IF_RE.match(stmt)
if if_match is not None:
- options = insn_options_stack[-1].copy()
+ options = stmt_options_stack[-1].copy()
predicate = if_match.group("predicate")
if not predicate:
raise LoopyError("'if' without predicate encountered")
@@ -827,27 +827,27 @@ def parse_instructions(instructions, defines):
options.get("predicates", frozenset())
| frozenset([predicate]))
- insn_options_stack.append(options)
+ stmt_options_stack.append(options)
#add to the if_stack
if_options = options.copy()
- if_options['insn_predicates'] = options["predicates"]
+ if_options['stmt_predicates'] = options["predicates"]
if_predicates_stack.append(if_options)
del options
del predicate
continue
- elif_match = ELIF_RE.match(insn)
- else_match = ELSE_RE.match(insn)
+ elif_match = ELIF_RE.match(stmt)
+ else_match = ELSE_RE.match(stmt)
if elif_match is not None or else_match is not None:
- prev_predicates = insn_options_stack[-1].get(
+ prev_predicates = stmt_options_stack[-1].get(
"predicates", frozenset())
last_if_predicates = if_predicates_stack[-1].get(
"predicates", frozenset())
- insn_options_stack.pop()
+ stmt_options_stack.pop()
if_predicates_stack.pop()
- outer_predicates = insn_options_stack[-1].get(
+ outer_predicates = stmt_options_stack[-1].get(
"predicates", frozenset())
last_if_predicates = last_if_predicates - outer_predicates
@@ -867,8 +867,8 @@ def parse_instructions(instructions, defines):
raise LoopyError("'else' without 'if'/'elif' encountered")
additional_preds = frozenset()
- options = insn_options_stack[-1].copy()
- if_options = insn_options_stack[-1].copy()
+ options = stmt_options_stack[-1].copy()
+ if_options = stmt_options_stack[-1].copy()
from pymbolic.primitives import LogicalNot
options["predicates"] = (
@@ -881,9 +881,9 @@ def parse_instructions(instructions, defines):
)
if_options["predicates"] = additional_preds
#hold on to this for comparison / stack popping later
- if_options["insn_predicates"] = options["predicates"]
+ if_options["stmt_predicates"] = options["predicates"]
- insn_options_stack.append(options)
+ stmt_options_stack.append(options)
if_predicates_stack.append(if_options)
del options
@@ -892,53 +892,53 @@ def parse_instructions(instructions, defines):
continue
- if insn == "end":
- obj = insn_options_stack.pop()
+ if stmt == "end":
+ obj = stmt_options_stack.pop()
#if this object is the end of an if statement
- if obj['predicates'] == if_predicates_stack[-1]["insn_predicates"] and\
- if_predicates_stack[-1]["insn_predicates"]:
+ if obj['predicates'] == if_predicates_stack[-1]["stmt_predicates"] and\
+ if_predicates_stack[-1]["stmt_predicates"]:
if_predicates_stack.pop()
continue
- insn_match = SPECIAL_INSN_RE.match(insn)
- if insn_match is not None:
- insn, insn_inames_to_dup = parse_special_insn(
- insn_match.groupdict(), insn_options_stack[-1])
- new_instructions.append(insn)
- inames_to_dup.append(insn_inames_to_dup)
+ stmt_match = SPECIAL_INSN_RE.match(stmt)
+ if stmt_match is not None:
+ stmt, stmt_inames_to_dup = parse_special_stmt(
+ stmt_match.groupdict(), stmt_options_stack[-1])
+ new_statements.append(stmt)
+ inames_to_dup.append(stmt_inames_to_dup)
continue
- subst_match = SUBST_RE.match(insn)
+ subst_match = SUBST_RE.match(stmt)
if subst_match is not None:
subst = parse_subst_rule(subst_match.groupdict())
substitutions[subst.name] = subst
continue
- insn_match = INSN_RE.match(insn)
- if insn_match is not None:
- insn, insn_inames_to_dup = parse_insn(
- insn_match.groupdict(), insn_options_stack[-1])
- new_instructions.append(insn)
- inames_to_dup.append(insn_inames_to_dup)
+ stmt_match = INSN_RE.match(stmt)
+ if stmt_match is not None:
+ stmt, stmt_inames_to_dup = parse_stmt(
+ stmt_match.groupdict(), stmt_options_stack[-1])
+ new_statements.append(stmt)
+ inames_to_dup.append(stmt_inames_to_dup)
continue
- insn_match = EMPTY_LHS_INSN_RE.match(insn)
- if insn_match is not None:
- insn, insn_inames_to_dup = parse_insn(
- insn_match.groupdict(), insn_options_stack[-1])
- new_instructions.append(insn)
- inames_to_dup.append(insn_inames_to_dup)
+ stmt_match = EMPTY_LHS_INSN_RE.match(stmt)
+ if stmt_match is not None:
+ stmt, stmt_inames_to_dup = parse_stmt(
+ stmt_match.groupdict(), stmt_options_stack[-1])
+ new_statements.append(stmt)
+ inames_to_dup.append(stmt_inames_to_dup)
continue
- raise LoopyError("instruction parse error: %s" % insn)
+ raise LoopyError("statement parse error: %s" % stmt)
- if len(insn_options_stack) != 1:
+ if len(stmt_options_stack) != 1:
raise LoopyError("unbalanced number of 'for'/'with' and 'end' "
"declarations")
# }}}
- return new_instructions, inames_to_dup, substitutions
+ return new_statements, inames_to_dup, substitutions
# }}}
@@ -1050,10 +1050,10 @@ class IndexRankFinder(CSECachingMapperMixin, WalkMapper):
class ArgumentGuesser:
- def __init__(self, domains, instructions, temporary_variables,
+ def __init__(self, domains, statements, temporary_variables,
subst_rules, default_offset):
self.domains = domains
- self.instructions = instructions
+ self.statements = statements
self.temporary_variables = temporary_variables
self.subst_rules = subst_rules
self.default_offset = default_offset
@@ -1073,15 +1073,15 @@ class ArgumentGuesser:
self.all_names = set()
self.all_written_names = set()
from loopy.symbolic import get_dependencies
- for insn in instructions:
- if isinstance(insn, MultiAssignmentBase):
- for assignee_var_name in insn.assignee_var_names():
+ for stmt in statements:
+ if isinstance(stmt, MultiAssignmentBase):
+ for assignee_var_name in stmt.assignee_var_names():
self.all_written_names.add(assignee_var_name)
self.all_names.update(get_dependencies(
- self.submap(insn.assignees)))
+ self.submap(stmt.assignees)))
self.all_names.update(get_dependencies(
- self.submap(insn.expression)))
+ self.submap(stmt.expression)))
def find_index_rank(self, name):
irf = IndexRankFinder(name)
@@ -1090,8 +1090,8 @@ class ArgumentGuesser:
irf(self.submap(expr))
return expr
- for insn in self.instructions:
- insn.with_transformed_expressions(run_irf)
+ for stmt in self.statements:
+ stmt.with_transformed_expressions(run_irf)
if not irf.index_ranks:
return 0
@@ -1145,11 +1145,11 @@ class ArgumentGuesser:
temp_var_names = set(six.iterkeys(self.temporary_variables))
- for insn in self.instructions:
- if isinstance(insn, MultiAssignmentBase):
+ for stmt in self.statements:
+ if isinstance(stmt, MultiAssignmentBase):
for assignee_var_name, temp_var_type in zip(
- insn.assignee_var_names(),
- insn.temp_var_types):
+ stmt.assignee_var_names(),
+ stmt.temp_var_types):
if temp_var_type is not None:
temp_var_names.add(assignee_var_name)
@@ -1206,14 +1206,14 @@ def check_for_duplicate_names(knl):
def check_for_nonexistent_iname_deps(knl):
- for insn in knl.instructions:
- if not set(insn.within_inames) <= knl.all_inames():
- raise ValueError("In instruction '%s': "
+ for stmt in knl.statements:
+ if not set(stmt.within_inames) <= knl.all_inames():
+ raise ValueError("In statement '%s': "
"cannot force dependency on inames '%s'--"
"they don't exist" % (
- insn.id,
+ stmt.id,
",".join(
- set(insn.within_inames)-knl.all_inames())))
+ set(stmt.within_inames)-knl.all_inames())))
def check_for_multiple_writes_to_loop_bounds(knl):
@@ -1240,8 +1240,8 @@ def check_written_variable_names(knl):
set(arg.name for arg in knl.args)
| set(six.iterkeys(knl.temporary_variables)))
- for insn in knl.instructions:
- for var_name in insn.assignee_var_names():
+ for stmt in knl.statements:
+ for var_name in stmt.assignee_var_names():
if var_name not in admissible_vars:
raise RuntimeError("variable '%s' not declared or not "
"allowed for writing" % var_name)
@@ -1284,7 +1284,7 @@ class CSEToAssignmentMapper(IdentityMapper):
return var
-def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"):
+def expand_cses(statements, inames_to_dup, cse_prefix="cse_expr"):
def add_assignment(base_name, expr, dtype, additional_inames):
if base_name is None:
base_name = "var"
@@ -1305,47 +1305,47 @@ def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"):
shape=()))
from pymbolic.primitives import Variable
- new_insn = Assignment(
+ new_stmt = Assignment(
id=None,
assignee=Variable(new_var_name),
expression=expr,
- predicates=insn.predicates,
- within_inames=insn.within_inames | additional_inames,
- within_inames_is_final=insn.within_inames_is_final,
+ predicates=stmt.predicates,
+ within_inames=stmt.within_inames | additional_inames,
+ within_inames_is_final=stmt.within_inames_is_final,
)
- newly_created_insn_ids.add(new_insn.id)
- new_insns.append(new_insn)
- if insn_inames_to_dup:
+ newly_created_stmt_ids.add(new_stmt.id)
+ new_stmts.append(new_stmt)
+ if stmt_inames_to_dup:
raise LoopyError("in-line iname duplication not allowed in "
- "an instruction containing a tagged common "
- "subexpression (found in instruction '%s')"
- % insn)
+                    "a statement containing a tagged common
+ "subexpression (found in statement '%s')"
+ % stmt)
- new_inames_to_dup.append(insn_inames_to_dup)
+ new_inames_to_dup.append(stmt_inames_to_dup)
return new_var_name
cseam = CSEToAssignmentMapper(add_assignment=add_assignment)
- new_insns = []
+ new_stmts = []
new_inames_to_dup = []
from pytools import UniqueNameGenerator
var_name_gen = UniqueNameGenerator(forced_prefix=cse_prefix)
- newly_created_insn_ids = set()
+ newly_created_stmt_ids = set()
new_temp_vars = []
- for insn, insn_inames_to_dup in zip(instructions, inames_to_dup):
- if isinstance(insn, MultiAssignmentBase):
- new_insns.append(insn.copy(
- expression=cseam(insn.expression, frozenset())))
- new_inames_to_dup.append(insn_inames_to_dup)
+ for stmt, stmt_inames_to_dup in zip(statements, inames_to_dup):
+ if isinstance(stmt, MultiAssignmentBase):
+ new_stmts.append(stmt.copy(
+ expression=cseam(stmt.expression, frozenset())))
+ new_inames_to_dup.append(stmt_inames_to_dup)
else:
- new_insns.append(insn)
- new_inames_to_dup.append(insn_inames_to_dup)
+ new_stmts.append(stmt)
+ new_inames_to_dup.append(stmt_inames_to_dup)
- return new_insns, new_inames_to_dup, new_temp_vars
+ return new_stmts, new_inames_to_dup, new_temp_vars
# }}}
@@ -1353,25 +1353,25 @@ def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"):
# {{{ add_sequential_dependencies
def add_sequential_dependencies(knl):
- new_insns = []
- prev_insn = None
- for insn in knl.instructions:
- depon = insn.depends_on
+ new_stmts = []
+ prev_stmt = None
+ for stmt in knl.statements:
+ depon = stmt.depends_on
if depon is None:
depon = frozenset()
- if prev_insn is not None:
- depon = depon | frozenset((prev_insn.id,))
+ if prev_stmt is not None:
+ depon = depon | frozenset((prev_stmt.id,))
- insn = insn.copy(
+ stmt = stmt.copy(
depends_on=depon,
depends_on_is_final=True)
- new_insns.append(insn)
+ new_stmts.append(stmt)
- prev_insn = insn
+ prev_stmt = stmt
- return knl.copy(instructions=new_insns)
+ return knl.copy(statements=new_stmts)
# }}}
@@ -1379,16 +1379,16 @@ def add_sequential_dependencies(knl):
# {{{ temporary variable creation
def create_temporaries(knl, default_order):
- new_insns = []
+ new_stmts = []
new_temp_vars = knl.temporary_variables.copy()
import loopy as lp
- for insn in knl.instructions:
- if isinstance(insn, MultiAssignmentBase):
+ for stmt in knl.statements:
+ if isinstance(stmt, MultiAssignmentBase):
for assignee_name, temp_var_type in zip(
- insn.assignee_var_names(),
- insn.temp_var_types):
+ stmt.assignee_var_names(),
+ stmt.temp_var_types):
if temp_var_type is None:
continue
@@ -1412,15 +1412,15 @@ def create_temporaries(knl, default_order):
order=default_order,
target=knl.target)
- if isinstance(insn, Assignment):
- insn = insn.copy(temp_var_type=None)
+ if isinstance(stmt, Assignment):
+ stmt = stmt.copy(temp_var_type=None)
else:
- insn = insn.copy(temp_var_types=None)
+ stmt = stmt.copy(temp_var_types=None)
- new_insns.append(insn)
+ new_stmts.append(stmt)
return knl.copy(
- instructions=new_insns,
+ statements=new_stmts,
temporary_variables=new_temp_vars)
# }}}
@@ -1489,9 +1489,9 @@ def determine_shapes_of_temporaries(knl):
vars_needing_shape_inference.add(tv.name)
def feed_all_expressions(receiver):
- for insn in knl.instructions:
- insn.with_transformed_expressions(
- lambda expr: receiver(expr, knl.insn_inames(insn)))
+ for stmt in knl.statements:
+ stmt.with_transformed_expressions(
+ lambda expr: receiver(expr, knl.stmt_inames(stmt)))
var_to_base_indices, var_to_shape, var_to_error = (
find_shapes_of_vars(
@@ -1509,14 +1509,14 @@ def determine_shapes_of_temporaries(knl):
"shape of temporary '%s' because: %s"
% (varname, err))
- def feed_assignee_of_instruction(receiver):
- for insn in knl.instructions:
- for assignee in insn.assignees:
- receiver(assignee, knl.insn_inames(insn))
+ def feed_assignee_of_statement(receiver):
+ for stmt in knl.statements:
+ for assignee in stmt.assignees:
+ receiver(assignee, knl.stmt_inames(stmt))
var_to_base_indices_fallback, var_to_shape_fallback, var_to_error = (
find_shapes_of_vars(
- knl, vars_needing_shape_inference, feed_assignee_of_instruction))
+ knl, vars_needing_shape_inference, feed_assignee_of_statement))
if len(var_to_error) > 0:
# No way around errors: propagate an exception upward.
@@ -1622,10 +1622,10 @@ def apply_default_order_to_args(kernel, default_order):
# }}}
-# {{{ resolve instruction dependencies
+# {{{ resolve statement dependencies
-def _resolve_dependencies(knl, insn, deps):
- from loopy import find_instructions
+def _resolve_dependencies(knl, stmt, deps):
+ from loopy import find_statements
from loopy.match import MatchExpressionBase
new_deps = []
@@ -1634,45 +1634,45 @@ def _resolve_dependencies(knl, insn, deps):
found_any = False
if isinstance(dep, MatchExpressionBase):
- for new_dep in find_instructions(knl, dep):
- if new_dep.id != insn.id:
+ for new_dep in find_statements(knl, dep):
+ if new_dep.id != stmt.id:
new_deps.append(new_dep.id)
found_any = True
else:
from fnmatch import fnmatchcase
- for other_insn in knl.instructions:
- if fnmatchcase(other_insn.id, dep):
- new_deps.append(other_insn.id)
+ for other_stmt in knl.statements:
+ if fnmatchcase(other_stmt.id, dep):
+ new_deps.append(other_stmt.id)
found_any = True
if not found_any and knl.options.check_dep_resolution:
- raise LoopyError("instruction '%s' declared a depency on '%s', "
- "which did not resolve to any instruction present in the "
+        raise LoopyError("statement '%s' declared a dependency on '%s', "
+ "which did not resolve to any statement present in the "
"kernel '%s'. Set the kernel option 'check_dep_resolution'"
- "to False to disable this check." % (insn.id, dep, knl.name))
+ "to False to disable this check." % (stmt.id, dep, knl.name))
for dep_id in new_deps:
- if dep_id not in knl.id_to_insn:
- raise LoopyError("instruction '%s' depends on instruction id '%s', "
- "which was not found" % (insn.id, dep_id))
+ if dep_id not in knl.id_to_stmt:
+ raise LoopyError("statement '%s' depends on statement id '%s', "
+ "which was not found" % (stmt.id, dep_id))
return frozenset(new_deps)
def resolve_dependencies(knl):
- new_insns = []
+ new_stmts = []
- for insn in knl.instructions:
- new_insns.append(insn.copy(
- depends_on=_resolve_dependencies(knl, insn, insn.depends_on),
+ for stmt in knl.statements:
+ new_stmts.append(stmt.copy(
+ depends_on=_resolve_dependencies(knl, stmt, stmt.depends_on),
no_sync_with=frozenset(
- (resolved_insn_id, nosync_scope)
- for nosync_dep, nosync_scope in insn.no_sync_with
- for resolved_insn_id in
- _resolve_dependencies(knl, insn, (nosync_dep,))),
+ (resolved_stmt_id, nosync_scope)
+ for nosync_dep, nosync_scope in stmt.no_sync_with
+ for resolved_stmt_id in
+ _resolve_dependencies(knl, stmt, (nosync_dep,))),
))
- return knl.copy(instructions=new_insns)
+ return knl.copy(statements=new_stmts)
# }}}
@@ -1680,20 +1680,20 @@ def resolve_dependencies(knl):
# {{{ add used inames deps
def add_used_inames(knl):
- new_insns = []
+ new_stmts = []
- for insn in knl.instructions:
- deps = insn.read_dependency_names() | insn.write_dependency_names()
+ for stmt in knl.statements:
+ deps = stmt.read_dependency_names() | stmt.write_dependency_names()
iname_deps = deps & knl.all_inames()
- new_within_inames = insn.within_inames | iname_deps
+ new_within_inames = stmt.within_inames | iname_deps
- if new_within_inames != insn.within_inames:
- insn = insn.copy(within_inames=new_within_inames)
+ if new_within_inames != stmt.within_inames:
+ stmt = stmt.copy(within_inames=new_within_inames)
- new_insns.append(insn)
+ new_stmts.append(stmt)
- return knl.copy(instructions=new_insns)
+ return knl.copy(statements=new_stmts)
# }}}
@@ -1701,12 +1701,12 @@ def add_used_inames(knl):
# {{{ add inferred iname deps
def add_inferred_inames(knl):
- from loopy.kernel.tools import find_all_insn_inames
- insn_inames = find_all_insn_inames(knl)
+ from loopy.kernel.tools import find_all_stmt_inames
+ stmt_inames = find_all_stmt_inames(knl)
- return knl.copy(instructions=[
- insn.copy(within_inames=insn_inames[insn.id])
- for insn in knl.instructions])
+ return knl.copy(statements=[
+ stmt.copy(within_inames=stmt_inames[stmt.id])
+ for stmt in knl.statements])
# }}}
@@ -1726,18 +1726,18 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True):
var_names = arg_names | set(six.iterkeys(kernel.temporary_variables))
dep_map = dict(
- (insn.id, insn.read_dependency_names() & var_names)
- for insn in expanded_kernel.instructions)
+ (stmt.id, stmt.read_dependency_names() & var_names)
+ for stmt in expanded_kernel.statements)
- new_insns = []
- for insn in kernel.instructions:
- if not insn.depends_on_is_final:
+ new_stmts = []
+ for stmt in kernel.statements:
+ if not stmt.depends_on_is_final:
auto_deps = set()
# {{{ add automatic dependencies
all_my_var_writers = set()
- for var in dep_map[insn.id]:
+ for var in dep_map[stmt.id]:
var_writers = writer_map.get(var, set())
all_my_var_writers |= var_writers
@@ -1751,11 +1751,11 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True):
if len(var_writers) == 1:
auto_deps.update(
var_writers
- - set([insn.id]))
+ - set([stmt.id]))
# }}}
- depends_on = insn.depends_on
+ depends_on = stmt.depends_on
if depends_on is None:
depends_on = frozenset()
@@ -1764,26 +1764,26 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True):
if warn_if_used and new_deps != depends_on:
warn_with_kernel(kernel, "single_writer_after_creation",
"The single-writer dependency heuristic added dependencies "
- "on instruction ID(s) '%s' to instruction ID '%s' after "
+ "on statement ID(s) '%s' to statement ID '%s' after "
"kernel creation is complete. This is deprecated and "
"may stop working in the future. "
- "To fix this, ensure that instruction dependencies "
+ "To fix this, ensure that statement dependencies "
"are added/resolved as soon as possible, ideally at kernel "
"creation time."
- % (", ".join(new_deps - depends_on), insn.id))
+ % (", ".join(new_deps - depends_on), stmt.id))
- insn = insn.copy(depends_on=new_deps)
+ stmt = stmt.copy(depends_on=new_deps)
- new_insns.append(insn)
+ new_stmts.append(stmt)
- return kernel.copy(instructions=new_insns)
+ return kernel.copy(statements=new_stmts)
# }}}
# {{{ kernel creation top-level
-def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
+def make_kernel(domains, statements, kernel_data=["..."], **kwargs):
"""User-facing kernel creation entrypoint.
:arg domains:
@@ -1792,9 +1792,9 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
representing the :ref:`domain-tree`. May also be a list of strings
which will be parsed into such instances according to :ref:`isl-syntax`.
- :arg instructions:
+ :arg statements:
- A list of :class:`Assignment` (or other :class:`InstructionBase`
+ A list of :class:`Assignment` (or other :class:`StatementBase`
subclasses), possibly intermixed with instances of
:class:`SubstitutionRule`. This same list may also contain strings
which will be parsed into such objects using the
@@ -1857,7 +1857,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
:arg target: an instance of :class:`loopy.TargetBase`, or *None*,
to use the default target.
:arg seq_dependencies: If *True*, dependencies that sequentially
- connect the given *instructions* will be added. Defaults to
+ connect the given *statements* will be added. Defaults to
*False*.
:arg fixed_parameters: A dictionary of *name*/*value* pairs, where *name*
will be fixed to *value*. *name* may refer to :ref:`domain-parameters`
@@ -1948,8 +1948,8 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
# }}}
- instructions, inames_to_dup, substitutions = \
- parse_instructions(instructions, defines)
+ statements, inames_to_dup, substitutions = \
+ parse_statements(statements, defines)
# {{{ find/create isl_context
@@ -1959,15 +1959,15 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
# }}}
- instructions, inames_to_dup, cse_temp_vars = expand_cses(
- instructions, inames_to_dup)
+ statements, inames_to_dup, cse_temp_vars = expand_cses(
+ statements, inames_to_dup)
for tv in cse_temp_vars:
temporary_variables[tv.name] = tv
del cse_temp_vars
domains = parse_domains(domains, defines)
- arg_guesser = ArgumentGuesser(domains, instructions,
+ arg_guesser = ArgumentGuesser(domains, statements,
temporary_variables, substitutions,
default_offset)
@@ -1977,29 +1977,29 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
kwargs["substitutions"] = substitutions
from loopy.kernel import LoopKernel
- knl = LoopKernel(domains, instructions, kernel_args,
+ knl = LoopKernel(domains, statements, kernel_args,
temporary_variables=temporary_variables,
silenced_warnings=silenced_warnings,
options=options,
target=target,
**kwargs)
- from loopy.transform.instruction import uniquify_instruction_ids
- knl = uniquify_instruction_ids(knl)
- from loopy.check import check_for_duplicate_insn_ids
- check_for_duplicate_insn_ids(knl)
+ from loopy.transform.statement import uniquify_statement_ids
+ knl = uniquify_statement_ids(knl)
+ from loopy.check import check_for_duplicate_stmt_ids
+ check_for_duplicate_stmt_ids(knl)
if seq_dependencies:
knl = add_sequential_dependencies(knl)
- assert len(knl.instructions) == len(inames_to_dup)
+ assert len(knl.statements) == len(inames_to_dup)
from loopy import duplicate_inames
from loopy.match import Id
- for insn, insn_inames_to_dup in zip(knl.instructions, inames_to_dup):
- for old_iname, new_iname in insn_inames_to_dup:
+ for stmt, stmt_inames_to_dup in zip(knl.statements, inames_to_dup):
+ for old_iname, new_iname in stmt_inames_to_dup:
knl = duplicate_inames(knl, old_iname,
- within=Id(insn.id), new_inames=new_iname)
+ within=Id(stmt.id), new_inames=new_iname)
check_for_nonexistent_iname_deps(knl)
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index 44cbdea49456904bb61cd93bbe5febbb35bee074..42c0a6b75597da34822b8f3f3e52907eaec1e917 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -30,8 +30,8 @@ import numpy as np # noqa
from pytools import ImmutableRecord
from loopy.kernel.array import ArrayBase
from loopy.diagnostic import LoopyError
-from loopy.kernel.instruction import ( # noqa
- InstructionBase,
+from loopy.kernel.statement import ( # noqa
+ StatementBase,
memory_ordering,
memory_scope,
VarAtomicity,
@@ -39,10 +39,10 @@ from loopy.kernel.instruction import ( # noqa
AtomicUpdate,
MultiAssignmentBase,
Assignment,
- ExpressionInstruction,
- CallInstruction,
+ ExpressionStatement,
+ CallStatement,
make_assignment,
- CInstruction)
+ CStatement)
class auto(object): # noqa
@@ -70,7 +70,7 @@ class IndexTag(ImmutableRecord):
@property
def key(self):
"""Return a hashable, comparable value that is used to ensure
- per-instruction uniqueness of this unique iname tag.
+ per-statement uniqueness of this unique iname tag.
Also used for persistent hash construction.
"""
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/statement.py
similarity index 88%
rename from loopy/kernel/instruction.py
rename to loopy/kernel/statement.py
index e6b0937856c45fd130f93742eb000c12921d0e11..f23815ccb9494d94037c92bf5784ce468912065f 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/statement.py
@@ -28,10 +28,10 @@ from loopy.diagnostic import LoopyError
from warnings import warn
-# {{{ instructions: base class
+# {{{ statements: base class
-class InstructionBase(ImmutableRecord):
- """A base class for all types of instruction that can occur in
+class StatementBase(ImmutableRecord):
+ """A base class for all types of statement that can occur in
a kernel.
.. attribute:: id
@@ -39,26 +39,26 @@ class InstructionBase(ImmutableRecord):
An (otherwise meaningless) identifier that is unique within
a :class:`loopy.kernel.LoopKernel`.
- .. rubric:: Instruction ordering
+ .. rubric:: Statement ordering
.. attribute:: depends_on
- a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances
+ a :class:`frozenset` of :attr:`id` values of :class:`Statement` instances
that *must* be executed before this one. Note that
:func:`loopy.preprocess_kernel` (usually invoked automatically)
augments this by adding dependencies on any writes to temporaries read
- by this instruction.
+ by this statement.
May be *None* to invoke the default.
There are two extensions to this:
- You may use `*` as a wildcard in the given IDs. This will be expanded
- to all matching instruction IDs during :func:`loopy.make_kernel`.
- - Instead of an instruction ID, you may pass an instance of
+ to all matching statement IDs during :func:`loopy.make_kernel`.
+    - Instead of a statement ID, you may pass an instance of
:class:`loopy.match.MatchExpressionBase` into the :attr:`depends_on`
:class:`frozenset`. The given expression will be used to add any
- matching instructions in the kernel to :attr:`depends_on` during
+ matching statements in the kernel to :attr:`depends_on` during
:func:`loopy.make_kernel`. Note, that this is not meant as a user-facing
interface.
@@ -71,16 +71,16 @@ class InstructionBase(ImmutableRecord):
.. attribute:: groups
- A :class:`frozenset` of strings indicating the names of 'instruction
- groups' of which this instruction is a part. An instruction group is
- considered 'active' as long as one (but not all) instructions of the
+ A :class:`frozenset` of strings indicating the names of 'statement
+    groups' of which this statement is a part. A statement group is
+ considered 'active' as long as one (but not all) statements of the
group have been executed.
.. attribute:: conflicts_with_groups
- A :class:`frozenset` of strings indicating which instruction groups
- (see :class:`InstructionBase.groups`) may not be active when this
- instruction is scheduled.
+ A :class:`frozenset` of strings indicating which statement groups
+ (see :class:`StatementBase.groups`) may not be active when this
+ statement is scheduled.
.. attribute:: priority
@@ -91,8 +91,8 @@ class InstructionBase(ImmutableRecord):
.. attribute:: no_sync_with
- a :class:`frozenset` of tuples of the form `(insn_id, scope)`, where
- `insn_id` refers to :attr:`id` of :class:`Instruction` instances
+ a :class:`frozenset` of tuples of the form `(stmt_id, scope)`, where
+ `stmt_id` refers to :attr:`id` of :class:`Statement` instances
and `scope` is one of the following strings:
- `"local"`
@@ -100,10 +100,10 @@ class InstructionBase(ImmutableRecord):
- `"any"`.
This indicates no barrier synchronization is necessary with the given
- instruction using barriers of type `scope`, even given the existence of
+ statement using barriers of type `scope`, even given the existence of
a dependency chain and apparently conflicting access.
- Note, that :attr:`no_sync_with` allows instruction matching through wildcards
+ Note, that :attr:`no_sync_with` allows statement matching through wildcards
and match expression, just like :attr:`depends_on`.
.. rubric:: Conditionals
@@ -111,7 +111,7 @@ class InstructionBase(ImmutableRecord):
.. attribute:: predicates
a :class:`frozenset` of expressions. The conjunction (logical and) of
- their truth values (as defined by C) determines whether this instruction
+ their truth values (as defined by C) determines whether this statement
should be run.
.. rubric:: Iname dependencies
@@ -119,7 +119,7 @@ class InstructionBase(ImmutableRecord):
.. attribute:: within_inames
A :class:`frozenset` of inames identifying the loops within which this
- instruction will be executed.
+ statement will be executed.
.. rubric:: Iname dependencies
@@ -128,7 +128,7 @@ class InstructionBase(ImmutableRecord):
.. attribute:: tags
A :class:`frozenset` of string identifiers that can be used to
- identify groups of instructions.
+ identify groups of statements.
Tags starting with exclamation marks (``!``) are reserved and may have
specific meanings defined by :mod:`loopy` or its targets.
@@ -164,19 +164,19 @@ class InstructionBase(ImmutableRecord):
within_inames_is_final, within_inames,
priority,
boostable, boostable_into, predicates, tags,
- insn_deps=None, insn_deps_is_final=None,
+ stmt_deps=None, stmt_deps_is_final=None,
forced_iname_deps=None, forced_iname_deps_is_final=None):
# {{{ backwards compatibility goop
- if depends_on is not None and insn_deps is not None:
- raise LoopyError("may not specify both insn_deps and depends_on")
- elif insn_deps is not None:
- warn("insn_deps is deprecated, use depends_on",
+ if depends_on is not None and stmt_deps is not None:
+ raise LoopyError("may not specify both stmt_deps and depends_on")
+ elif stmt_deps is not None:
+ warn("stmt_deps is deprecated, use depends_on",
DeprecationWarning, stacklevel=2)
- depends_on = insn_deps
- depends_on_is_final = insn_deps_is_final
+ depends_on = stmt_deps
+ depends_on_is_final = stmt_deps_is_final
if forced_iname_deps is not None and within_inames is not None:
raise LoopyError("may not specify both forced_iname_deps "
@@ -282,16 +282,16 @@ class InstructionBase(ImmutableRecord):
# {{{ backwards compatibility goop
@property
- def insn_deps(self):
- warn("insn_deps is deprecated, use depends_on",
+ def stmt_deps(self):
+ warn("stmt_deps is deprecated, use depends_on",
DeprecationWarning, stacklevel=2)
return self.depends_on
# legacy
@property
- def insn_deps_is_final(self):
- warn("insn_deps_is_final is deprecated, use depends_on_is_final",
+ def stmt_deps_is_final(self):
+ warn("stmt_deps_is_final is deprecated, use depends_on_is_final",
DeprecationWarning, stacklevel=2)
return self.depends_on_is_final
@@ -349,14 +349,14 @@ class InstructionBase(ImmutableRecord):
def assignee_name(self):
"""A convenience wrapper around :meth:`assignee_var_names`
that returns the the name of the variable being assigned.
- If more than one variable is being modified in the instruction,
+ If more than one variable is being modified in the statement,
:raise:`ValueError` is raised.
"""
names = self.assignee_var_names()
if len(names) != 1:
- raise ValueError("expected exactly one assignment in instruction "
+ raise ValueError("expected exactly one assignment in statement "
"on which assignee_name is being called, found %d"
% len(names))
@@ -366,7 +366,7 @@ class InstructionBase(ImmutableRecord):
@memoize_method
def write_dependency_names(self):
"""Return a set of dependencies of the left hand side of the
- assignments performed by this instruction, including written variables
+ assignments performed by this statement, including written variables
and indices.
"""
@@ -393,7 +393,7 @@ class InstructionBase(ImmutableRecord):
elif self.boostable is None:
pass
else:
- raise RuntimeError("unexpected value for Instruction.boostable")
+ raise RuntimeError("unexpected value for Statement.boostable")
if self.depends_on:
result.append("dep="+":".join(self.depends_on))
@@ -447,22 +447,22 @@ class InstructionBase(ImmutableRecord):
# }}}
def copy(self, **kwargs):
- if "insn_deps" in kwargs:
- warn("insn_deps is deprecated, use depends_on",
+ if "stmt_deps" in kwargs:
+ warn("stmt_deps is deprecated, use depends_on",
DeprecationWarning, stacklevel=2)
- kwargs["depends_on"] = kwargs.pop("insn_deps")
+ kwargs["depends_on"] = kwargs.pop("stmt_deps")
- if "insn_deps_is_final" in kwargs:
- warn("insn_deps_is_final is deprecated, use depends_on",
+ if "stmt_deps_is_final" in kwargs:
+ warn("stmt_deps_is_final is deprecated, use depends_on",
DeprecationWarning, stacklevel=2)
- kwargs["depends_on_is_final"] = kwargs.pop("insn_deps_is_final")
+ kwargs["depends_on_is_final"] = kwargs.pop("stmt_deps_is_final")
- return super(InstructionBase, self).copy(**kwargs)
+ return super(StatementBase, self).copy(**kwargs)
def __setstate__(self, val):
- super(InstructionBase, self).__setstate__(val)
+ super(StatementBase, self).__setstate__(val)
from loopy.tools import intern_frozenset_of_ids
@@ -666,13 +666,13 @@ class AtomicUpdate(VarAtomicity):
# }}}
-# {{{ instruction base class: expression rhs
+# {{{ statement base class: expression rhs
-class MultiAssignmentBase(InstructionBase):
- """An assignment instruction with an expression as a right-hand side."""
+class MultiAssignmentBase(StatementBase):
+ """An assignment statement with an expression as a right-hand side."""
- fields = InstructionBase.fields | set(["expression"])
- pymbolic_fields = InstructionBase.pymbolic_fields | set(["expression"])
+ fields = StatementBase.fields | set(["expression"])
+ pymbolic_fields = StatementBase.pymbolic_fields | set(["expression"])
@memoize_method
def read_dependency_names(self):
@@ -704,7 +704,7 @@ class MultiAssignmentBase(InstructionBase):
# }}}
-# {{{ instruction: assignment
+# {{{ statement: assignment
class Assignment(MultiAssignmentBase):
"""
@@ -774,7 +774,7 @@ class Assignment(MultiAssignmentBase):
boostable=None, boostable_into=None, tags=None,
temp_var_type=None, atomicity=(),
priority=0, predicates=frozenset(),
- insn_deps=None, insn_deps_is_final=None,
+ stmt_deps=None, stmt_deps_is_final=None,
forced_iname_deps=None, forced_iname_deps_is_final=None):
super(Assignment, self).__init__(
@@ -791,8 +791,8 @@ class Assignment(MultiAssignmentBase):
priority=priority,
predicates=predicates,
tags=tags,
- insn_deps=insn_deps,
- insn_deps_is_final=insn_deps_is_final,
+ stmt_deps=stmt_deps,
+ stmt_deps_is_final=stmt_deps_is_final,
forced_iname_deps=forced_iname_deps,
forced_iname_deps_is_final=forced_iname_deps_is_final)
@@ -812,7 +812,7 @@ class Assignment(MultiAssignmentBase):
self.temp_var_type = temp_var_type
self.atomicity = atomicity
- # {{{ implement InstructionBase interface
+ # {{{ implement StatementBase interface
@memoize_method
def assignee_var_names(self):
@@ -844,7 +844,7 @@ class Assignment(MultiAssignmentBase):
result += "\n" + 10*" " + "if (%s)" % " && ".join(self.predicates)
return result
- # {{{ for interface uniformity with CallInstruction
+ # {{{ for interface uniformity with CallStatement
@property
def temp_var_types(self):
@@ -857,21 +857,21 @@ class Assignment(MultiAssignmentBase):
# }}}
-class ExpressionInstruction(Assignment):
+class ExpressionStatement(Assignment):
def __init__(self, *args, **kwargs):
- warn("ExpressionInstruction is deprecated. Use Assignment instead",
+ warn("ExpressionStatement is deprecated. Use Assignment instead",
DeprecationWarning, stacklevel=2)
- super(ExpressionInstruction, self).__init__(*args, **kwargs)
+ super(ExpressionStatement, self).__init__(*args, **kwargs)
# }}}
-# {{{ instruction: function call
+# {{{ statement: function call
-class CallInstruction(MultiAssignmentBase):
- """An instruction capturing a function call. Unlike :class:`Assignment`,
- this instruction supports functions with multiple return values.
+class CallStatement(MultiAssignmentBase):
+    """A statement capturing a function call. Unlike :class:`Assignment`,
+ this statement supports functions with multiple return values.
.. attribute:: assignees
@@ -907,11 +907,11 @@ class CallInstruction(MultiAssignmentBase):
boostable=None, boostable_into=None, tags=None,
temp_var_types=None,
priority=0, predicates=frozenset(),
- insn_deps=None, insn_deps_is_final=None,
+ stmt_deps=None, stmt_deps_is_final=None,
forced_iname_deps=None,
forced_iname_deps_is_final=None):
- super(CallInstruction, self).__init__(
+ super(CallStatement, self).__init__(
id=id,
depends_on=depends_on,
depends_on_is_final=depends_on_is_final,
@@ -925,22 +925,22 @@ class CallInstruction(MultiAssignmentBase):
priority=priority,
predicates=predicates,
tags=tags,
- insn_deps=insn_deps,
- insn_deps_is_final=insn_deps_is_final,
+ stmt_deps=stmt_deps,
+ stmt_deps_is_final=stmt_deps_is_final,
forced_iname_deps=forced_iname_deps,
forced_iname_deps_is_final=forced_iname_deps_is_final)
from pymbolic.primitives import Call
from loopy.symbolic import Reduction
if not isinstance(expression, (Call, Reduction)) and expression is not None:
- raise LoopyError("'expression' argument to CallInstruction "
+ raise LoopyError("'expression' argument to CallStatement "
"must be a function call")
from loopy.symbolic import parse
if isinstance(assignees, str):
assignees = parse(assignees)
if not isinstance(assignees, tuple):
- raise LoopyError("'assignees' argument to CallInstruction "
+ raise LoopyError("'assignees' argument to CallStatement "
"must be a tuple or a string parseable to a tuple"
"--got '%s'" % type(assignees).__name__)
@@ -961,7 +961,7 @@ class CallInstruction(MultiAssignmentBase):
else:
self.temp_var_types = temp_var_types
- # {{{ implement InstructionBase interface
+ # {{{ implement StatementBase interface
@memoize_method
def assignee_var_names(self):
@@ -1017,7 +1017,7 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs):
raise LoopyError("right-hand side in multiple assignment must be "
"function call or reduction, got: '%s'" % expression)
- return CallInstruction(
+ return CallStatement(
assignees=assignees,
expression=expression,
temp_var_types=temp_var_types,
@@ -1034,14 +1034,14 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs):
**kwargs)
-# {{{ c instruction
+# {{{ c statement
-class CInstruction(InstructionBase):
+class CStatement(StatementBase):
"""
.. attribute:: iname_exprs
A list of tuples *(name, expr)* of inames or expressions based on them
- that the instruction needs access to.
+ that the statement needs access to.
.. attribute:: code
@@ -1071,11 +1071,11 @@ class CInstruction(InstructionBase):
figuring out dependencies.
"""
- fields = InstructionBase.fields | \
+ fields = StatementBase.fields | \
set("iname_exprs code read_variables assignees".split())
- pymbolic_fields = InstructionBase.pymbolic_fields | \
+ pymbolic_fields = StatementBase.pymbolic_fields | \
set("iname_exprs assignees".split())
- pymbolic_set_fields = InstructionBase.pymbolic_set_fields | \
+ pymbolic_set_fields = StatementBase.pymbolic_set_fields | \
set(["read_variables"])
def __init__(self,
@@ -1087,7 +1087,7 @@ class CInstruction(InstructionBase):
within_inames_is_final=None, within_inames=None,
priority=0, boostable=None, boostable_into=None,
predicates=frozenset(), tags=None,
- insn_deps=None, insn_deps_is_final=None):
+ stmt_deps=None, stmt_deps_is_final=None):
"""
:arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples,
simple strings pepresenting inames are also allowed. A single
@@ -1098,7 +1098,7 @@ class CInstruction(InstructionBase):
sequence of strings parseable into the desired format.
"""
- InstructionBase.__init__(self,
+ StatementBase.__init__(self,
id=id,
depends_on=depends_on,
depends_on_is_final=depends_on_is_final,
@@ -1109,8 +1109,8 @@ class CInstruction(InstructionBase):
boostable=boostable,
boostable_into=boostable_into,
priority=priority, predicates=predicates, tags=tags,
- insn_deps=insn_deps,
- insn_deps_is_final=insn_deps_is_final)
+ stmt_deps=stmt_deps,
+ stmt_deps_is_final=stmt_deps_is_final)
# {{{ normalize iname_exprs
@@ -1153,7 +1153,7 @@ class CInstruction(InstructionBase):
def read_dependency_names(self):
result = (
- super(CInstruction, self).read_dependency_names()
+ super(CStatement, self).read_dependency_names()
| frozenset(self.read_variables))
from loopy.symbolic import get_dependencies
@@ -1204,7 +1204,7 @@ class CInstruction(InstructionBase):
# }}}
-class _DataObliviousInstruction(InstructionBase):
+class _DataObliviousStatement(StatementBase):
# {{{ abstract interface
# read_dependency_names inherited
@@ -1230,12 +1230,12 @@ class _DataObliviousInstruction(InstructionBase):
return ()
-# {{{ barrier instruction
+# {{{ barrier statement
-class NoOpInstruction(_DataObliviousInstruction):
- """An instruction that carries out no operation. It is mainly
+class NoOpStatement(_DataObliviousStatement):
+    """A statement that carries out no operation. It is mainly
useful as a way to structure dependencies between other
- instructions.
+ statements.
The textual syntax in a :mod:`loopy` kernel is::
@@ -1249,7 +1249,7 @@ class NoOpInstruction(_DataObliviousInstruction):
priority=None,
boostable=None, boostable_into=None,
predicates=None, tags=None):
- super(NoOpInstruction, self).__init__(
+ super(NoOpStatement, self).__init__(
id=id,
depends_on=depends_on,
depends_on_is_final=depends_on_is_final,
@@ -1276,10 +1276,10 @@ class NoOpInstruction(_DataObliviousInstruction):
# }}}
-# {{{ barrier instruction
+# {{{ barrier statement
-class BarrierInstruction(_DataObliviousInstruction):
- """An instruction that requires synchronization with all
+class BarrierStatement(_DataObliviousStatement):
+    """A statement that requires synchronization with all
concurrent work items of :attr:`kind`.
.. attribute:: kind
@@ -1292,7 +1292,7 @@ class BarrierInstruction(_DataObliviousInstruction):
... lbarrier
"""
- fields = _DataObliviousInstruction.fields | set(["kind"])
+ fields = _DataObliviousStatement.fields | set(["kind"])
def __init__(self, id, depends_on=None, depends_on_is_final=None,
groups=None, conflicts_with_groups=None,
@@ -1305,7 +1305,7 @@ class BarrierInstruction(_DataObliviousInstruction):
if predicates:
raise LoopyError("conditional barriers are not supported")
- super(BarrierInstruction, self).__init__(
+ super(BarrierStatement, self).__init__(
id=id,
depends_on=depends_on,
depends_on_is_final=depends_on_is_final,
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index ad1e71e59aca3f658bd3374b46afd786475d0d04..edc154b15d39a6941bbbc0688ca7556c60b02317 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -131,30 +131,30 @@ def _add_and_infer_dtypes_overdetermined(knl, dtype_dict):
# }}}
-# {{{ find_all_insn_inames fixed point iteration (deprecated)
+# {{{ find_all_stmt_inames fixed point iteration (deprecated)
-def guess_iname_deps_based_on_var_use(kernel, insn, insn_id_to_inames=None):
- # For all variables that insn depends on, find the intersection
- # of iname deps of all writers, and add those to insn's
+def guess_iname_deps_based_on_var_use(kernel, stmt, stmt_id_to_inames=None):
+ # For all variables that stmt depends on, find the intersection
+ # of iname deps of all writers, and add those to stmt's
# dependencies.
result = frozenset()
writer_map = kernel.writer_map()
- for tv_name in (insn.read_dependency_names() & kernel.get_written_variables()):
+ for tv_name in (stmt.read_dependency_names() & kernel.get_written_variables()):
tv_implicit_inames = None
for writer_id in writer_map[tv_name]:
- writer_insn = kernel.id_to_insn[writer_id]
- if insn_id_to_inames is None:
- writer_inames = writer_insn.within_inames
+ writer_stmt = kernel.id_to_stmt[writer_id]
+ if stmt_id_to_inames is None:
+ writer_inames = writer_stmt.within_inames
else:
- writer_inames = insn_id_to_inames[writer_id]
+ writer_inames = stmt_id_to_inames[writer_id]
writer_implicit_inames = (
writer_inames
- - (writer_insn.write_dependency_names() & kernel.all_inames()))
+ - (writer_stmt.write_dependency_names() & kernel.all_inames()))
if tv_implicit_inames is None:
tv_implicit_inames = writer_implicit_inames
else:
@@ -164,16 +164,16 @@ def guess_iname_deps_based_on_var_use(kernel, insn, insn_id_to_inames=None):
if tv_implicit_inames is not None:
result = result | tv_implicit_inames
- return result - insn.reduction_inames()
+ return result - stmt.reduction_inames()
-def find_all_insn_inames(kernel):
- logger.debug("%s: find_all_insn_inames: start" % kernel.name)
+def find_all_stmt_inames(kernel):
+ logger.debug("%s: find_all_stmt_inames: start" % kernel.name)
writer_map = kernel.writer_map()
- insn_id_to_inames = {}
- insn_assignee_inames = {}
+ stmt_id_to_inames = {}
+ stmt_assignee_inames = {}
all_read_deps = {}
all_write_deps = {}
@@ -181,30 +181,30 @@ def find_all_insn_inames(kernel):
from loopy.transform.subst import expand_subst
kernel = expand_subst(kernel)
- for insn in kernel.instructions:
- all_read_deps[insn.id] = read_deps = insn.read_dependency_names()
- all_write_deps[insn.id] = write_deps = insn.write_dependency_names()
+ for stmt in kernel.statements:
+ all_read_deps[stmt.id] = read_deps = stmt.read_dependency_names()
+ all_write_deps[stmt.id] = write_deps = stmt.write_dependency_names()
deps = read_deps | write_deps
- if insn.within_inames_is_final:
- iname_deps = insn.within_inames
+ if stmt.within_inames_is_final:
+ iname_deps = stmt.within_inames
else:
iname_deps = (
deps & kernel.all_inames()
- | insn.within_inames)
+ | stmt.within_inames)
- assert isinstance(read_deps, frozenset), type(insn)
- assert isinstance(write_deps, frozenset), type(insn)
- assert isinstance(iname_deps, frozenset), type(insn)
+ assert isinstance(read_deps, frozenset), type(stmt)
+ assert isinstance(write_deps, frozenset), type(stmt)
+ assert isinstance(iname_deps, frozenset), type(stmt)
- logger.debug("%s: find_all_insn_inames: %s (init): %s - "
+ logger.debug("%s: find_all_stmt_inames: %s (init): %s - "
"read deps: %s - write deps: %s" % (
- kernel.name, insn.id, ", ".join(sorted(iname_deps)),
+ kernel.name, stmt.id, ", ".join(sorted(iname_deps)),
", ".join(sorted(read_deps)), ", ".join(sorted(write_deps)),
))
- insn_id_to_inames[insn.id] = iname_deps
- insn_assignee_inames[insn.id] = write_deps & kernel.all_inames()
+ stmt_id_to_inames[stmt.id] = iname_deps
+ stmt_assignee_inames[stmt.id] = write_deps & kernel.all_inames()
# fixed point iteration until all iname dep sets have converged
@@ -221,36 +221,36 @@ def find_all_insn_inames(kernel):
while True:
did_something = False
- for insn in kernel.instructions:
+ for stmt in kernel.statements:
- if insn.within_inames_is_final:
+ if stmt.within_inames_is_final:
continue
# {{{ depdency-based propagation
- inames_old = insn_id_to_inames[insn.id]
+ inames_old = stmt_id_to_inames[stmt.id]
inames_new = inames_old | guess_iname_deps_based_on_var_use(
- kernel, insn, insn_id_to_inames)
+ kernel, stmt, stmt_id_to_inames)
- insn_id_to_inames[insn.id] = inames_new
+ stmt_id_to_inames[stmt.id] = inames_new
if inames_new != inames_old:
did_something = True
warn_with_kernel(kernel, "inferred_iname",
- "The iname(s) '%s' on instruction '%s' "
+ "The iname(s) '%s' on statement '%s' "
"was/were automatically added. "
"This is deprecated. Please add the iname "
- "to the instruction "
+ "to the statement "
"explicitly, e.g. by adding 'for' loops"
- % (", ".join(inames_new-inames_old), insn.id))
+ % (", ".join(inames_new-inames_old), stmt.id))
# }}}
# {{{ domain-based propagation
- inames_old = insn_id_to_inames[insn.id]
- inames_new = set(insn_id_to_inames[insn.id])
+ inames_old = stmt_id_to_inames[stmt.id]
+ inames_new = set(stmt_id_to_inames[stmt.id])
for iname in inames_old:
home_domain = kernel.domains[kernel.get_home_domain_index(iname)]
@@ -268,31 +268,31 @@ def find_all_insn_inames(kernel):
if par in kernel.temporary_variables:
for writer_id in writer_map.get(par, []):
- inames_new.update(insn_id_to_inames[writer_id])
+ inames_new.update(stmt_id_to_inames[writer_id])
if inames_new != inames_old:
did_something = True
- insn_id_to_inames[insn.id] = frozenset(inames_new)
+ stmt_id_to_inames[stmt.id] = frozenset(inames_new)
warn_with_kernel(kernel, "inferred_iname",
- "The iname(s) '%s' on instruction '%s' was "
+ "The iname(s) '%s' on statement '%s' was "
"automatically added. "
"This is deprecated. Please add the iname "
- "to the instruction "
+ "to the statement "
"explicitly, e.g. by adding 'for' loops"
- % (", ".join(inames_new-inames_old), insn.id))
+ % (", ".join(inames_new-inames_old), stmt.id))
# }}}
if not did_something:
break
- logger.debug("%s: find_all_insn_inames: done" % kernel.name)
+ logger.debug("%s: find_all_stmt_inames: done" % kernel.name)
- for v in six.itervalues(insn_id_to_inames):
+ for v in six.itervalues(stmt_id_to_inames):
assert isinstance(v, frozenset)
- return insn_id_to_inames
+ return stmt_id_to_inames
# }}}
@@ -447,17 +447,17 @@ class DomainChanger:
# Changing the domain might look like it wants to change grid
# sizes. Not true.
# (Relevant for 'slab decomposition')
- overridden_get_grid_sizes_for_insn_ids=(
- self.kernel.get_grid_sizes_for_insn_ids))
+ overridden_get_grid_sizes_for_stmt_ids=(
+ self.kernel.get_grid_sizes_for_stmt_ids))
# }}}
# {{{ graphviz / dot export
-def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False):
+def get_dot_dependency_graph(kernel, iname_cluster=True, use_stmt_id=False):
"""Return a string in the `dot `_ language depicting
- dependencies among kernel instructions.
+ dependencies among kernel statements.
"""
# make sure all automatically added stuff shows up
@@ -478,34 +478,34 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False):
dep_graph = {}
lines = []
- from loopy.kernel.data import MultiAssignmentBase, CInstruction
+ from loopy.kernel.data import MultiAssignmentBase, CStatement
- for insn in kernel.instructions:
- if isinstance(insn, MultiAssignmentBase):
- op = "%s <- %s" % (insn.assignees, insn.expression)
+ for stmt in kernel.statements:
+ if isinstance(stmt, MultiAssignmentBase):
+ op = "%s <- %s" % (stmt.assignees, stmt.expression)
if len(op) > 200:
op = op[:200] + "..."
- elif isinstance(insn, CInstruction):
- op = "" % insn.id
+ elif isinstance(stmt, CStatement):
+ op = "" % stmt.id
else:
- op = "" % insn.id
+ op = "" % stmt.id
- if use_insn_id:
- insn_label = insn.id
+ if use_stmt_id:
+ stmt_label = stmt.id
tooltip = op
else:
- insn_label = op
- tooltip = insn.id
+ stmt_label = op
+ tooltip = stmt.id
lines.append("\"%s\" [label=\"%s\",shape=\"box\",tooltip=\"%s\"];"
% (
- insn.id,
- repr(insn_label)[1:-1],
+ stmt.id,
+ repr(stmt_label)[1:-1],
repr(tooltip)[1:-1],
))
- for dep in insn.depends_on:
- dep_graph.setdefault(insn.id, set()).add(dep)
+ for dep in stmt.depends_on:
+ dep_graph.setdefault(stmt.id, set()).add(dep)
# {{{ O(n^3) transitive reduction
@@ -513,31 +513,31 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False):
while True:
changed_something = False
- for insn_1 in dep_graph:
- for insn_2 in dep_graph.get(insn_1, set()).copy():
- for insn_3 in dep_graph.get(insn_2, set()).copy():
- if insn_3 not in dep_graph.get(insn_1, set()):
+ for stmt_1 in dep_graph:
+ for stmt_2 in dep_graph.get(stmt_1, set()).copy():
+ for stmt_3 in dep_graph.get(stmt_2, set()).copy():
+ if stmt_3 not in dep_graph.get(stmt_1, set()):
changed_something = True
- dep_graph[insn_1].add(insn_3)
+ dep_graph[stmt_1].add(stmt_3)
if not changed_something:
break
- for insn_1 in dep_graph:
- for insn_2 in dep_graph.get(insn_1, set()).copy():
- for insn_3 in dep_graph.get(insn_2, set()).copy():
- if insn_3 in dep_graph.get(insn_1, set()):
- dep_graph[insn_1].remove(insn_3)
+ for stmt_1 in dep_graph:
+ for stmt_2 in dep_graph.get(stmt_1, set()).copy():
+ for stmt_3 in dep_graph.get(stmt_2, set()).copy():
+ if stmt_3 in dep_graph.get(stmt_1, set()):
+ dep_graph[stmt_1].remove(stmt_3)
# }}}
- for insn_1 in dep_graph:
- for insn_2 in dep_graph.get(insn_1, set()):
- lines.append("%s -> %s" % (insn_2, insn_1))
+ for stmt_1 in dep_graph:
+ for stmt_2 in dep_graph.get(stmt_1, set()):
+ lines.append("%s -> %s" % (stmt_2, stmt_1))
if iname_cluster:
from loopy.schedule import (
- EnterLoop, LeaveLoop, RunInstruction, Barrier,
+ EnterLoop, LeaveLoop, RunStatement, Barrier,
CallKernel, ReturnFromKernel)
for sched_item in kernel.schedule:
@@ -546,8 +546,8 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False):
% (sched_item.iname, sched_item.iname))
elif isinstance(sched_item, LeaveLoop):
lines.append("}")
- elif isinstance(sched_item, RunInstruction):
- lines.append(sched_item.insn_id)
+ elif isinstance(sched_item, RunStatement):
+ lines.append(sched_item.stmt_id)
elif isinstance(sched_item, (CallKernel, ReturnFromKernel, Barrier)):
pass
else:
@@ -672,14 +672,14 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames):
for par in dom_parameters:
if par in kernel.temporary_variables:
- writer_insns = kernel.writer_map()[par]
+ writer_stmts = kernel.writer_map()[par]
- if len(writer_insns) > 1:
+ if len(writer_stmts) > 1:
raise RuntimeError("loop bound '%s' "
"may only be written to once" % par)
- writer_insn, = writer_insns
- writer_inames = kernel.insn_inames(writer_insn)
+ writer_stmt, = writer_stmts
+ writer_inames = kernel.stmt_inames(writer_stmt)
if writer_inames & inames:
return True
@@ -695,7 +695,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames):
# {{{ rank inames by stride
-def get_auto_axis_iname_ranking_by_stride(kernel, insn):
+def get_auto_axis_iname_ranking_by_stride(kernel, stmt):
from loopy.kernel.data import ImageArg, ValueArg
approximate_arg_values = {}
@@ -707,14 +707,14 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn):
raise LoopyError("No approximate arg value specified for '%s'"
% arg.name)
- # {{{ find all array accesses in insn
+ # {{{ find all array accesses in stmt
from loopy.symbolic import ArrayAccessFinder
- ary_acc_exprs = list(ArrayAccessFinder()(insn.expression))
+ ary_acc_exprs = list(ArrayAccessFinder()(stmt.expression))
from pymbolic.primitives import Subscript
- for assignee in insn.assignees:
+ for assignee in stmt.assignees:
if isinstance(assignee, Subscript):
ary_acc_exprs.append(assignee)
@@ -742,7 +742,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn):
from loopy.kernel.data import AutoLocalIndexTagBase
auto_axis_inames = set(
iname
- for iname in kernel.insn_inames(insn)
+ for iname in kernel.stmt_inames(stmt)
if isinstance(kernel.iname_to_tag.get(iname),
AutoLocalIndexTagBase))
@@ -802,7 +802,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn):
if aggregate_strides:
very_large_stride = int(np.iinfo(np.int32).max)
- return sorted((iname for iname in kernel.insn_inames(insn)),
+ return sorted((iname for iname in kernel.stmt_inames(stmt)),
key=lambda iname: (
aggregate_strides.get(iname, very_large_stride),
iname))
@@ -912,13 +912,13 @@ def assign_automatic_axes(kernel, axis=0, local_size=None):
import loopy as lp
- for insn in kernel.instructions:
- if not isinstance(insn, lp.MultiAssignmentBase):
+ for stmt in kernel.statements:
+ if not isinstance(stmt, lp.MultiAssignmentBase):
continue
auto_axis_inames = [
iname
- for iname in kernel.insn_inames(insn)
+ for iname in kernel.stmt_inames(stmt)
if isinstance(kernel.iname_to_tag.get(iname),
AutoLocalIndexTagBase)]
@@ -927,7 +927,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None):
assigned_local_axes = set()
- for iname in kernel.insn_inames(insn):
+ for iname in kernel.stmt_inames(stmt):
tag = kernel.iname_to_tag.get(iname)
if isinstance(tag, LocalIndexTag):
assigned_local_axes.add(tag.axis)
@@ -936,7 +936,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None):
# "valid" pass: try to assign a given axis
if axis not in assigned_local_axes:
- iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, insn)
+ iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, stmt)
if iname_ranking is not None:
for iname in iname_ranking:
prev_tag = kernel.iname_to_tag.get(iname)
@@ -963,7 +963,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None):
# }}}
- # We've seen all instructions and not punted to recursion/restart because
+ # We've seen all statements and not punted to recursion/restart because
# of a new axis assignment.
if axis >= len(local_size):
@@ -1032,12 +1032,12 @@ def guess_var_shape(kernel, var_name):
submap = SubstitutionRuleExpander(kernel.substitutions)
def run_through_armap(expr):
- armap(submap(expr), kernel.insn_inames(insn))
+ armap(submap(expr), kernel.stmt_inames(stmt))
return expr
try:
- for insn in kernel.instructions:
- insn.with_transformed_expressions(run_through_armap)
+ for stmt in kernel.statements:
+ stmt.with_transformed_expressions(run_through_armap)
except TypeError as e:
from traceback import print_exc
print_exc()
@@ -1200,9 +1200,9 @@ def get_visual_iname_order_embedding(kernel):
iname_trie = SetTrie()
- for insn in kernel.instructions:
+ for stmt in kernel.statements:
within_inames = set(
- iname for iname in insn.within_inames
+ iname for iname in stmt.within_inames
if iname not in ilp_inames)
iname_trie.add_or_update(within_inames)
@@ -1225,17 +1225,17 @@ def get_visual_iname_order_embedding(kernel):
# {{{ find_recursive_dependencies
-def find_recursive_dependencies(kernel, insn_ids):
- queue = list(insn_ids)
+def find_recursive_dependencies(kernel, stmt_ids):
+ queue = list(stmt_ids)
- result = set(insn_ids)
+ result = set(stmt_ids)
while queue:
new_queue = []
- for insn_id in queue:
- insn = kernel.id_to_insn[insn_id]
- additionals = insn.depends_on - result
+ for stmt_id in queue:
+ stmt = kernel.id_to_stmt[stmt_id]
+ additionals = stmt.depends_on - result
result.update(additionals)
new_queue.extend(additionals)
@@ -1248,15 +1248,15 @@ def find_recursive_dependencies(kernel, insn_ids):
# {{{ find_reverse_dependencies
-def find_reverse_dependencies(kernel, insn_ids):
- """Finds a set of IDs of instructions that depend on one of the insn_ids.
+def find_reverse_dependencies(kernel, stmt_ids):
+ """Finds a set of IDs of statements that depend on one of the stmt_ids.
- :arg insn_ids: a set of instruction IDs
+ :arg stmt_ids: a set of statement IDs
"""
return frozenset(
- insn.id
- for insn in kernel.instructions
- if insn.depends_on & insn_ids)
+ stmt.id
+ for stmt in kernel.statements
+ if stmt.depends_on & stmt_ids)
# }}}
@@ -1264,28 +1264,28 @@ def find_reverse_dependencies(kernel, insn_ids):
# {{{ draw_dependencies_as_unicode_arrows
def draw_dependencies_as_unicode_arrows(
- instructions, fore, style, flag_downward=True, max_columns=20):
+ statements, fore, style, flag_downward=True, max_columns=20):
"""
- :arg instructions: an ordered iterable of :class:`loopy.InstructionBase`
+ :arg statements: an ordered iterable of :class:`loopy.StatementBase`
instances
:arg fore: if given, will be used like a :mod:`colorama` ``Fore`` object
to color-code dependencies. (E.g. red for downward edges)
:returns: A list of tuples (arrows, extender) with Unicode-drawn dependency
- arrows, one per entry of *instructions*. *extender* can be used to
- extend arrows below the line of an instruction.
+ arrows, one per entry of *statements*. *extender* can be used to
+        extend arrows below the line of a statement.
"""
reverse_deps = {}
- for insn in instructions:
- for dep in insn.depends_on:
- reverse_deps.setdefault(dep, []).append(insn.id)
+ for stmt in statements:
+ for dep in stmt.depends_on:
+ reverse_deps.setdefault(dep, []).append(stmt.id)
# mapping of (from_id, to_id) tuples to column_index
dep_to_column = {}
# {{{ find column assignments
- # mapping from column indices to (end_insn_id, updown)
+ # mapping from column indices to (end_stmt_id, updown)
columns_in_use = {}
n_columns = [0]
@@ -1313,28 +1313,28 @@ def draw_dependencies_as_unicode_arrows(
return result
rows = []
- for insn in instructions:
+ for stmt in statements:
row = make_extender()
- for rdep in reverse_deps.get(insn.id, []):
- assert rdep != insn.id
+ for rdep in reverse_deps.get(stmt.id, []):
+ assert rdep != stmt.id
- dep_key = (rdep, insn.id)
+ dep_key = (rdep, stmt.id)
if dep_key not in dep_to_column:
col = dep_to_column[dep_key] = find_free_column()
columns_in_use[col] = (rdep, "up")
row[col] = u"↱"
- for dep in insn.depends_on:
- assert dep != insn.id
- dep_key = (insn.id, dep)
+ for dep in stmt.depends_on:
+ assert dep != stmt.id
+ dep_key = (stmt.id, dep)
if dep_key not in dep_to_column:
col = dep_to_column[dep_key] = find_free_column()
columns_in_use[col] = (dep, "down")
row[col] = do_flag_downward(u"┌", "down")
for col, (end, updown) in list(six.iteritems(columns_in_use)):
- if insn.id == end:
+ if stmt.id == end:
del columns_in_use[col]
if updown == "up":
row[col] = u"â””"
@@ -1376,26 +1376,26 @@ def draw_dependencies_as_unicode_arrows(
# }}}
-# {{{ stringify_instruction_list
+# {{{ stringify_statement_list
-def stringify_instruction_list(kernel):
+def stringify_statement_list(kernel):
# {{{ topological sort
- printed_insn_ids = set()
- printed_insn_order = []
+ printed_stmt_ids = set()
+ printed_stmt_order = []
- def insert_insn_into_order(insn):
- if insn.id in printed_insn_ids:
+ def insert_stmt_into_order(stmt):
+ if stmt.id in printed_stmt_ids:
return
- printed_insn_ids.add(insn.id)
+ printed_stmt_ids.add(stmt.id)
- for dep_id in natsorted(insn.depends_on):
- insert_insn_into_order(kernel.id_to_insn[dep_id])
+ for dep_id in natsorted(stmt.depends_on):
+ insert_stmt_into_order(kernel.id_to_stmt[dep_id])
- printed_insn_order.append(insn)
+ printed_stmt_order.append(stmt)
- for insn in kernel.instructions:
- insert_insn_into_order(insn)
+ for stmt in kernel.statements:
+ insert_stmt_into_order(stmt)
# }}}
@@ -1406,7 +1406,7 @@ def stringify_instruction_list(kernel):
uniform_arrow_length, arrows_and_extenders = \
draw_dependencies_as_unicode_arrows(
- printed_insn_order, fore=Fore, style=Style)
+ printed_stmt_order, fore=Fore, style=Style)
leader = " " * uniform_arrow_length
lines = []
@@ -1457,51 +1457,51 @@ def stringify_instruction_list(kernel):
current_inames[0] = new_inames
- for insn, (arrows, extender) in zip(printed_insn_order, arrows_and_extenders):
- if isinstance(insn, lp.MultiAssignmentBase):
- lhs = ", ".join(str(a) for a in insn.assignees)
- rhs = str(insn.expression)
+ for stmt, (arrows, extender) in zip(printed_stmt_order, arrows_and_extenders):
+ if isinstance(stmt, lp.MultiAssignmentBase):
+ lhs = ", ".join(str(a) for a in stmt.assignees)
+ rhs = str(stmt.expression)
trailing = []
- elif isinstance(insn, lp.CInstruction):
- lhs = ", ".join(str(a) for a in insn.assignees)
+ elif isinstance(stmt, lp.CStatement):
+ lhs = ", ".join(str(a) for a in stmt.assignees)
rhs = "CODE(%s|%s)" % (
- ", ".join(str(x) for x in insn.read_variables),
+ ", ".join(str(x) for x in stmt.read_variables),
", ".join("%s=%s" % (name, expr)
- for name, expr in insn.iname_exprs))
+ for name, expr in stmt.iname_exprs))
- trailing = [l for l in insn.code.split("\n")]
- elif isinstance(insn, lp.BarrierInstruction):
+ trailing = [l for l in stmt.code.split("\n")]
+ elif isinstance(stmt, lp.BarrierStatement):
lhs = ""
- rhs = "... %sbarrier" % insn.kind[0]
+ rhs = "... %sbarrier" % stmt.kind[0]
trailing = []
- elif isinstance(insn, lp.NoOpInstruction):
+ elif isinstance(stmt, lp.NoOpStatement):
lhs = ""
rhs = "... nop"
trailing = []
else:
- raise LoopyError("unexpected instruction type: %s"
- % type(insn).__name__)
+ raise LoopyError("unexpected statement type: %s"
+ % type(stmt).__name__)
- adapt_to_new_inames_list(kernel.insn_inames(insn))
+ adapt_to_new_inames_list(kernel.stmt_inames(stmt))
- options = ["id="+Fore.GREEN+insn.id+Style.RESET_ALL]
- if insn.priority:
- options.append("priority=%d" % insn.priority)
- if insn.tags:
- options.append("tags=%s" % ":".join(insn.tags))
- if isinstance(insn, lp.Assignment) and insn.atomicity:
+ options = ["id="+Fore.GREEN+stmt.id+Style.RESET_ALL]
+ if stmt.priority:
+ options.append("priority=%d" % stmt.priority)
+ if stmt.tags:
+ options.append("tags=%s" % ":".join(stmt.tags))
+ if isinstance(stmt, lp.Assignment) and stmt.atomicity:
options.append("atomic=%s" % ":".join(
- str(a) for a in insn.atomicity))
- if insn.groups:
- options.append("groups=%s" % ":".join(insn.groups))
- if insn.conflicts_with_groups:
+ str(a) for a in stmt.atomicity))
+ if stmt.groups:
+ options.append("groups=%s" % ":".join(stmt.groups))
+ if stmt.conflicts_with_groups:
options.append(
- "conflicts=%s" % ":".join(insn.conflicts_with_groups))
- if insn.no_sync_with:
+ "conflicts=%s" % ":".join(stmt.conflicts_with_groups))
+ if stmt.no_sync_with:
options.append("no_sync_with=%s" % ":".join(
- "%s@%s" % entry for entry in sorted(insn.no_sync_with)))
+ "%s@%s" % entry for entry in sorted(stmt.no_sync_with)))
if lhs:
core = "%s = %s" % (
@@ -1513,9 +1513,9 @@ def stringify_instruction_list(kernel):
options_str = " {%s}" % ", ".join(options)
- if insn.predicates:
+ if stmt.predicates:
# FIXME: precedence
- add_pre_line("if %s" % " and ".join([str(x) for x in insn.predicates]))
+ add_pre_line("if %s" % " and ".join([str(x) for x in stmt.predicates]))
indent_level[0] += indent_increment
add_main_line(core + options_str)
@@ -1523,7 +1523,7 @@ def stringify_instruction_list(kernel):
for t in trailing:
add_post_line(t)
- if insn.predicates:
+ if stmt.predicates:
indent_level[0] -= indent_increment
add_post_line("end")
@@ -1540,21 +1540,21 @@ def stringify_instruction_list(kernel):
@memoize_on_first_arg
def get_global_barrier_order(kernel):
- """Return a :class:`tuple` of the listing the ids of global barrier instructions
+    """Return a :class:`tuple` listing the ids of global barrier statements
as they appear in order in the kernel.
- See also :class:`loopy.instruction.BarrierInstruction`.
+ See also :class:`loopy.statement.BarrierStatement`.
"""
barriers = []
visiting = set()
visited = set()
- unvisited = set(insn.id for insn in kernel.instructions)
+ unvisited = set(stmt.id for stmt in kernel.statements)
- def is_barrier(my_insn_id):
- insn = kernel.id_to_insn[my_insn_id]
- from loopy.kernel.instruction import BarrierInstruction
- return isinstance(insn, BarrierInstruction) and insn.kind == "global"
+ def is_barrier(my_stmt_id):
+ stmt = kernel.id_to_stmt[my_stmt_id]
+ from loopy.kernel.statement import BarrierStatement
+ return isinstance(stmt, BarrierStatement) and stmt.kind == "global"
while unvisited:
stack = [unvisited.pop()]
@@ -1574,7 +1574,7 @@ def get_global_barrier_order(kernel):
visited.add(top)
visiting.add(top)
- for child in kernel.id_to_insn[top].depends_on:
+ for child in kernel.id_to_stmt[top].depends_on:
# Check for no cycles.
assert child not in visiting
stack.append(child)
@@ -1610,7 +1610,7 @@ def get_global_barrier_order(kernel):
visiting.clear()
break
- for child in kernel.id_to_insn[top].depends_on:
+ for child in kernel.id_to_stmt[top].depends_on:
stack.append(child)
else:
# Search exhausted and we did not find prev_barrier.
@@ -1625,10 +1625,10 @@ def get_global_barrier_order(kernel):
# {{{ find most recent global barrier
@memoize_on_first_arg
-def find_most_recent_global_barrier(kernel, insn_id):
+def find_most_recent_global_barrier(kernel, stmt_id):
"""Return the id of the latest occuring global barrier which the
- given instruction (indirectly or directly) depends on, or *None* if this
- instruction does not depend on a global barrier.
+ given statement (indirectly or directly) depends on, or *None* if this
+ statement does not depend on a global barrier.
The return value is guaranteed to be unique because global barriers are
totally ordered within the kernel.
@@ -1639,15 +1639,15 @@ def find_most_recent_global_barrier(kernel, insn_id):
if len(global_barrier_order) == 0:
return None
- insn = kernel.id_to_insn[insn_id]
+ stmt = kernel.id_to_stmt[stmt_id]
- if len(insn.depends_on) == 0:
+ if len(stmt.depends_on) == 0:
return None
- def is_barrier(my_insn_id):
- insn = kernel.id_to_insn[my_insn_id]
- from loopy.kernel.instruction import BarrierInstruction
- return isinstance(insn, BarrierInstruction) and insn.kind == "global"
+ def is_barrier(my_stmt_id):
+ stmt = kernel.id_to_stmt[my_stmt_id]
+ from loopy.kernel.statement import BarrierStatement
+ return isinstance(stmt, BarrierStatement) and stmt.kind == "global"
global_barrier_to_ordinal = dict(
(b, i) for i, b in enumerate(global_barrier_order))
@@ -1658,13 +1658,13 @@ def find_most_recent_global_barrier(kernel, insn_id):
else -1)
direct_barrier_dependencies = set(
- dep for dep in insn.depends_on if is_barrier(dep))
+ dep for dep in stmt.depends_on if is_barrier(dep))
if len(direct_barrier_dependencies) > 0:
return max(direct_barrier_dependencies, key=get_barrier_ordinal)
else:
return max((find_most_recent_global_barrier(kernel, dep)
- for dep in insn.depends_on),
+ for dep in stmt.depends_on),
key=get_barrier_ordinal)
# }}}
@@ -1691,9 +1691,9 @@ def get_subkernels(kernel):
@memoize_on_first_arg
-def get_subkernel_to_insn_id_map(kernel):
+def get_subkernel_to_stmt_id_map(kernel):
"""Return a :class:`dict` mapping subkernel names to a :class:`frozenset`
- consisting of the instruction ids scheduled within the subkernel. The
+ consisting of the statement ids scheduled within the subkernel. The
kernel must be scheduled.
"""
from loopy.kernel import kernel_state
@@ -1701,7 +1701,7 @@ def get_subkernel_to_insn_id_map(kernel):
raise LoopyError("Kernel must be scheduled")
from loopy.schedule import (
- sched_item_to_insn_id, CallKernel, ReturnFromKernel)
+ sched_item_to_stmt_id, CallKernel, ReturnFromKernel)
subkernel = None
result = {}
@@ -1715,8 +1715,8 @@ def get_subkernel_to_insn_id_map(kernel):
subkernel = None
if subkernel is not None:
- for insn_id in sched_item_to_insn_id(sched_item):
- result[subkernel].add(insn_id)
+ for stmt_id in sched_item_to_stmt_id(sched_item):
+ result[subkernel].add(stmt_id)
for subkernel in result:
result[subkernel] = frozenset(result[subkernel])
diff --git a/loopy/loop.py b/loopy/loop.py
index 4592463822a2321745aaf48a316d16c98d4efca3..6daef3ac7040b8fefc6a47e94f725ecfa8898033 100644
--- a/loopy/loop.py
+++ b/loopy/loop.py
@@ -37,7 +37,7 @@ def potential_loop_nest_map(kernel):
result = {}
all_inames = kernel.all_inames()
- iname_to_insns = kernel.iname_to_insns()
+ iname_to_stmts = kernel.iname_to_stmts()
# examine pairs of all inames--O(n**2), I know.
for inner_iname in all_inames:
@@ -46,7 +46,7 @@ def potential_loop_nest_map(kernel):
if inner_iname == outer_iname:
continue
- if iname_to_insns[inner_iname] <= iname_to_insns[outer_iname]:
+ if iname_to_stmts[inner_iname] <= iname_to_stmts[outer_iname]:
inner_result.add(outer_iname)
if inner_result:
diff --git a/loopy/match.py b/loopy/match.py
index ab0038af8dc5e9189a382bb76115998f57aef74e..434eced40e2dd4df9137466debbf4870032d37a6 100644
--- a/loopy/match.py
+++ b/loopy/match.py
@@ -1,4 +1,4 @@
-"""Matching functionality for instruction ids and subsitution
+"""Matching functionality for statement ids and substitution
rule invocations stacks."""
from __future__ import division, absolute_import
@@ -471,11 +471,11 @@ class StackMatch(object):
def __ne__(self, other):
return not self.__eq__(other)
- def __call__(self, kernel, insn, rule_stack):
+ def __call__(self, kernel, stmt, rule_stack):
"""
:arg rule_stack: a tuple of (name, tags) rule invocation, outermost first
"""
- stack_of_matchables = [insn]
+ stack_of_matchables = [stmt]
for id, tags in rule_stack:
stack_of_matchables.append(RuleInvocationMatchable(id, tags))
@@ -490,8 +490,8 @@ def parse_stack_match(smatch):
"""Syntax example::
... > outer > ... > next > innermost $
- insn > next
- insn > ... > next > innermost $
+ stmt > next
+ stmt > ... > next > innermost $
``...`` matches an arbitrary number of intervening stack levels.
diff --git a/loopy/maxima.py b/loopy/maxima.py
index 22d0c085c0edd5a96b6b45b457957f4be50e49d7..c9a9a42ed164bd3adb3d797ea13964492feeb085 100644
--- a/loopy/maxima.py
+++ b/loopy/maxima.py
@@ -42,7 +42,7 @@ class MaximaStringifyMapper(MaximaStringifyMapperBase):
return res
-def get_loopy_instructions_as_maxima(kernel, prefix):
+def get_loopy_statements_as_maxima(kernel, prefix):
"""Sample use for code comparison::
load("knl-optFalse.mac");
@@ -60,8 +60,8 @@ def get_loopy_instructions_as_maxima(kernel, prefix):
my_variable_names = (
avn
- for insn in kernel.instructions
- for avn in insn.assignee_var_names()
+ for stmt in kernel.statements
+ for avn in stmt.assignee_var_names()
)
from pymbolic import var
@@ -75,30 +75,30 @@ def get_loopy_instructions_as_maxima(kernel, prefix):
result = ["ratprint:false;"]
- written_insn_ids = set()
+ written_stmt_ids = set()
- from loopy.kernel import InstructionBase, Assignment
+ from loopy.kernel import StatementBase, Assignment
- def write_insn(insn):
- if not isinstance(insn, InstructionBase):
- insn = kernel.id_to_insn[insn]
- if not isinstance(insn, Assignment):
+ def write_stmt(stmt):
+ if not isinstance(stmt, StatementBase):
+ stmt = kernel.id_to_stmt[stmt]
+ if not isinstance(stmt, Assignment):
raise RuntimeError("non-single-output assignment not supported "
"in maxima export")
- for dep in insn.depends_on:
- if dep not in written_insn_ids:
- write_insn(dep)
+ for dep in stmt.depends_on:
+ if dep not in written_stmt_ids:
+ write_stmt(dep)
- aname, = insn.assignee_var_names()
+ aname, = stmt.assignee_var_names()
result.append("%s%s : %s;" % (
prefix, aname,
- mstr(substitute(insn.expression))))
+ mstr(substitute(stmt.expression))))
- written_insn_ids.add(insn.id)
+ written_stmt_ids.add(stmt.id)
- for insn in kernel.instructions:
- if insn.id not in written_insn_ids:
- write_insn(insn)
+ for stmt in kernel.statements:
+ if stmt.id not in written_stmt_ids:
+ write_stmt(stmt)
return "\n".join(result)
diff --git a/loopy/options.py b/loopy/options.py
index 25bb7014ce07a30c49f7f78d5a6325eaba36291d..451da8b7af7c75a25ce530071519e693e47ed082 100644
--- a/loopy/options.py
+++ b/loopy/options.py
@@ -82,7 +82,7 @@ class Options(ImmutableRecord):
.. attribute:: trace_assignments
Generate code that uses *printf* in kernels to trace the
- execution of assignment instructions.
+ execution of assignment statements.
.. attribute:: trace_assignment_values
@@ -98,7 +98,7 @@ class Options(ImmutableRecord):
.. attribute:: check_dep_resolution
Whether loopy should issue an error if a dependency
- expression does not match any instructions in the kernel.
+ expression does not match any statements in the kernel.
.. rubric:: Invocation-related options
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index ae70a0d6c07e6b922871c6293162321ea335f80a..35f405aa2548c950463b7e5f17215026e67968da 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -78,16 +78,16 @@ def prepare_for_caching(kernel):
def check_for_writes_to_predicates(kernel):
from loopy.symbolic import get_dependencies
- for insn in kernel.instructions:
+ for stmt in kernel.statements:
pred_vars = (
frozenset.union(
- *(get_dependencies(pred) for pred in insn.predicates))
- if insn.predicates else frozenset())
- written_pred_vars = frozenset(insn.assignee_var_names()) & pred_vars
+ *(get_dependencies(pred) for pred in stmt.predicates))
+ if stmt.predicates else frozenset())
+ written_pred_vars = frozenset(stmt.assignee_var_names()) & pred_vars
if written_pred_vars:
- raise LoopyError("In instruction '%s': may not write to "
- "variable(s) '%s' involved in the instruction's predicates"
- % (insn.id, ", ".join(written_pred_vars)))
+ raise LoopyError("In statement '%s': may not write to "
+ "variable(s) '%s' involved in the statement's predicates"
+ % (stmt.id, ", ".join(written_pred_vars)))
# }}}
@@ -113,8 +113,8 @@ def check_reduction_iname_uniqueness(kernel):
from loopy.symbolic import ReductionCallbackMapper
cb_mapper = ReductionCallbackMapper(map_reduction)
- for insn in kernel.instructions:
- insn.with_transformed_expressions(cb_mapper)
+ for stmt in kernel.statements:
+ stmt.with_transformed_expressions(cb_mapper)
for iname, count in six.iteritems(iname_to_reduction_count):
nonsimul_count = iname_to_nonsimultaneous_reduction_count.get(iname, 0)
@@ -134,17 +134,17 @@ def check_reduction_iname_uniqueness(kernel):
# {{{ decide temporary scope
-def _get_compute_inames_tagged(kernel, insn, tag_base):
+def _get_compute_inames_tagged(kernel, stmt, tag_base):
return set(iname
- for iname in kernel.insn_inames(insn.id)
+ for iname in kernel.stmt_inames(stmt.id)
if isinstance(kernel.iname_to_tag.get(iname), tag_base))
-def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names):
+def _get_assignee_inames_tagged(kernel, stmt, tag_base, tv_names):
return set(iname
for aname, adeps in zip(
- insn.assignee_var_names(),
- insn.assignee_subscript_deps())
+ stmt.assignee_var_names(),
+ stmt.assignee_subscript_deps())
for iname in adeps & kernel.all_inames()
if aname in tv_names
if isinstance(kernel.iname_to_tag.get(iname), tag_base))
@@ -190,28 +190,28 @@ def find_temporary_scope(kernel):
for alias in base_storage_to_aliases.get(temp_var.base_storage, []):
my_writers = my_writers | writers.get(alias, frozenset())
- desired_scope_per_insn = []
- for insn_id in my_writers:
- insn = kernel.id_to_insn[insn_id]
+ desired_scope_per_stmt = []
+ for stmt_id in my_writers:
+ stmt = kernel.id_to_stmt[stmt_id]
# A write race will emerge if:
#
# - the variable is local
# and
- # - the instruction is run across more inames (locally) parallel
+ # - the statement is run across more inames (locally) parallel
# than are reflected in the assignee indices.
locparallel_compute_inames = _get_compute_inames_tagged(
- kernel, insn, LocalIndexTagBase)
+ kernel, stmt, LocalIndexTagBase)
locparallel_assignee_inames = _get_assignee_inames_tagged(
- kernel, insn, LocalIndexTagBase, tv_names)
+ kernel, stmt, LocalIndexTagBase, tv_names)
grpparallel_compute_inames = _get_compute_inames_tagged(
- kernel, insn, GroupIndexTag)
+ kernel, stmt, GroupIndexTag)
grpparallel_assignee_inames = _get_assignee_inames_tagged(
- kernel, insn, GroupIndexTag, temp_var.name)
+ kernel, stmt, GroupIndexTag, temp_var.name)
assert locparallel_assignee_inames <= locparallel_compute_inames
assert grpparallel_assignee_inames <= grpparallel_compute_inames
@@ -227,13 +227,13 @@ def find_temporary_scope(kernel):
if (apin != cpin and bool(apin)):
warn_with_kernel(
kernel,
- "write_race_%s(%s)" % (scope_descr, insn_id),
- "instruction '%s' looks invalid: "
+ "write_race_%s(%s)" % (scope_descr, stmt_id),
+ "statement '%s' looks invalid: "
"it assigns to indices based on %s IDs, but "
"its temporary '%s' cannot be made %s because "
"a write race across the iname(s) '%s' would emerge. "
"(Do you need to add an extra iname to your prefetch?)"
- % (insn_id, iname_descr, temp_var.name, scope_descr,
+ % (stmt_id, iname_descr, temp_var.name, scope_descr,
", ".join(cpin - apin)),
WriteRaceConditionWarning)
@@ -244,9 +244,9 @@ def find_temporary_scope(kernel):
and bool(cpin)):
desired_scope = max(desired_scope, scope)
- desired_scope_per_insn.append(desired_scope)
+ desired_scope_per_stmt.append(desired_scope)
- if not desired_scope_per_insn:
+ if not desired_scope_per_stmt:
if temp_var.initializer is None:
warn_with_kernel(kernel, "temp_to_write(%s)" % temp_var.name,
"temporary variable '%s' never written, eliminating"
@@ -258,11 +258,11 @@ def find_temporary_scope(kernel):
continue
- overall_scope = max(desired_scope_per_insn)
+ overall_scope = max(desired_scope_per_stmt)
from pytools import all
- if not all(iscope == overall_scope for iscope in desired_scope_per_insn):
- raise LoopyError("not all instructions agree on the "
+ if not all(iscope == overall_scope for iscope in desired_scope_per_stmt):
+ raise LoopyError("not all statements agree on the "
"the desired scope (private/local/global) of the "
"temporary '%s'" % temp_var.name)
@@ -712,59 +712,59 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel):
be the case by introducing temporary assignments into the kernel.
"""
- insn_id_gen = kernel.get_instruction_id_generator()
+ stmt_id_gen = kernel.get_statement_id_generator()
var_name_gen = kernel.get_var_name_generator()
- new_or_updated_instructions = {}
+ new_or_updated_statements = {}
new_temporaries = {}
dep_map = dict(
- (insn.id, insn.depends_on) for insn in kernel.instructions)
+ (stmt.id, stmt.depends_on) for stmt in kernel.statements)
- inverse_dep_map = dict((insn.id, set()) for insn in kernel.instructions)
+ inverse_dep_map = dict((stmt.id, set()) for stmt in kernel.statements)
import six
- for insn_id, deps in six.iteritems(dep_map):
+ for stmt_id, deps in six.iteritems(dep_map):
for dep in deps:
- inverse_dep_map[dep].add(insn_id)
+ inverse_dep_map[dep].add(stmt_id)
del dep_map
# {{{ utils
- def _add_to_no_sync_with(insn_id, new_no_sync_with_params):
- insn = kernel.id_to_insn.get(insn_id)
- insn = new_or_updated_instructions.get(insn_id, insn)
- new_or_updated_instructions[insn_id] = (
- insn.copy(
+ def _add_to_no_sync_with(stmt_id, new_no_sync_with_params):
+ stmt = kernel.id_to_stmt.get(stmt_id)
+ stmt = new_or_updated_statements.get(stmt_id, stmt)
+ new_or_updated_statements[stmt_id] = (
+ stmt.copy(
no_sync_with=(
- insn.no_sync_with | frozenset(new_no_sync_with_params))))
+ stmt.no_sync_with | frozenset(new_no_sync_with_params))))
- def _add_to_depends_on(insn_id, new_depends_on_params):
- insn = kernel.id_to_insn.get(insn_id)
- insn = new_or_updated_instructions.get(insn_id, insn)
- new_or_updated_instructions[insn_id] = (
- insn.copy(
- depends_on=insn.depends_on | frozenset(new_depends_on_params)))
+ def _add_to_depends_on(stmt_id, new_depends_on_params):
+ stmt = kernel.id_to_stmt.get(stmt_id)
+ stmt = new_or_updated_statements.get(stmt_id, stmt)
+ new_or_updated_statements[stmt_id] = (
+ stmt.copy(
+ depends_on=stmt.depends_on | frozenset(new_depends_on_params)))
# }}}
- from loopy.kernel.instruction import CallInstruction
- for insn in kernel.instructions:
- if not isinstance(insn, CallInstruction):
+ from loopy.kernel.statement import CallStatement
+ for stmt in kernel.statements:
+ if not isinstance(stmt, CallStatement):
continue
- if len(insn.assignees) <= 1:
+ if len(stmt.assignees) <= 1:
continue
- assignees = insn.assignees
- assignee_var_names = insn.assignee_var_names()
+ assignees = stmt.assignees
+ assignee_var_names = stmt.assignee_var_names()
new_assignees = [assignees[0]]
newly_added_assignments_ids = set()
needs_replacement = False
- last_added_insn_id = insn.id
+ last_added_stmt_id = stmt.id
from loopy.kernel.data import temp_var_scope, TemporaryVariable
@@ -785,15 +785,15 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel):
needs_replacement = True
- # {{{ generate a new assignent instruction
+        # {{{ generate a new assignment statement
new_assignee_name = var_name_gen(
- "{insn_id}_retval_{assignee_nr}"
- .format(insn_id=insn.id, assignee_nr=assignee_nr))
+ "{stmt_id}_retval_{assignee_nr}"
+ .format(stmt_id=stmt.id, assignee_nr=assignee_nr))
- new_assignment_id = insn_id_gen(
- "{insn_id}_assign_retval_{assignee_nr}"
- .format(insn_id=insn.id, assignee_nr=assignee_nr))
+ new_assignment_id = stmt_id_gen(
+ "{stmt_id}_assign_retval_{assignee_nr}"
+ .format(stmt_id=stmt.id, assignee_nr=assignee_nr))
newly_added_assignments_ids.add(new_assignment_id)
@@ -808,55 +808,55 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel):
new_assignee = var(new_assignee_name)
new_assignees.append(new_assignee)
- new_or_updated_instructions[new_assignment_id] = (
+ new_or_updated_statements[new_assignment_id] = (
make_assignment(
assignees=(assignee,),
expression=new_assignee,
id=new_assignment_id,
- depends_on=frozenset([last_added_insn_id]),
+ depends_on=frozenset([last_added_stmt_id]),
depends_on_is_final=True,
no_sync_with=(
- insn.no_sync_with | frozenset([(insn.id, "any")])),
- predicates=insn.predicates,
- within_inames=insn.within_inames))
+ stmt.no_sync_with | frozenset([(stmt.id, "any")])),
+ predicates=stmt.predicates,
+ within_inames=stmt.within_inames))
- last_added_insn_id = new_assignment_id
+ last_added_stmt_id = new_assignment_id
# }}}
if not needs_replacement:
continue
- # {{{ update originating instruction
+ # {{{ update originating statement
- orig_insn = new_or_updated_instructions.get(insn.id, insn)
+ orig_stmt = new_or_updated_statements.get(stmt.id, stmt)
- new_or_updated_instructions[insn.id] = (
- orig_insn.copy(assignees=tuple(new_assignees)))
+ new_or_updated_statements[stmt.id] = (
+ orig_stmt.copy(assignees=tuple(new_assignees)))
- _add_to_no_sync_with(insn.id,
+ _add_to_no_sync_with(stmt.id,
[(id, "any") for id in newly_added_assignments_ids])
# }}}
# {{{ squash spurious memory dependencies amongst new assignments
- for new_insn_id in newly_added_assignments_ids:
- _add_to_no_sync_with(new_insn_id,
+ for new_stmt_id in newly_added_assignments_ids:
+ _add_to_no_sync_with(new_stmt_id,
[(id, "any")
for id in newly_added_assignments_ids
- if id != new_insn_id])
+ if id != new_stmt_id])
# }}}
- # {{{ update instructions that depend on the originating instruction
+ # {{{ update statements that depend on the originating statement
- for inverse_dep in inverse_dep_map[insn.id]:
+ for inverse_dep in inverse_dep_map[stmt.id]:
_add_to_depends_on(inverse_dep, newly_added_assignments_ids)
- for insn_id, scope in (
- new_or_updated_instructions[inverse_dep].no_sync_with):
- if insn_id == insn.id:
+ for stmt_id, scope in (
+ new_or_updated_statements[inverse_dep].no_sync_with):
+ if stmt_id == stmt.id:
_add_to_no_sync_with(
inverse_dep,
[(id, scope) for id in newly_added_assignments_ids])
@@ -866,14 +866,14 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel):
new_temporary_variables = kernel.temporary_variables.copy()
new_temporary_variables.update(new_temporaries)
- new_instructions = (
- list(new_or_updated_instructions.values())
- + list(insn
- for insn in kernel.instructions
- if insn.id not in new_or_updated_instructions))
+ new_statements = (
+ list(new_or_updated_statements.values())
+ + list(stmt
+ for stmt in kernel.statements
+ if stmt.id not in new_or_updated_statements))
return kernel.copy(temporary_variables=new_temporary_variables,
- instructions=new_instructions)
+ statements=new_statements)
def _insert_subdomain_into_domain_tree(kernel, domains, subdomain):
@@ -888,18 +888,18 @@ def _insert_subdomain_into_domain_tree(kernel, domains, subdomain):
# }}}
-def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
+def realize_reduction(kernel, stmt_id_filter=None, unknown_types_ok=True,
automagic_scans_ok=False, force_scan=False,
force_outer_iname_for_scan=None):
- """Rewrites reductions into their imperative form. With *insn_id_filter*
- specified, operate only on the instruction with an instruction id matching
- *insn_id_filter*.
+ """Rewrites reductions into their imperative form. With *stmt_id_filter*
+    specified, operate only on the statement with a statement id matching
+ *stmt_id_filter*.
- If *insn_id_filter* is given, only the outermost level of reductions will be
+ If *stmt_id_filter* is given, only the outermost level of reductions will be
expanded, inner reductions will be left alone (because they end up in a new
- instruction with a different ID, which doesn't match the filter).
+ statement with a different ID, which doesn't match the filter).
- If *insn_id_filter* is not given, all reductions in all instructions will
+ If *stmt_id_filter* is not given, all reductions in all statements will
be realized.
If *automagic_scans_ok*, this function will attempt to rewrite triangular
@@ -907,7 +907,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
If *force_scan* is *True*, this function will attempt to rewrite *all*
candidate reductions as scans and raise an error if this is not possible
- (this is most useful combined with *insn_id_filter*).
+ (this is most useful combined with *stmt_id_filter*).
If *force_outer_iname_for_scan* is not *None*, this function will attempt
to realize candidate reductions as scans using the specified iname as the
@@ -916,10 +916,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
logger.debug("%s: realize reduction" % kernel.name)
- new_insns = []
+ new_stmts = []
new_iname_tags = {}
- insn_id_gen = kernel.get_instruction_id_generator()
+ stmt_id_gen = kernel.get_statement_id_generator()
var_name_gen = kernel.get_var_name_generator()
new_temporary_variables = kernel.temporary_variables.copy()
@@ -935,8 +935,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
return val
def preprocess_scan_arguments(
- insn, expr, nresults, scan_iname, track_iname,
- newly_generated_insn_id_set):
+ stmt, expr, nresults, scan_iname, track_iname,
+ newly_generated_stmt_id_set):
"""Does iname substitution within scan arguments and returns a set of values
suitable to be passed to the binary op. Returns a tuple."""
@@ -947,20 +947,20 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
# the arguments in order to pass them to the binary op - so we expand
# items that are not "plain" tuples here.
if not isinstance(inner_expr, tuple):
- get_args_insn_id = insn_id_gen(
- "%s_%s_get" % (insn.id, "_".join(expr.inames)))
+ get_args_stmt_id = stmt_id_gen(
+ "%s_%s_get" % (stmt.id, "_".join(expr.inames)))
inner_expr = expand_inner_reduction(
- id=get_args_insn_id,
+ id=get_args_stmt_id,
expr=inner_expr,
nresults=nresults,
- depends_on=insn.depends_on,
- within_inames=insn.within_inames | expr.inames,
- within_inames_is_final=insn.within_inames_is_final,
- predicates=insn.predicates,
+ depends_on=stmt.depends_on,
+ within_inames=stmt.within_inames | expr.inames,
+ within_inames_is_final=stmt.within_inames_is_final,
+ predicates=stmt.predicates,
)
- newly_generated_insn_id_set.add(get_args_insn_id)
+ newly_generated_stmt_id_set.add(get_args_stmt_id)
updated_inner_exprs = tuple(
replace_var_within_expr(sub_expr, scan_iname, track_iname)
@@ -993,7 +993,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
from pymbolic import var
temp_vars = tuple(var(n) for n in temp_var_names)
- call_insn = make_assignment(
+ call_stmt = make_assignment(
id=id,
assignees=temp_vars,
expression=expr,
@@ -1002,7 +1002,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
within_inames_is_final=within_inames_is_final,
predicates=predicates)
- generated_insns.append(call_insn)
+ generated_stmts.append(call_stmt)
return temp_vars
@@ -1012,7 +1012,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
def map_reduction_seq(expr, rec, nresults, arg_dtypes,
reduction_dtypes):
- outer_insn_inames = temp_kernel.insn_inames(insn)
+ outer_stmt_inames = temp_kernel.stmt_inames(stmt)
from loopy.kernel.data import temp_var_scope
acc_var_names = make_temporaries(
@@ -1022,75 +1022,75 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
dtypes=reduction_dtypes,
scope=temp_var_scope.PRIVATE)
- init_insn_depends_on = frozenset()
+ init_stmt_depends_on = frozenset()
- global_barrier = lp.find_most_recent_global_barrier(temp_kernel, insn.id)
+ global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id)
if global_barrier is not None:
- init_insn_depends_on |= frozenset([global_barrier])
+ init_stmt_depends_on |= frozenset([global_barrier])
from pymbolic import var
acc_vars = tuple(var(n) for n in acc_var_names)
- init_id = insn_id_gen(
- "%s_%s_init" % (insn.id, "_".join(expr.inames)))
+ init_id = stmt_id_gen(
+ "%s_%s_init" % (stmt.id, "_".join(expr.inames)))
- init_insn = make_assignment(
+ init_stmt = make_assignment(
id=init_id,
assignees=acc_vars,
- within_inames=outer_insn_inames - frozenset(expr.inames),
- within_inames_is_final=insn.within_inames_is_final,
- depends_on=init_insn_depends_on,
+ within_inames=outer_stmt_inames - frozenset(expr.inames),
+ within_inames_is_final=stmt.within_inames_is_final,
+ depends_on=init_stmt_depends_on,
expression=expr.operation.neutral_element(*arg_dtypes),
- predicates=insn.predicates,)
+ predicates=stmt.predicates,)
- generated_insns.append(init_insn)
+ generated_stmts.append(init_stmt)
- update_id = insn_id_gen(
- based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)))
+ update_id = stmt_id_gen(
+ based_on="%s_%s_update" % (stmt.id, "_".join(expr.inames)))
- update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames)
- if insn.within_inames_is_final:
- update_insn_iname_deps = insn.within_inames | set(expr.inames)
+ update_stmt_iname_deps = temp_kernel.stmt_inames(stmt) | set(expr.inames)
+ if stmt.within_inames_is_final:
+ update_stmt_iname_deps = stmt.within_inames | set(expr.inames)
- reduction_insn_depends_on = set([init_id])
+ reduction_stmt_depends_on = set([init_id])
# In the case of a multi-argument reduction, we need a name for each of
# the arguments in order to pass them to the binary op - so we expand
# items that are not "plain" tuples here.
if nresults > 1 and not isinstance(expr.expr, tuple):
- get_args_insn_id = insn_id_gen(
- "%s_%s_get" % (insn.id, "_".join(expr.inames)))
+ get_args_stmt_id = stmt_id_gen(
+ "%s_%s_get" % (stmt.id, "_".join(expr.inames)))
reduction_expr = expand_inner_reduction(
- id=get_args_insn_id,
+ id=get_args_stmt_id,
expr=expr.expr,
nresults=nresults,
- depends_on=insn.depends_on,
- within_inames=update_insn_iname_deps,
- within_inames_is_final=insn.within_inames_is_final,
- predicates=insn.predicates,
+ depends_on=stmt.depends_on,
+ within_inames=update_stmt_iname_deps,
+ within_inames_is_final=stmt.within_inames_is_final,
+ predicates=stmt.predicates,
)
- reduction_insn_depends_on.add(get_args_insn_id)
+ reduction_stmt_depends_on.add(get_args_stmt_id)
else:
reduction_expr = expr.expr
- reduction_insn = make_assignment(
+ reduction_stmt = make_assignment(
id=update_id,
assignees=acc_vars,
expression=expr.operation(
arg_dtypes,
_strip_if_scalar(acc_vars, acc_vars),
reduction_expr),
- depends_on=frozenset(reduction_insn_depends_on) | insn.depends_on,
- within_inames=update_insn_iname_deps,
- within_inames_is_final=insn.within_inames_is_final,
- predicates=insn.predicates,)
+ depends_on=frozenset(reduction_stmt_depends_on) | stmt.depends_on,
+ within_inames=update_stmt_iname_deps,
+ within_inames_is_final=stmt.within_inames_is_final,
+ predicates=stmt.predicates,)
- generated_insns.append(reduction_insn)
+ generated_stmts.append(reduction_stmt)
- new_insn_add_depends_on.add(reduction_insn.id)
+ new_stmt_add_depends_on.add(reduction_stmt.id)
if nresults == 1:
assert len(acc_vars) == 1
@@ -1134,12 +1134,12 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
size = _get_int_iname_size(red_iname)
- outer_insn_inames = temp_kernel.insn_inames(insn)
+ outer_stmt_inames = temp_kernel.stmt_inames(stmt)
from loopy.kernel.data import LocalIndexTagBase
outer_local_inames = tuple(
oiname
- for oiname in outer_insn_inames
+ for oiname in outer_stmt_inames
if isinstance(
kernel.iname_to_tag.get(oiname),
LocalIndexTagBase))
@@ -1181,34 +1181,34 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
# }}}
- base_iname_deps = outer_insn_inames - frozenset(expr.inames)
+ base_iname_deps = outer_stmt_inames - frozenset(expr.inames)
neutral = expr.operation.neutral_element(*arg_dtypes)
- init_id = insn_id_gen("%s_%s_init" % (insn.id, red_iname))
- init_insn = make_assignment(
+ init_id = stmt_id_gen("%s_%s_init" % (stmt.id, red_iname))
+ init_stmt = make_assignment(
id=init_id,
assignees=tuple(
acc_var[outer_local_iname_vars + (var(base_exec_iname),)]
for acc_var in acc_vars),
expression=neutral,
within_inames=base_iname_deps | frozenset([base_exec_iname]),
- within_inames_is_final=insn.within_inames_is_final,
+ within_inames_is_final=stmt.within_inames_is_final,
depends_on=frozenset(),
- predicates=insn.predicates,
+ predicates=stmt.predicates,
)
- generated_insns.append(init_insn)
+ generated_stmts.append(init_stmt)
- init_neutral_id = insn_id_gen("%s_%s_init_neutral" % (insn.id, red_iname))
- init_neutral_insn = make_assignment(
+ init_neutral_id = stmt_id_gen("%s_%s_init_neutral" % (stmt.id, red_iname))
+ init_neutral_stmt = make_assignment(
id=init_neutral_id,
assignees=tuple(var(nvn) for nvn in neutral_var_names),
expression=neutral,
within_inames=base_iname_deps | frozenset([base_exec_iname]),
- within_inames_is_final=insn.within_inames_is_final,
+ within_inames_is_final=stmt.within_inames_is_final,
depends_on=frozenset(),
- predicates=insn.predicates,
+ predicates=stmt.predicates,
)
- generated_insns.append(init_neutral_insn)
+ generated_stmts.append(init_neutral_stmt)
transfer_depends_on = set([init_neutral_id, init_id])
@@ -1216,27 +1216,27 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
# the arguments in order to pass them to the binary op - so we expand
# items that are not "plain" tuples here.
if nresults > 1 and not isinstance(expr.expr, tuple):
- get_args_insn_id = insn_id_gen(
- "%s_%s_get" % (insn.id, red_iname))
+ get_args_stmt_id = stmt_id_gen(
+ "%s_%s_get" % (stmt.id, red_iname))
reduction_expr = expand_inner_reduction(
- id=get_args_insn_id,
+ id=get_args_stmt_id,
expr=expr.expr,
nresults=nresults,
- depends_on=insn.depends_on,
+ depends_on=stmt.depends_on,
within_inames=(
- (outer_insn_inames - frozenset(expr.inames))
+ (outer_stmt_inames - frozenset(expr.inames))
| frozenset([red_iname])),
- within_inames_is_final=insn.within_inames_is_final,
- predicates=insn.predicates,
+ within_inames_is_final=stmt.within_inames_is_final,
+ predicates=stmt.predicates,
)
- transfer_depends_on.add(get_args_insn_id)
+ transfer_depends_on.add(get_args_stmt_id)
else:
reduction_expr = expr.expr
- transfer_id = insn_id_gen("%s_%s_transfer" % (insn.id, red_iname))
- transfer_insn = make_assignment(
+ transfer_id = stmt_id_gen("%s_%s_transfer" % (stmt.id, red_iname))
+ transfer_stmt = make_assignment(
id=transfer_id,
assignees=tuple(
acc_var[outer_local_iname_vars + (var(red_iname),)]
@@ -1248,14 +1248,14 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
tuple(var(nvn) for nvn in neutral_var_names)),
reduction_expr),
within_inames=(
- (outer_insn_inames - frozenset(expr.inames))
+ (outer_stmt_inames - frozenset(expr.inames))
| frozenset([red_iname])),
- within_inames_is_final=insn.within_inames_is_final,
- depends_on=frozenset([init_id, init_neutral_id]) | insn.depends_on,
+ within_inames_is_final=stmt.within_inames_is_final,
+ depends_on=frozenset([init_id, init_neutral_id]) | stmt.depends_on,
no_sync_with=frozenset([(init_id, "any")]),
- predicates=insn.predicates,
+ predicates=stmt.predicates,
)
- generated_insns.append(transfer_insn)
+ generated_stmts.append(transfer_stmt)
cur_size = 1
while cur_size < size:
@@ -1274,8 +1274,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
domains.append(_make_slab_set(stage_exec_iname, bound-new_size))
new_iname_tags[stage_exec_iname] = kernel.iname_to_tag[red_iname]
- stage_id = insn_id_gen("red_%s_stage_%d" % (red_iname, istage))
- stage_insn = make_assignment(
+ stage_id = stmt_id_gen("red_%s_stage_%d" % (red_iname, istage))
+ stage_stmt = make_assignment(
id=stage_id,
assignees=tuple(
acc_var[outer_local_iname_vars + (var(stage_exec_iname),)]
@@ -1293,21 +1293,21 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
for acc_var in acc_vars))),
within_inames=(
base_iname_deps | frozenset([stage_exec_iname])),
- within_inames_is_final=insn.within_inames_is_final,
+ within_inames_is_final=stmt.within_inames_is_final,
depends_on=frozenset([prev_id]),
- predicates=insn.predicates,
+ predicates=stmt.predicates,
)
- generated_insns.append(stage_insn)
+ generated_stmts.append(stage_stmt)
prev_id = stage_id
cur_size = new_size
bound = cur_size
istage += 1
- new_insn_add_depends_on.add(prev_id)
- new_insn_add_no_sync_with.add((prev_id, "any"))
- new_insn_add_within_inames.add(base_exec_iname or stage_exec_iname)
+ new_stmt_add_depends_on.add(prev_id)
+ new_stmt_add_no_sync_with.add((prev_id, "any"))
+ new_stmt_add_within_inames.add(base_exec_iname or stage_exec_iname)
if nresults == 1:
assert len(acc_vars) == 1
@@ -1375,7 +1375,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
def map_scan_seq(expr, rec, nresults, arg_dtypes,
reduction_dtypes, sweep_iname, scan_iname, sweep_min_value,
scan_min_value, stride):
- outer_insn_inames = temp_kernel.insn_inames(insn)
+ outer_stmt_inames = temp_kernel.stmt_inames(stmt)
inames_to_remove.add(scan_iname)
track_iname = var_name_gen(
@@ -1397,59 +1397,59 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
from pymbolic import var
acc_vars = tuple(var(n) for n in acc_var_names)
- init_id = insn_id_gen(
- "%s_%s_init" % (insn.id, "_".join(expr.inames)))
+ init_id = stmt_id_gen(
+ "%s_%s_init" % (stmt.id, "_".join(expr.inames)))
- init_insn_depends_on = frozenset()
+ init_stmt_depends_on = frozenset()
- global_barrier = lp.find_most_recent_global_barrier(temp_kernel, insn.id)
+ global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id)
if global_barrier is not None:
- init_insn_depends_on |= frozenset([global_barrier])
+ init_stmt_depends_on |= frozenset([global_barrier])
- init_insn = make_assignment(
+ init_stmt = make_assignment(
id=init_id,
assignees=acc_vars,
- within_inames=outer_insn_inames - frozenset(
+ within_inames=outer_stmt_inames - frozenset(
(sweep_iname,) + expr.inames),
- within_inames_is_final=insn.within_inames_is_final,
- depends_on=init_insn_depends_on,
+ within_inames_is_final=stmt.within_inames_is_final,
+ depends_on=init_stmt_depends_on,
expression=expr.operation.neutral_element(*arg_dtypes),
- predicates=insn.predicates,
+ predicates=stmt.predicates,
)
- generated_insns.append(init_insn)
+ generated_stmts.append(init_stmt)
- update_insn_depends_on = set([init_insn.id]) | insn.depends_on
+ update_stmt_depends_on = set([init_stmt.id]) | stmt.depends_on
updated_inner_exprs = (
- preprocess_scan_arguments(insn, expr.expr, nresults,
- scan_iname, track_iname, update_insn_depends_on))
+ preprocess_scan_arguments(stmt, expr.expr, nresults,
+ scan_iname, track_iname, update_stmt_depends_on))
- update_id = insn_id_gen(
- based_on="%s_%s_update" % (insn.id, "_".join(expr.inames)))
+ update_id = stmt_id_gen(
+ based_on="%s_%s_update" % (stmt.id, "_".join(expr.inames)))
- update_insn_iname_deps = temp_kernel.insn_inames(insn) | set([track_iname])
- if insn.within_inames_is_final:
- update_insn_iname_deps = insn.within_inames | set([track_iname])
+ update_stmt_iname_deps = temp_kernel.stmt_inames(stmt) | set([track_iname])
+ if stmt.within_inames_is_final:
+ update_stmt_iname_deps = stmt.within_inames | set([track_iname])
- scan_insn = make_assignment(
+ scan_stmt = make_assignment(
id=update_id,
assignees=acc_vars,
expression=expr.operation(
arg_dtypes,
_strip_if_scalar(acc_vars, acc_vars),
_strip_if_scalar(acc_vars, updated_inner_exprs)),
- depends_on=frozenset(update_insn_depends_on),
- within_inames=update_insn_iname_deps,
- no_sync_with=insn.no_sync_with,
- within_inames_is_final=insn.within_inames_is_final,
- predicates=insn.predicates,
+ depends_on=frozenset(update_stmt_depends_on),
+ within_inames=update_stmt_iname_deps,
+ no_sync_with=stmt.no_sync_with,
+ within_inames_is_final=stmt.within_inames_is_final,
+ predicates=stmt.predicates,
)
- generated_insns.append(scan_insn)
+ generated_stmts.append(scan_stmt)
- new_insn_add_depends_on.add(scan_insn.id)
+ new_stmt_add_depends_on.add(scan_stmt.id)
if nresults == 1:
assert len(acc_vars) == 1
@@ -1473,12 +1473,12 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
return map_reduction_seq(
expr, rec, nresults, arg_dtypes, reduction_dtypes)
- outer_insn_inames = temp_kernel.insn_inames(insn)
+ outer_stmt_inames = temp_kernel.stmt_inames(stmt)
from loopy.kernel.data import LocalIndexTagBase
outer_local_inames = tuple(
oiname
- for oiname in outer_insn_inames
+ for oiname in outer_stmt_inames
if isinstance(
kernel.iname_to_tag.get(oiname),
LocalIndexTagBase)
@@ -1530,45 +1530,45 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
acc_vars = tuple(var(n) for n in acc_var_names)
read_vars = tuple(var(n) for n in read_var_names)
- base_iname_deps = (outer_insn_inames
+ base_iname_deps = (outer_stmt_inames
- frozenset(expr.inames) - frozenset([sweep_iname]))
neutral = expr.operation.neutral_element(*arg_dtypes)
- init_insn_depends_on = insn.depends_on
+ init_stmt_depends_on = stmt.depends_on
- global_barrier = lp.find_most_recent_global_barrier(temp_kernel, insn.id)
+ global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id)
if global_barrier is not None:
- init_insn_depends_on |= frozenset([global_barrier])
+ init_stmt_depends_on |= frozenset([global_barrier])
- init_id = insn_id_gen("%s_%s_init" % (insn.id, scan_iname))
- init_insn = make_assignment(
+ init_id = stmt_id_gen("%s_%s_init" % (stmt.id, scan_iname))
+ init_stmt = make_assignment(
id=init_id,
assignees=tuple(
acc_var[outer_local_iname_vars + (var(base_exec_iname),)]
for acc_var in acc_vars),
expression=neutral,
within_inames=base_iname_deps | frozenset([base_exec_iname]),
- within_inames_is_final=insn.within_inames_is_final,
- depends_on=init_insn_depends_on,
- predicates=insn.predicates,
+ within_inames_is_final=stmt.within_inames_is_final,
+ depends_on=init_stmt_depends_on,
+ predicates=stmt.predicates,
)
- generated_insns.append(init_insn)
+ generated_stmts.append(init_stmt)
- transfer_insn_depends_on = set([init_insn.id]) | insn.depends_on
+ transfer_stmt_depends_on = set([init_stmt.id]) | stmt.depends_on
updated_inner_exprs = (
- preprocess_scan_arguments(insn, expr.expr, nresults,
- scan_iname, track_iname, transfer_insn_depends_on))
+ preprocess_scan_arguments(stmt, expr.expr, nresults,
+ scan_iname, track_iname, transfer_stmt_depends_on))
from loopy.symbolic import Reduction
from loopy.symbolic import pw_aff_to_expr
sweep_min_value_expr = pw_aff_to_expr(sweep_min_value)
- transfer_id = insn_id_gen("%s_%s_transfer" % (insn.id, scan_iname))
- transfer_insn = make_assignment(
+ transfer_id = stmt_id_gen("%s_%s_transfer" % (stmt.id, scan_iname))
+ transfer_stmt = make_assignment(
id=transfer_id,
assignees=tuple(
acc_var[outer_local_iname_vars
@@ -1580,14 +1580,14 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
expr=_strip_if_scalar(acc_vars, updated_inner_exprs),
allow_simultaneous=False,
),
- within_inames=outer_insn_inames - frozenset(expr.inames),
- within_inames_is_final=insn.within_inames_is_final,
- depends_on=frozenset(transfer_insn_depends_on),
- no_sync_with=frozenset([(init_id, "any")]) | insn.no_sync_with,
- predicates=insn.predicates,
+ within_inames=outer_stmt_inames - frozenset(expr.inames),
+ within_inames_is_final=stmt.within_inames_is_final,
+ depends_on=frozenset(transfer_stmt_depends_on),
+ no_sync_with=frozenset([(init_id, "any")]) | stmt.no_sync_with,
+ predicates=stmt.predicates,
)
- generated_insns.append(transfer_insn)
+ generated_stmts.append(transfer_stmt)
prev_id = transfer_id
@@ -1601,10 +1601,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
new_iname_tags[stage_exec_iname] = kernel.iname_to_tag[sweep_iname]
for read_var, acc_var in zip(read_vars, acc_vars):
- read_stage_id = insn_id_gen(
+ read_stage_id = stmt_id_gen(
"scan_%s_read_stage_%d" % (scan_iname, istage))
- read_stage_insn = make_assignment(
+ read_stage_stmt = make_assignment(
id=read_stage_id,
assignees=(read_var,),
expression=(
@@ -1613,26 +1613,26 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
+ (var(stage_exec_iname) - cur_size,)]),
within_inames=(
base_iname_deps | frozenset([stage_exec_iname])),
- within_inames_is_final=insn.within_inames_is_final,
+ within_inames_is_final=stmt.within_inames_is_final,
depends_on=frozenset([prev_id]),
- predicates=insn.predicates,
+ predicates=stmt.predicates,
)
if cur_size == 1:
- # Performance hack: don't add a barrier here with transfer_insn.
+ # Performance hack: don't add a barrier here with transfer_stmt.
# NOTE: This won't work if the way that local inames
# are lowered changes.
- read_stage_insn = read_stage_insn.copy(
+ read_stage_stmt = read_stage_stmt.copy(
no_sync_with=(
- read_stage_insn.no_sync_with
+ read_stage_stmt.no_sync_with
| frozenset([(transfer_id, "any")])))
- generated_insns.append(read_stage_insn)
+ generated_stmts.append(read_stage_stmt)
prev_id = read_stage_id
- write_stage_id = insn_id_gen(
+ write_stage_id = stmt_id_gen(
"scan_%s_write_stage_%d" % (scan_iname, istage))
- write_stage_insn = make_assignment(
+ write_stage_stmt = make_assignment(
id=write_stage_id,
assignees=tuple(
acc_var[outer_local_iname_vars + (var(stage_exec_iname),)]
@@ -1647,19 +1647,19 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
),
within_inames=(
base_iname_deps | frozenset([stage_exec_iname])),
- within_inames_is_final=insn.within_inames_is_final,
+ within_inames_is_final=stmt.within_inames_is_final,
depends_on=frozenset([prev_id]),
- predicates=insn.predicates,
+ predicates=stmt.predicates,
)
- generated_insns.append(write_stage_insn)
+ generated_stmts.append(write_stage_stmt)
prev_id = write_stage_id
cur_size *= 2
istage += 1
- new_insn_add_depends_on.add(prev_id)
- new_insn_add_within_inames.add(sweep_iname)
+ new_stmt_add_depends_on.add(prev_id)
+ new_stmt_add_within_inames.add(sweep_iname)
output_idx = var(sweep_iname) - sweep_min_value_expr
@@ -1676,7 +1676,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
def map_reduction(expr, rec, nresults=1):
# Only expand one level of reduction at a time, going from outermost to
- # innermost. Otherwise we get the (iname + insn) dependencies wrong.
+ # innermost. Otherwise we get the (iname + stmt) dependencies wrong.
from loopy.type_inference import (
infer_arg_and_reduction_dtypes_for_reduction_expression)
@@ -1684,8 +1684,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
infer_arg_and_reduction_dtypes_for_reduction_expression(
temp_kernel, expr, unknown_types_ok))
- outer_insn_inames = temp_kernel.insn_inames(insn)
- bad_inames = frozenset(expr.inames) & outer_insn_inames
+ outer_stmt_inames = temp_kernel.stmt_inames(stmt)
+ bad_inames = frozenset(expr.inames) & outer_stmt_inames
if bad_inames:
raise LoopyError("reduction used within loop(s) that it was "
"supposed to reduce over: " + ", ".join(bad_inames))
@@ -1711,7 +1711,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
# Try to determine scan candidate information (sweep iname, scan
# iname, etc).
scan_param = _try_infer_scan_candidate_from_expr(
- temp_kernel, expr, outer_insn_inames,
+ temp_kernel, expr, outer_stmt_inames,
sweep_iname=force_outer_iname_for_scan)
except ValueError as v:
@@ -1778,10 +1778,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
parallel = sweep_iname in sweep_class.local_parallel
bad_parallel = sweep_iname in sweep_class.nonlocal_parallel
- if sweep_iname not in outer_insn_inames:
+ if sweep_iname not in outer_stmt_inames:
_error_if_force_scan_on(LoopyError,
"Sweep iname '%s' was detected, but is not an iname "
- "for the instruction." % sweep_iname)
+ "for the statement." % sweep_iname)
elif bad_parallel:
_error_if_force_scan_on(LoopyError,
"Sweep iname '%s' has an unsupported parallel tag '%s' "
@@ -1828,48 +1828,48 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
from loopy.symbolic import ReductionCallbackMapper
cb_mapper = ReductionCallbackMapper(map_reduction)
- insn_queue = kernel.instructions[:]
- insn_id_replacements = {}
+ stmt_queue = kernel.statements[:]
+ stmt_id_replacements = {}
domains = kernel.domains[:]
temp_kernel = kernel
import loopy as lp
- while insn_queue:
- new_insn_add_depends_on = set()
- new_insn_add_no_sync_with = set()
- new_insn_add_within_inames = set()
+ while stmt_queue:
+ new_stmt_add_depends_on = set()
+ new_stmt_add_no_sync_with = set()
+ new_stmt_add_within_inames = set()
- generated_insns = []
+ generated_stmts = []
- insn = insn_queue.pop(0)
+ stmt = stmt_queue.pop(0)
- if insn_id_filter is not None and insn.id != insn_id_filter \
- or not isinstance(insn, lp.MultiAssignmentBase):
- new_insns.append(insn)
+ if stmt_id_filter is not None and stmt.id != stmt_id_filter \
+ or not isinstance(stmt, lp.MultiAssignmentBase):
+ new_stmts.append(stmt)
continue
- nresults = len(insn.assignees)
+ nresults = len(stmt.assignees)
# Run reduction expansion.
from loopy.symbolic import Reduction
- if isinstance(insn.expression, Reduction) and nresults > 1:
- new_expressions = cb_mapper(insn.expression, nresults=nresults)
+ if isinstance(stmt.expression, Reduction) and nresults > 1:
+ new_expressions = cb_mapper(stmt.expression, nresults=nresults)
else:
- new_expressions = (cb_mapper(insn.expression),)
+ new_expressions = (cb_mapper(stmt.expression),)
- if generated_insns:
+ if generated_stmts:
# An expansion happened, so insert the generated stuff plus
# ourselves back into the queue.
- kwargs = insn.get_copy_kwargs(
- depends_on=insn.depends_on
- | frozenset(new_insn_add_depends_on),
- no_sync_with=insn.no_sync_with
- | frozenset(new_insn_add_no_sync_with),
+ kwargs = stmt.get_copy_kwargs(
+ depends_on=stmt.depends_on
+ | frozenset(new_stmt_add_depends_on),
+ no_sync_with=stmt.no_sync_with
+ | frozenset(new_stmt_add_no_sync_with),
within_inames=(
- temp_kernel.insn_inames(insn)
- | new_insn_add_within_inames))
+ temp_kernel.stmt_inames(stmt)
+ | new_stmt_add_within_inames))
kwargs.pop("id")
kwargs.pop("expression")
@@ -1878,53 +1878,53 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
kwargs.pop("temp_var_type", None)
kwargs.pop("temp_var_types", None)
- if isinstance(insn.expression, Reduction) and nresults > 1:
- replacement_insns = [
+ if isinstance(stmt.expression, Reduction) and nresults > 1:
+ replacement_stmts = [
lp.Assignment(
- id=insn_id_gen(insn.id),
+ id=stmt_id_gen(stmt.id),
assignee=assignee,
expression=new_expr,
**kwargs)
for assignee, new_expr in zip(
- insn.assignees, new_expressions)]
+ stmt.assignees, new_expressions)]
else:
new_expr, = new_expressions
- replacement_insns = [
+ replacement_stmts = [
make_assignment(
- id=insn_id_gen(insn.id),
- assignees=insn.assignees,
+ id=stmt_id_gen(stmt.id),
+ assignees=stmt.assignees,
expression=new_expr,
**kwargs)
]
- insn_id_replacements[insn.id] = [
- rinsn.id for rinsn in replacement_insns]
+ stmt_id_replacements[stmt.id] = [
+ rstmt.id for rstmt in replacement_stmts]
- insn_queue = generated_insns + replacement_insns + insn_queue
+ stmt_queue = generated_stmts + replacement_stmts + stmt_queue
# The reduction expander needs an up-to-date kernel
# object to find dependencies. Keep temp_kernel up-to-date.
temp_kernel = kernel.copy(
- instructions=new_insns + insn_queue,
+ statements=new_stmts + stmt_queue,
temporary_variables=new_temporary_variables,
domains=domains)
- temp_kernel = lp.replace_instruction_ids(
- temp_kernel, insn_id_replacements)
+ temp_kernel = lp.replace_statement_ids(
+ temp_kernel, stmt_id_replacements)
else:
- # nothing happened, we're done with insn
- assert not new_insn_add_depends_on
+ # nothing happened, we're done with stmt
+ assert not new_stmt_add_depends_on
- new_insns.append(insn)
+ new_stmts.append(stmt)
kernel = kernel.copy(
- instructions=new_insns,
+ statements=new_stmts,
temporary_variables=new_temporary_variables,
domains=domains)
- kernel = lp.replace_instruction_ids(kernel, insn_id_replacements)
+ kernel = lp.replace_statement_ids(kernel, stmt_id_replacements)
kernel = lp.tag_inames(kernel, new_iname_tags)
@@ -1939,7 +1939,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
# }}}
-# {{{ find idempotence ("boostability") of instructions
+# {{{ find idempotence ("boostability") of statements
def find_idempotence(kernel):
logger.debug("%s: idempotence" % kernel.name)
@@ -1951,18 +1951,18 @@ def find_idempotence(kernel):
var_names = arg_names | set(six.iterkeys(kernel.temporary_variables))
reads_map = dict(
- (insn.id, insn.read_dependency_names() & var_names)
- for insn in kernel.instructions)
+ (stmt.id, stmt.read_dependency_names() & var_names)
+ for stmt in kernel.statements)
from collections import defaultdict
dep_graph = defaultdict(set)
- for insn in kernel.instructions:
- dep_graph[insn.id] = set(writer_id
- for var in reads_map[insn.id]
+ for stmt in kernel.statements:
+ dep_graph[stmt.id] = set(writer_id
+ for var in reads_map[stmt.id]
for writer_id in writer_map.get(var, set()))
- # Find SCCs of dep_graph. These are used for checking if the instruction is
+ # Find SCCs of dep_graph. These are used for checking if the statement is
# in a dependency cycle.
from loopy.tools import compute_sccs
@@ -1972,29 +1972,29 @@ def find_idempotence(kernel):
non_idempotently_updated_vars = set()
- new_insns = []
- for insn in kernel.instructions:
- boostable = len(sccs[insn.id]) == 1 and insn.id not in dep_graph[insn.id]
+ new_stmts = []
+ for stmt in kernel.statements:
+ boostable = len(sccs[stmt.id]) == 1 and stmt.id not in dep_graph[stmt.id]
if not boostable:
non_idempotently_updated_vars.update(
- insn.assignee_var_names())
+ stmt.assignee_var_names())
- new_insns.append(insn.copy(boostable=boostable))
+ new_stmts.append(stmt.copy(boostable=boostable))
# {{{ remove boostability from isns that access non-idempotently updated vars
- new2_insns = []
- for insn in new_insns:
- if insn.boostable and bool(
- non_idempotently_updated_vars & insn.dependency_names()):
- new2_insns.append(insn.copy(boostable=False))
+ new2_stmts = []
+ for stmt in new_stmts:
+ if stmt.boostable and bool(
+ non_idempotently_updated_vars & stmt.dependency_names()):
+ new2_stmts.append(stmt.copy(boostable=False))
else:
- new2_insns.append(insn)
+ new2_stmts.append(stmt)
# }}}
- return kernel.copy(instructions=new2_insns)
+ return kernel.copy(statements=new2_stmts)
# }}}
@@ -2002,47 +2002,47 @@ def find_idempotence(kernel):
# {{{ limit boostability
def limit_boostability(kernel):
- """Finds out which other inames an instruction's inames occur with
+ """Finds out which other inames an statement's inames occur with
and then limits boostability to just those inames.
"""
logger.debug("%s: limit boostability" % kernel.name)
iname_occurs_with = {}
- for insn in kernel.instructions:
- insn_inames = kernel.insn_inames(insn)
- for iname in insn_inames:
- iname_occurs_with.setdefault(iname, set()).update(insn_inames)
+ for stmt in kernel.statements:
+ stmt_inames = kernel.stmt_inames(stmt)
+ for iname in stmt_inames:
+ iname_occurs_with.setdefault(iname, set()).update(stmt_inames)
iname_use_counts = {}
- for insn in kernel.instructions:
- for iname in kernel.insn_inames(insn):
+ for stmt in kernel.statements:
+ for iname in kernel.stmt_inames(stmt):
iname_use_counts[iname] = iname_use_counts.get(iname, 0) + 1
single_use_inames = set(iname for iname, uc in six.iteritems(iname_use_counts)
if uc == 1)
- new_insns = []
- for insn in kernel.instructions:
- if insn.boostable is None:
- raise LoopyError("insn '%s' has undetermined boostability" % insn.id)
- elif insn.boostable:
+ new_stmts = []
+ for stmt in kernel.statements:
+ if stmt.boostable is None:
+ raise LoopyError("stmt '%s' has undetermined boostability" % stmt.id)
+ elif stmt.boostable:
boostable_into = set()
- for iname in kernel.insn_inames(insn):
+ for iname in kernel.stmt_inames(stmt):
boostable_into.update(iname_occurs_with[iname])
- boostable_into -= kernel.insn_inames(insn) | single_use_inames
+ boostable_into -= kernel.stmt_inames(stmt) | single_use_inames
# Even if boostable_into is empty, leave boostable flag on--it is used
# for boosting into unused hw axes.
- insn = insn.copy(boostable_into=boostable_into)
+ stmt = stmt.copy(boostable_into=boostable_into)
else:
- insn = insn.copy(boostable_into=set())
+ stmt = stmt.copy(boostable_into=set())
- new_insns.append(insn)
+ new_stmts.append(stmt)
- return kernel.copy(instructions=new_insns)
+ return kernel.copy(statements=new_stmts)
# }}}
diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index abf4d799fbdb14f86fa29dde26e6654130fc66de..e585a8a39ce188e7ea41c1a301efb5d82b4b33e8 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -66,8 +66,8 @@ class LeaveLoop(EndBlockItem):
hash_fields = __slots__ = ["iname"]
-class RunInstruction(ScheduleItem):
- hash_fields = __slots__ = ["insn_id"]
+class RunStatement(ScheduleItem):
+ hash_fields = __slots__ = ["stmt_id"]
class CallKernel(BeginBlockItem):
@@ -88,11 +88,11 @@ class Barrier(ScheduleItem):
``"local"`` or ``"global"``
- .. attribute:: originating_insn_id
+ .. attribute:: originating_stmt_id
"""
hash_fields = ["comment", "kind"]
- __slots__ = hash_fields + ["originating_insn_id"]
+ __slots__ = hash_fields + ["originating_stmt_id"]
# }}}
@@ -143,12 +143,12 @@ def generate_sub_sched_items(schedule, start_idx):
assert False
-def get_insn_ids_for_block_at(schedule, start_idx):
+def get_stmt_ids_for_block_at(schedule, start_idx):
return frozenset(
- sub_sched_item.insn_id
+ sub_sched_item.stmt_id
for i, sub_sched_item in generate_sub_sched_items(
schedule, start_idx)
- if isinstance(sub_sched_item, RunInstruction))
+ if isinstance(sub_sched_item, RunStatement))
def find_active_inames_at(kernel, sched_index):
@@ -185,17 +185,17 @@ def find_used_inames_within(kernel, sched_index):
if isinstance(sched_item, BeginBlockItem):
loop_contents, _ = gather_schedule_block(
kernel.schedule, sched_index)
- run_insns = [subsched_item
+ run_stmts = [subsched_item
for subsched_item in loop_contents
- if isinstance(subsched_item, RunInstruction)]
- elif isinstance(sched_item, RunInstruction):
- run_insns = [sched_item]
+ if isinstance(subsched_item, RunStatement)]
+ elif isinstance(sched_item, RunStatement):
+ run_stmts = [sched_item]
else:
return set()
result = set()
- for sched_item in run_insns:
- result.update(kernel.insn_inames(sched_item.insn_id))
+ for sched_item in run_stmts:
+ result.update(kernel.stmt_inames(sched_item.stmt_id))
return result
@@ -214,13 +214,13 @@ def find_loop_nest_with_map(kernel):
if not isinstance(kernel.iname_to_tag.get(iname),
(ConcurrentTag, IlpBaseTag, VectorizeTag))])
- iname_to_insns = kernel.iname_to_insns()
+ iname_to_stmts = kernel.iname_to_stmts()
for iname in all_nonpar_inames:
result[iname] = set([
other_iname
- for insn in iname_to_insns[iname]
- for other_iname in kernel.insn_inames(insn) & all_nonpar_inames
+ for stmt in iname_to_stmts[iname]
+ for other_iname in kernel.stmt_inames(stmt) & all_nonpar_inames
])
return result
@@ -234,7 +234,7 @@ def find_loop_nest_around_map(kernel):
all_inames = kernel.all_inames()
- iname_to_insns = kernel.iname_to_insns()
+ iname_to_stmts = kernel.iname_to_stmts()
# examine pairs of all inames--O(n**2), I know.
from loopy.kernel.data import IlpBaseTag
@@ -253,7 +253,7 @@ def find_loop_nest_around_map(kernel):
# slack here.
continue
- if iname_to_insns[inner_iname] < iname_to_insns[outer_iname]:
+ if iname_to_stmts[inner_iname] < iname_to_stmts[outer_iname]:
result[inner_iname].add(outer_iname)
for dom_idx, dom in enumerate(kernel.domains):
@@ -267,54 +267,54 @@ def find_loop_nest_around_map(kernel):
return result
-def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map):
- """Returns a dictionary mapping inames to other instruction ids that need to
+def find_loop_stmt_dep_map(kernel, loop_nest_with_map, loop_nest_around_map):
+ """Returns a dictionary mapping inames to other statement ids that need to
be scheduled before the iname should be eligible for scheduling.
"""
result = {}
from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag
- for insn in kernel.instructions:
- for iname in kernel.insn_inames(insn):
+ for stmt in kernel.statements:
+ for iname in kernel.stmt_inames(stmt):
if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag):
continue
iname_dep = result.setdefault(iname, set())
- for dep_insn_id in insn.depends_on:
- if dep_insn_id in iname_dep:
+ for dep_stmt_id in stmt.depends_on:
+ if dep_stmt_id in iname_dep:
# already depending, nothing to check
continue
- dep_insn = kernel.id_to_insn[dep_insn_id]
- dep_insn_inames = kernel.insn_inames(dep_insn)
+ dep_stmt = kernel.id_to_stmt[dep_stmt_id]
+ dep_stmt_inames = kernel.stmt_inames(dep_stmt)
- if iname in dep_insn_inames:
+ if iname in dep_stmt_inames:
# Nothing to be learned, dependency is in loop over iname
# already.
continue
- # To make sure dep_insn belongs outside of iname, we must prove
- # that all inames that dep_insn will be executed in nest
+ # To make sure dep_stmt belongs outside of iname, we must prove
+ # that all inames that dep_stmt will be executed in nest
# outside of the loop over *iname*. (i.e. nested around, or
# before).
may_add_to_loop_dep_map = True
- for dep_insn_iname in dep_insn_inames:
- if dep_insn_iname in loop_nest_around_map[iname]:
- # dep_insn_iname is guaranteed to nest outside of iname
+ for dep_stmt_iname in dep_stmt_inames:
+ if dep_stmt_iname in loop_nest_around_map[iname]:
+ # dep_stmt_iname is guaranteed to nest outside of iname
# -> safe.
continue
- tag = kernel.iname_to_tag.get(dep_insn_iname)
+ tag = kernel.iname_to_tag.get(dep_stmt_iname)
if isinstance(tag, (ConcurrentTag, IlpBaseTag, VectorizeTag)):
# Parallel tags don't really nest, so we'll disregard
# them here.
continue
- if dep_insn_iname not in loop_nest_with_map.get(iname, []):
- # dep_insn_iname does not nest with iname, so its nest
+ if dep_stmt_iname not in loop_nest_with_map.get(iname, []):
+ # dep_stmt_iname does not nest with iname, so its nest
# must occur outside.
continue
@@ -325,38 +325,38 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map):
continue
logger.debug("{knl}: loop dependency map: iname '{iname}' "
- "depends on '{dep_insn}' via '{insn}'"
+ "depends on '{dep_stmt}' via '{stmt}'"
.format(
knl=kernel.name,
iname=iname,
- dep_insn=dep_insn_id,
- insn=insn.id))
+ dep_stmt=dep_stmt_id,
+ stmt=stmt.id))
- iname_dep.add(dep_insn_id)
+ iname_dep.add(dep_stmt_id)
return result
-def group_insn_counts(kernel):
+def group_stmt_counts(kernel):
result = {}
- for insn in kernel.instructions:
- for grp in insn.groups:
+ for stmt in kernel.statements:
+ for grp in stmt.groups:
result[grp] = result.get(grp, 0) + 1
return result
-def gen_dependencies_except(kernel, insn_id, except_insn_ids):
- insn = kernel.id_to_insn[insn_id]
- for dep_id in insn.depends_on:
+def gen_dependencies_except(kernel, stmt_id, except_stmt_ids):
+ stmt = kernel.id_to_stmt[stmt_id]
+ for dep_id in stmt.depends_on:
- if dep_id in except_insn_ids:
+ if dep_id in except_stmt_ids:
continue
yield dep_id
- for sub_dep_id in gen_dependencies_except(kernel, dep_id, except_insn_ids):
+ for sub_dep_id in gen_dependencies_except(kernel, dep_id, except_stmt_ids):
yield sub_dep_id
@@ -403,50 +403,50 @@ def get_priority_tiers(wanted, priorities):
yield tier
-def sched_item_to_insn_id(sched_item):
+def sched_item_to_stmt_id(sched_item):
# Helper for use in generator expressions, i.e.
- # (... for insn_id in sched_item_to_insn_id(item) ...)
- if isinstance(sched_item, RunInstruction):
- yield sched_item.insn_id
+ # (... for stmt_id in sched_item_to_stmt_id(item) ...)
+ if isinstance(sched_item, RunStatement):
+ yield sched_item.stmt_id
elif isinstance(sched_item, Barrier):
- if (hasattr(sched_item, "originating_insn_id")
- and sched_item.originating_insn_id is not None):
- yield sched_item.originating_insn_id
+ if (hasattr(sched_item, "originating_stmt_id")
+ and sched_item.originating_stmt_id is not None):
+ yield sched_item.originating_stmt_id
# }}}
# {{{ debug help
-def format_insn_id(kernel, insn_id):
+def format_stmt_id(kernel, stmt_id):
Fore = kernel.options._fore # noqa
Style = kernel.options._style # noqa
- return Fore.GREEN + insn_id + Style.RESET_ALL
+ return Fore.GREEN + stmt_id + Style.RESET_ALL
-def format_insn(kernel, insn_id):
- insn = kernel.id_to_insn[insn_id]
+def format_stmt(kernel, stmt_id):
+ stmt = kernel.id_to_stmt[stmt_id]
Fore = kernel.options._fore # noqa
Style = kernel.options._style # noqa
- from loopy.kernel.instruction import (
- MultiAssignmentBase, NoOpInstruction, BarrierInstruction)
- if isinstance(insn, MultiAssignmentBase):
+ from loopy.kernel.statement import (
+ MultiAssignmentBase, NoOpStatement, BarrierStatement)
+ if isinstance(stmt, MultiAssignmentBase):
return "%s%s%s = %s%s%s {id=%s}" % (
- Fore.CYAN, ", ".join(str(a) for a in insn.assignees), Style.RESET_ALL,
- Fore.MAGENTA, str(insn.expression), Style.RESET_ALL,
- format_insn_id(kernel, insn_id))
- elif isinstance(insn, BarrierInstruction):
+ Fore.CYAN, ", ".join(str(a) for a in stmt.assignees), Style.RESET_ALL,
+ Fore.MAGENTA, str(stmt.expression), Style.RESET_ALL,
+ format_stmt_id(kernel, stmt_id))
+ elif isinstance(stmt, BarrierStatement):
return "[%s] %s... %sbarrier%s" % (
- format_insn_id(kernel, insn_id),
- Fore.MAGENTA, insn.kind[0], Style.RESET_ALL)
- elif isinstance(insn, NoOpInstruction):
+ format_stmt_id(kernel, stmt_id),
+ Fore.MAGENTA, stmt.kind[0], Style.RESET_ALL)
+ elif isinstance(stmt, NoOpStatement):
return "[%s] %s... nop%s" % (
- format_insn_id(kernel, insn_id),
+ format_stmt_id(kernel, stmt_id),
Fore.MAGENTA, Style.RESET_ALL)
else:
return "[%s] %s%s%s" % (
- format_insn_id(kernel, insn_id),
- Fore.CYAN, str(insn), Style.RESET_ALL)
+ format_stmt_id(kernel, stmt_id),
+ Fore.CYAN, str(stmt), Style.RESET_ALL)
def dump_schedule(kernel, schedule):
@@ -471,13 +471,13 @@ def dump_schedule(kernel, schedule):
elif isinstance(sched_item, ReturnFromKernel):
indent = indent[:-4]
lines.append(indent + "RETURN FROM KERNEL %s" % sched_item.kernel_name)
- elif isinstance(sched_item, RunInstruction):
- insn = kernel.id_to_insn[sched_item.insn_id]
- if isinstance(insn, MultiAssignmentBase):
- insn_str = format_insn(kernel, sched_item.insn_id)
+ elif isinstance(sched_item, RunStatement):
+ stmt = kernel.id_to_stmt[sched_item.stmt_id]
+ if isinstance(stmt, MultiAssignmentBase):
+ stmt_str = format_stmt(kernel, sched_item.stmt_id)
else:
- insn_str = sched_item.insn_id
- lines.append(indent + insn_str)
+ stmt_str = sched_item.stmt_id
+ lines.append(indent + stmt_str)
elif isinstance(sched_item, Barrier):
lines.append(indent + "... %sbarrier" % sched_item.kind[0])
else:
@@ -592,9 +592,9 @@ class SchedulerState(ImmutableRecord):
.. attribute:: schedule
- .. attribute:: scheduled_insn_ids
+ .. attribute:: scheduled_stmt_ids
- .. attribute:: unscheduled_insn_ids
+ .. attribute:: unscheduled_stmt_ids
.. attribute:: preschedule
@@ -602,9 +602,9 @@ class SchedulerState(ImmutableRecord):
schedule, maintaining the same relative ordering. Newly scheduled
items may interleave this sequence.
- .. attribute:: prescheduled_insn_ids
+ .. attribute:: prescheduled_stmt_ids
- A :class:`frozenset` of any instruction that started prescheduled
+ A :class:`frozenset` of any statement that started prescheduled
.. attribute:: prescheduled_inames
@@ -618,14 +618,14 @@ class SchedulerState(ImmutableRecord):
Whether the scheduler is inside a subkernel
- .. attribute:: group_insn_counts
+ .. attribute:: group_stmt_counts
- A mapping from instruction group names to the number of instructions
+ A mapping from statement group names to the number of statements
contained in them.
.. attribute:: active_group_counts
- A mapping from instruction group names to the number of instructions
+ A mapping from statement group names to the number of statements
in them that are left to schedule. If a group name occurs in this
mapping, that group is considered active.
@@ -645,8 +645,8 @@ class SchedulerState(ImmutableRecord):
def generate_loop_schedules_internal(
sched_state, allow_boost=False, debug=None):
- # allow_insn is set to False initially and after entering each loop
- # to give loops containing high-priority instructions a chance.
+ # allow_stmt is set to False initially and after entering each loop
+ # to give loops containing high-priority statements a chance.
kernel = sched_state.kernel
Fore = kernel.options._fore # noqa
@@ -734,13 +734,13 @@ def generate_loop_schedules_internal(
# {{{ see if there are pending barriers in the preschedule
- # Barriers that do not have an originating instruction are handled here.
+ # Barriers that do not have an originating statement are handled here.
# (These are automatically inserted by insert_barriers().) Barriers with
- # originating instructions are handled as part of normal instruction
+ # originating statements are handled as part of normal statement
# scheduling below.
if (
isinstance(next_preschedule_item, Barrier)
- and next_preschedule_item.originating_insn_id is None):
+ and next_preschedule_item.originating_stmt_id is None):
for result in generate_loop_schedules_internal(
sched_state.copy(
schedule=sched_state.schedule + (next_preschedule_item,),
@@ -751,134 +751,134 @@ def generate_loop_schedules_internal(
# }}}
- # {{{ see if any insns are ready to be scheduled now
+ # {{{ see if any stmts are ready to be scheduled now
- # Also take note of insns that have a chance of being schedulable inside
+ # Also take note of stmts that have a chance of being schedulable inside
# the current loop nest, in this set:
- reachable_insn_ids = set()
+ reachable_stmt_ids = set()
active_groups = frozenset(sched_state.active_group_counts)
- def insn_sort_key(insn_id):
- insn = kernel.id_to_insn[insn_id]
+ def stmt_sort_key(stmt_id):
+ stmt = kernel.id_to_stmt[stmt_id]
- # Sort by insn.id as a last criterion to achieve deterministic
+ # Sort by stmt.id as a last criterion to achieve deterministic
# schedule generation order.
- return (insn.priority, len(active_groups & insn.groups), insn.id)
+ return (stmt.priority, len(active_groups & stmt.groups), stmt.id)
- insn_ids_to_try = sorted(
- # Non-prescheduled instructions go first.
- sched_state.unscheduled_insn_ids - sched_state.prescheduled_insn_ids,
- key=insn_sort_key, reverse=True)
+ stmt_ids_to_try = sorted(
+ # Non-prescheduled statements go first.
+ sched_state.unscheduled_stmt_ids - sched_state.prescheduled_stmt_ids,
+ key=stmt_sort_key, reverse=True)
- insn_ids_to_try.extend(
- insn_id
+ stmt_ids_to_try.extend(
+ stmt_id
for item in sched_state.preschedule
- for insn_id in sched_item_to_insn_id(item))
+ for stmt_id in sched_item_to_stmt_id(item))
- for insn_id in insn_ids_to_try:
- insn = kernel.id_to_insn[insn_id]
+ for stmt_id in stmt_ids_to_try:
+ stmt = kernel.id_to_stmt[stmt_id]
- is_ready = insn.depends_on <= sched_state.scheduled_insn_ids
+ is_ready = stmt.depends_on <= sched_state.scheduled_stmt_ids
if not is_ready:
if debug_mode:
- print("instruction '%s' is missing insn depedencies '%s'" % (
- format_insn(kernel, insn.id), ",".join(
- insn.depends_on - sched_state.scheduled_insn_ids)))
+ print("statement '%s' is missing stmt dependencies '%s'" % (
+ format_stmt(kernel, stmt.id), ",".join(
+ stmt.depends_on - sched_state.scheduled_stmt_ids)))
continue
- want = kernel.insn_inames(insn) - sched_state.parallel_inames
+ want = kernel.stmt_inames(stmt) - sched_state.parallel_inames
have = active_inames_set - sched_state.parallel_inames
- # If insn is boostable, it may be placed inside a more deeply
+ # If stmt is boostable, it may be placed inside a more deeply
# nested loop without harm.
orig_have = have
if allow_boost:
- # Note that the inames in 'insn.boostable_into' necessarily won't
+ # Note that the inames in 'stmt.boostable_into' necessarily won't
# be contained in 'want'.
- have = have - insn.boostable_into
+ have = have - stmt.boostable_into
if want != have:
is_ready = False
if debug_mode:
if want-have:
- print("instruction '%s' is missing inames '%s'"
- % (format_insn(kernel, insn.id), ",".join(want-have)))
+ print("statement '%s' is missing inames '%s'"
+ % (format_stmt(kernel, stmt.id), ",".join(want-have)))
if have-want:
- print("instruction '%s' won't work under inames '%s'"
- % (format_insn(kernel, insn.id), ",".join(have-want)))
+ print("statement '%s' won't work under inames '%s'"
+ % (format_stmt(kernel, stmt.id), ",".join(have-want)))
- # {{{ check if scheduling this insn is compatible with preschedule
+ # {{{ check if scheduling this stmt is compatible with preschedule
- if insn_id in sched_state.prescheduled_insn_ids:
- if isinstance(next_preschedule_item, RunInstruction):
- next_preschedule_insn_id = next_preschedule_item.insn_id
+ if stmt_id in sched_state.prescheduled_stmt_ids:
+ if isinstance(next_preschedule_item, RunStatement):
+ next_preschedule_stmt_id = next_preschedule_item.stmt_id
elif isinstance(next_preschedule_item, Barrier):
- assert next_preschedule_item.originating_insn_id is not None
- next_preschedule_insn_id = next_preschedule_item.originating_insn_id
+ assert next_preschedule_item.originating_stmt_id is not None
+ next_preschedule_stmt_id = next_preschedule_item.originating_stmt_id
else:
- next_preschedule_insn_id = None
+ next_preschedule_stmt_id = None
- if next_preschedule_insn_id != insn_id:
+ if next_preschedule_stmt_id != stmt_id:
if debug_mode:
print("can't schedule '%s' because another preschedule "
- "instruction precedes it" % format_insn(kernel, insn.id))
+ "statement precedes it" % format_stmt(kernel, stmt.id))
is_ready = False
# }}}
- # {{{ check if scheduler state allows insn scheduling
+ # {{{ check if scheduler state allows stmt scheduling
- from loopy.kernel.instruction import BarrierInstruction
- if isinstance(insn, BarrierInstruction) and insn.kind == "global":
+ from loopy.kernel.statement import BarrierStatement
+ if isinstance(stmt, BarrierStatement) and stmt.kind == "global":
if not sched_state.may_schedule_global_barriers:
if debug_mode:
print("can't schedule '%s' because global barriers are "
- "not currently allowed" % format_insn(kernel, insn.id))
+ "not currently allowed" % format_stmt(kernel, stmt.id))
is_ready = False
else:
if not sched_state.within_subkernel:
if debug_mode:
print("can't schedule '%s' because not within subkernel"
- % format_insn(kernel, insn.id))
+ % format_stmt(kernel, stmt.id))
is_ready = False
# }}}
# {{{ determine group-based readiness
- if insn.conflicts_with_groups & active_groups:
+ if stmt.conflicts_with_groups & active_groups:
is_ready = False
if debug_mode:
- print("instruction '%s' conflicts with active group(s) '%s'"
- % (insn.id, ",".join(
- active_groups & insn.conflicts_with_groups)))
+ print("statement '%s' conflicts with active group(s) '%s'"
+ % (stmt.id, ",".join(
+ active_groups & stmt.conflicts_with_groups)))
# }}}
# {{{ determine reachability
if (not is_ready and have <= want):
- reachable_insn_ids.add(insn_id)
+ reachable_stmt_ids.add(stmt_id)
# }}}
if is_ready and debug_mode:
- print("ready to schedule '%s'" % format_insn(kernel, insn.id))
+ print("ready to schedule '%s'" % format_stmt(kernel, stmt.id))
if is_ready and not debug_mode:
- iid_set = frozenset([insn.id])
+ iid_set = frozenset([stmt.id])
- # {{{ update active group counts for added instruction
+ # {{{ update active group counts for added statement
- if insn.groups:
+ if stmt.groups:
new_active_group_counts = sched_state.active_group_counts.copy()
- for grp in insn.groups:
+ for grp in stmt.groups:
if grp in new_active_group_counts:
new_active_group_counts[grp] -= 1
if new_active_group_counts[grp] == 0:
@@ -886,7 +886,7 @@ def generate_loop_schedules_internal(
else:
new_active_group_counts[grp] = (
- sched_state.group_insn_counts[grp] - 1)
+ sched_state.group_stmt_counts[grp] - 1)
else:
new_active_group_counts = sched_state.active_group_counts
@@ -895,18 +895,18 @@ def generate_loop_schedules_internal(
new_uses_of_boostability = []
if allow_boost:
- if orig_have & insn.boostable_into:
+ if orig_have & stmt.boostable_into:
new_uses_of_boostability.append(
- (insn.id, orig_have & insn.boostable_into))
+ (stmt.id, orig_have & stmt.boostable_into))
new_sched_state = sched_state.copy(
- scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set,
- unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set,
+ scheduled_stmt_ids=sched_state.scheduled_stmt_ids | iid_set,
+ unscheduled_stmt_ids=sched_state.unscheduled_stmt_ids - iid_set,
schedule=(
- sched_state.schedule + (RunInstruction(insn_id=insn.id),)),
+ sched_state.schedule + (RunStatement(stmt_id=stmt.id),)),
preschedule=(
sched_state.preschedule
- if insn_id not in sched_state.prescheduled_insn_ids
+ if stmt_id not in sched_state.prescheduled_stmt_ids
else sched_state.preschedule[1:]),
active_group_counts=new_active_group_counts,
uses_of_boostability=(
@@ -923,9 +923,9 @@ def generate_loop_schedules_internal(
allow_boost=rec_allow_boost, debug=debug):
yield sub_sched
- if not sched_state.group_insn_counts:
+ if not sched_state.group_stmt_counts:
# No groups: We won't need to backtrack on scheduling
- # instructions.
+ # statements.
return
# }}}
@@ -949,21 +949,21 @@ def generate_loop_schedules_internal(
can_leave = False
elif last_entered_loop not in sched_state.breakable_inames:
# If the iname is not breakable, then check that we've
- # scheduled all the instructions that require it.
+ # scheduled all the statements that require it.
- for insn_id in sched_state.unscheduled_insn_ids:
- insn = kernel.id_to_insn[insn_id]
- if last_entered_loop in kernel.insn_inames(insn):
+ for stmt_id in sched_state.unscheduled_stmt_ids:
+ stmt = kernel.id_to_stmt[stmt_id]
+ if last_entered_loop in kernel.stmt_inames(stmt):
if debug_mode:
print("cannot leave '%s' because '%s' still depends on it"
- % (last_entered_loop, format_insn(kernel, insn.id)))
+ % (last_entered_loop, format_stmt(kernel, stmt.id)))
- # check if there's a dependency of insn that needs to be
+ # check if there's a dependency of stmt that needs to be
# outside of last_entered_loop.
- for subdep_id in gen_dependencies_except(kernel, insn_id,
- sched_state.scheduled_insn_ids):
- subdep = kernel.id_to_insn[insn_id]
- want = (kernel.insn_inames(subdep_id)
+ for subdep_id in gen_dependencies_except(kernel, stmt_id,
+ sched_state.scheduled_stmt_ids):
+ subdep = kernel.id_to_stmt[stmt_id]
+ want = (kernel.stmt_inames(subdep_id)
- sched_state.parallel_inames)
if (
last_entered_loop not in want and
@@ -982,10 +982,10 @@ def generate_loop_schedules_internal(
"warn": Fore.RED + Style.BRIGHT,
"reset_all": Style.RESET_ALL,
"iname": last_entered_loop,
- "subdep": format_insn_id(kernel, subdep_id),
- "dep": format_insn_id(kernel, insn_id),
- "subdep_i": format_insn(kernel, subdep_id),
- "dep_i": format_insn(kernel, insn_id),
+ "subdep": format_stmt_id(kernel, subdep_id),
+ "dep": format_stmt_id(kernel, stmt_id),
+ "subdep_i": format_stmt(kernel, subdep_id),
+ "dep_i": format_stmt(kernel, stmt_id),
})
can_leave = False
@@ -994,14 +994,14 @@ def generate_loop_schedules_internal(
if can_leave:
can_leave = False
- # We may only leave this loop if we've scheduled an instruction
+ # We may only leave this loop if we've scheduled a statement
# since entering it.
- seen_an_insn = False
+ seen_an_stmt = False
ignore_count = 0
for sched_item in sched_state.schedule[::-1]:
- if isinstance(sched_item, RunInstruction):
- seen_an_insn = True
+ if isinstance(sched_item, RunStatement):
+ seen_an_stmt = True
elif isinstance(sched_item, LeaveLoop):
ignore_count += 1
elif isinstance(sched_item, EnterLoop):
@@ -1009,7 +1009,7 @@ def generate_loop_schedules_internal(
ignore_count -= 1
else:
assert sched_item.iname == last_entered_loop
- if seen_an_insn:
+ if seen_an_stmt:
can_leave = True
break
@@ -1036,10 +1036,10 @@ def generate_loop_schedules_internal(
# {{{ see if any loop can be entered now
- # Find inames that are being referenced by as yet unscheduled instructions.
+ # Find inames that are being referenced by as yet unscheduled statements.
needed_inames = set()
- for insn_id in sched_state.unscheduled_insn_ids:
- needed_inames.update(kernel.insn_inames(insn_id))
+ for stmt_id in sched_state.unscheduled_stmt_ids:
+ needed_inames.update(kernel.stmt_inames(stmt_id))
needed_inames = (needed_inames
# There's no notion of 'entering' a parallel loop
@@ -1053,8 +1053,8 @@ def generate_loop_schedules_internal(
print("inames still needed :", ",".join(needed_inames))
print("active inames :", ",".join(sched_state.active_inames))
print("inames entered so far :", ",".join(sched_state.entered_inames))
- print("reachable insns:", ",".join(reachable_insn_ids))
- print("active groups (with insn counts):", ",".join(
+ print("reachable stmts:", ",".join(reachable_stmt_ids))
+ print("active groups (with stmt counts):", ",".join(
"%s: %d" % (grp, c)
for grp, c in six.iteritems(sched_state.active_group_counts)))
print(75*"-")
@@ -1086,18 +1086,18 @@ def generate_loop_schedules_internal(
continue
if (
- not sched_state.loop_insn_dep_map.get(iname, set())
- <= sched_state.scheduled_insn_ids):
+ not sched_state.loop_stmt_dep_map.get(iname, set())
+ <= sched_state.scheduled_stmt_ids):
if debug_mode:
print(
"scheduling {iname} prohibited by loop dependency map "
- "(needs '{needed_insns})'"
+ "(needs '{needed_stmts}')"
.format(
iname=iname,
- needed_insns=", ".join(
- sched_state.loop_insn_dep_map.get(iname, set())
+ needed_stmts=", ".join(
+ sched_state.loop_stmt_dep_map.get(iname, set())
-
- sched_state.scheduled_insn_ids)))
+ sched_state.scheduled_stmt_ids)))
continue
@@ -1120,8 +1120,8 @@ def generate_loop_schedules_internal(
iname_home_domain_params
&
set(kernel.temporary_variables)):
- writer_insn, = kernel.writer_map()[domain_par]
- if writer_insn not in sched_state.scheduled_insn_ids:
+ writer_stmt, = kernel.writer_map()[domain_par]
+ if writer_stmt not in sched_state.scheduled_stmt_ids:
data_dep_written = False
if debug_mode:
print("iname '%s' not scheduled because domain "
@@ -1134,21 +1134,21 @@ def generate_loop_schedules_internal(
# }}}
- # {{{ determine if that gets us closer to being able to schedule an insn
+ # {{{ determine if that gets us closer to being able to schedule a stmt
- usefulness = None # highest insn priority enabled by iname
+ usefulness = None # highest stmt priority enabled by iname
hypothetically_active_loops = active_inames_set | set([iname])
- for insn_id in reachable_insn_ids:
- insn = kernel.id_to_insn[insn_id]
+ for stmt_id in reachable_stmt_ids:
+ stmt = kernel.id_to_stmt[stmt_id]
- want = kernel.insn_inames(insn) | insn.boostable_into
+ want = kernel.stmt_inames(stmt) | stmt.boostable_into
if hypothetically_active_loops <= want:
if usefulness is None:
- usefulness = insn.priority
+ usefulness = stmt.priority
else:
- usefulness = max(usefulness, insn.priority)
+ usefulness = max(usefulness, stmt.priority)
if usefulness is None:
if debug_mode:
@@ -1262,18 +1262,18 @@ def generate_loop_schedules_internal(
if (
not sched_state.active_inames
- and not sched_state.unscheduled_insn_ids
+ and not sched_state.unscheduled_stmt_ids
and not sched_state.preschedule):
# if done, yield result
debug.log_success(sched_state.schedule)
- for boost_insn_id, boost_inames in sched_state.uses_of_boostability:
+ for boost_stmt_id, boost_inames in sched_state.uses_of_boostability:
warn_with_kernel(
kernel, "used_boostability",
- "instruction '%s' was implicitly nested inside "
+ "statement '%s' was implicitly nested inside "
"inames '%s' based on an idempotence heuristic. "
"This is deprecated and will stop working in loopy 2017.x."
- % (boost_insn_id, ", ".join(boost_inames)),
+ % (boost_stmt_id, ", ".join(boost_inames)),
DeprecationWarning)
yield sched_state.schedule
@@ -1296,31 +1296,31 @@ def generate_loop_schedules_internal(
# {{{ filter nops from schedule
def filter_nops_from_schedule(kernel, schedule):
- from loopy.kernel.instruction import NoOpInstruction
+ from loopy.kernel.statement import NoOpStatement
return [
sched_item
for sched_item in schedule
- if (not isinstance(sched_item, RunInstruction)
- or not isinstance(kernel.id_to_insn[sched_item.insn_id],
- NoOpInstruction))]
+ if (not isinstance(sched_item, RunStatement)
+ or not isinstance(kernel.id_to_stmt[sched_item.stmt_id],
+ NoOpStatement))]
# }}}
-# {{{ convert barrier instructions to proper barriers
+# {{{ convert barrier statements to proper barriers
-def convert_barrier_instructions_to_barriers(kernel, schedule):
- from loopy.kernel.instruction import BarrierInstruction
+def convert_barrier_statements_to_barriers(kernel, schedule):
+ from loopy.kernel.statement import BarrierStatement
result = []
for sched_item in schedule:
- if isinstance(sched_item, RunInstruction):
- insn = kernel.id_to_insn[sched_item.insn_id]
- if isinstance(insn, BarrierInstruction):
+ if isinstance(sched_item, RunStatement):
+ stmt = kernel.id_to_stmt[sched_item.stmt_id]
+ if isinstance(stmt, BarrierStatement):
result.append(Barrier(
- kind=insn.kind,
- originating_insn_id=insn.id,
- comment="Barrier inserted due to %s" % insn.id))
+ kind=stmt.kind,
+ originating_stmt_id=stmt.id,
+ comment="Barrier inserted due to %s" % stmt.id))
continue
result.append(sched_item)
@@ -1336,16 +1336,16 @@ class DependencyRecord(ImmutableRecord):
"""
.. attribute:: source
- A :class:`loopy.InstructionBase` instance.
+ A :class:`loopy.StatementBase` instance.
.. attribute:: target
- A :class:`loopy.InstructionBase` instance.
+ A :class:`loopy.StatementBase` instance.
.. attribute:: dep_descr
A string containing a phrase describing the dependency. The variables
- '{src}' and '{tgt}' will be replaced by their respective instruction IDs.
+ '{src}' and '{tgt}' will be replaced by their respective statement IDs.
.. attribute:: variable
@@ -1435,10 +1435,10 @@ class DependencyTracker(object):
def add_source(self, source):
"""
- Specify that an instruction may be used as the source of a dependency edge.
+ Specify that a statement may be used as the source of a dependency edge.
"""
- # If source is an insn ID, look up the actual instruction.
- source = self.kernel.id_to_insn.get(source, source)
+ # If source is a stmt ID, look up the actual statement.
+ source = self.kernel.id_to_stmt.get(source, source)
for written in self.map_to_base_storage(
set(source.assignee_var_names()) & self.relevant_vars):
@@ -1451,13 +1451,13 @@ class DependencyTracker(object):
def gen_dependencies_with_target_at(self, target):
"""
Generate :class:`DependencyRecord` instances for dependencies edges
- whose target is the given instruction.
+ whose target is the given statement.
- :arg target: The ID of the instruction for which dependencies
+ :arg target: The ID of the statement for which dependencies
with conflicting var access should be found.
"""
- # If target is an insn ID, look up the actual instruction.
- target = self.kernel.id_to_insn.get(target, target)
+ # If target is a stmt ID, look up the actual statement.
+ target = self.kernel.id_to_stmt.get(target, target)
tgt_write = self.map_to_base_storage(
set(target.assignee_var_names()) & self.relevant_vars)
@@ -1493,8 +1493,8 @@ class DependencyTracker(object):
continue
yield DependencyRecord(
- source=self.kernel.id_to_insn[source],
- target=self.kernel.id_to_insn[target],
+ source=self.kernel.id_to_stmt[source],
+ target=self.kernel.id_to_stmt[target],
dep_descr=dep_descr,
variable=var,
var_kind=self.var_kind)
@@ -1502,13 +1502,13 @@ class DependencyTracker(object):
def describe_dependency(self, source, target):
dep_descr = None
- source = self.kernel.id_to_insn[source]
- target = self.kernel.id_to_insn[target]
+ source = self.kernel.id_to_stmt[source]
+ target = self.kernel.id_to_stmt[target]
if self.reverse:
source, target = target, source
- target_deps = self.kernel.recursive_insn_dep_map()[target.id]
+ target_deps = self.kernel.recursive_stmt_dep_map()[target.id]
if source.id in target_deps:
if self.reverse:
dep_descr = "{tgt} rev-depends on {src}"
@@ -1529,15 +1529,15 @@ def barrier_kind_more_or_equally_global(kind1, kind2):
return (kind1 == kind2) or (kind1 == "global" and kind2 == "local")
-def insn_ids_reaching_end_without_intervening_barrier(schedule, kind):
- return _insn_ids_reaching_end(schedule, kind, reverse=False)
+def stmt_ids_reaching_end_without_intervening_barrier(schedule, kind):
+ return _stmt_ids_reaching_end(schedule, kind, reverse=False)
-def insn_ids_reachable_from_start_without_intervening_barrier(schedule, kind):
- return _insn_ids_reaching_end(schedule, kind, reverse=True)
+def stmt_ids_reachable_from_start_without_intervening_barrier(schedule, kind):
+ return _stmt_ids_reaching_end(schedule, kind, reverse=True)
-def _insn_ids_reaching_end(schedule, kind, reverse):
+def _stmt_ids_reaching_end(schedule, kind, reverse):
if reverse:
schedule = reversed(schedule)
enter_scope_item_kind = LeaveLoop
@@ -1546,45 +1546,45 @@ def _insn_ids_reaching_end(schedule, kind, reverse):
enter_scope_item_kind = EnterLoop
leave_scope_item_kind = LeaveLoop
- insn_ids_alive_at_scope = [set()]
+ stmt_ids_alive_at_scope = [set()]
for sched_item in schedule:
if isinstance(sched_item, enter_scope_item_kind):
- insn_ids_alive_at_scope.append(set())
+ stmt_ids_alive_at_scope.append(set())
elif isinstance(sched_item, leave_scope_item_kind):
- innermost_scope = insn_ids_alive_at_scope.pop()
- # Instructions in deeper scopes are alive but could be killed by
+ innermost_scope = stmt_ids_alive_at_scope.pop()
+ # Statements in deeper scopes are alive but could be killed by
# barriers at a shallower level, e.g.:
#
# for i
- # insn0
+ # stmt0
# end
- # barrier() <= kills insn0
+ # barrier() <= kills stmt0
#
# Hence we merge this scope into the parent scope.
- insn_ids_alive_at_scope[-1].update(innermost_scope)
+ stmt_ids_alive_at_scope[-1].update(innermost_scope)
elif isinstance(sched_item, Barrier):
- # This barrier kills only the instruction ids that are alive at
+ # This barrier kills only the statement ids that are alive at
# the current scope (or deeper). Without further analysis, we
- # can't assume that instructions at shallower scope can be
+ # can't assume that statements at shallower scope can be
# killed by deeper barriers, since loops might be empty, e.g.:
#
- # insn0 <= isn't killed by barrier (i loop could be empty)
+ # stmt0 <= isn't killed by barrier (i loop could be empty)
# for i
- # insn1 <= is killed by barrier
+ # stmt1 <= is killed by barrier
# for j
- # insn2 <= is killed by barrier
+ # stmt2 <= is killed by barrier
# end
# barrier()
# end
if barrier_kind_more_or_equally_global(sched_item.kind, kind):
- insn_ids_alive_at_scope[-1].clear()
+ stmt_ids_alive_at_scope[-1].clear()
else:
- insn_ids_alive_at_scope[-1] |= set(
- insn_id for insn_id in sched_item_to_insn_id(sched_item))
+ stmt_ids_alive_at_scope[-1] |= set(
+ stmt_id for stmt_id in sched_item_to_stmt_id(sched_item))
- assert len(insn_ids_alive_at_scope) == 1
- return insn_ids_alive_at_scope[-1]
+ assert len(stmt_ids_alive_at_scope) == 1
+ return stmt_ids_alive_at_scope[-1]
def append_barrier_or_raise_error(schedule, dep, verify_only):
@@ -1594,7 +1594,7 @@ def append_barrier_or_raise_error(schedule, dep, verify_only):
"Dependency '%s' (for variable '%s') "
"requires synchronization "
"by a %s barrier (add a 'no_sync_with' "
- "instruction option to state that no "
+ "statement option to state that no "
"synchronization is needed)"
% (
dep.dep_descr.format(
@@ -1608,7 +1608,7 @@ def append_barrier_or_raise_error(schedule, dep, verify_only):
schedule.append(Barrier(
comment=comment,
kind=dep.var_kind,
- originating_insn_id=None))
+ originating_stmt_id=None))
def insert_barriers(kernel, schedule, kind, verify_only, level=0):
@@ -1629,10 +1629,10 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0):
if reverse:
# Populate the dependency tracker with sources from the tail end of
# the schedule block.
- for insn_id in (
- insn_ids_reaching_end_without_intervening_barrier(
+ for stmt_id in (
+ stmt_ids_reaching_end_without_intervening_barrier(
schedule, kind)):
- dep_tracker.add_source(insn_id)
+ dep_tracker.add_source(stmt_id)
result = []
@@ -1644,11 +1644,11 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0):
subloop, new_i = gather_schedule_block(schedule, i)
loop_head = (
- insn_ids_reachable_from_start_without_intervening_barrier(
+ stmt_ids_reachable_from_start_without_intervening_barrier(
subloop, kind))
loop_tail = (
- insn_ids_reaching_end_without_intervening_barrier(
+ stmt_ids_reaching_end_without_intervening_barrier(
subloop, kind))
# Checks if a barrier is needed before the loop. This handles
@@ -1662,8 +1662,8 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0):
# ...
from itertools import chain
for dep in chain.from_iterable(
- dep_tracker.gen_dependencies_with_target_at(insn)
- for insn in loop_head):
+ dep_tracker.gen_dependencies_with_target_at(stmt)
+ for stmt in loop_head):
append_barrier_or_raise_error(result, dep, verify_only)
# This barrier gets inserted outside the loop, hence it is
# executed unconditionally and so kills all sources before
@@ -1692,14 +1692,14 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0):
dep_tracker.discard_all_sources()
i += 1
- elif isinstance(sched_item, RunInstruction):
+ elif isinstance(sched_item, RunStatement):
for dep in dep_tracker.gen_dependencies_with_target_at(
- sched_item.insn_id):
+ sched_item.stmt_id):
append_barrier_or_raise_error(result, dep, verify_only)
dep_tracker.discard_all_sources()
break
result.append(sched_item)
- dep_tracker.add_source(sched_item.insn_id)
+ dep_tracker.add_source(sched_item.stmt_id)
i += 1
elif isinstance(sched_item, (CallKernel, ReturnFromKernel)):
@@ -1731,7 +1731,7 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0):
i = new_i
elif isinstance(sched_item,
- (Barrier, RunInstruction, CallKernel, ReturnFromKernel)):
+ (Barrier, RunStatement, CallKernel, ReturnFromKernel)):
result.append(sched_item)
i += 1
@@ -1756,7 +1756,7 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0):
def generate_loop_schedules(kernel, debug_args={}):
from pytools import MinRecursionLimit
- with MinRecursionLimit(max(len(kernel.instructions) * 2,
+ with MinRecursionLimit(max(len(kernel.statements) * 2,
len(kernel.all_inames()) * 4)):
for sched in generate_loop_schedules_inner(kernel, debug_args=debug_args):
yield sched
@@ -1778,14 +1778,14 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
preschedule = kernel.schedule if kernel.state == kernel_state.SCHEDULED else ()
prescheduled_inames = set(
- insn.iname
- for insn in preschedule
- if isinstance(insn, EnterLoop))
+ stmt.iname
+ for stmt in preschedule
+ if isinstance(stmt, EnterLoop))
- prescheduled_insn_ids = set(
- insn_id
+ prescheduled_stmt_ids = set(
+ stmt_id
for item in preschedule
- for insn_id in sched_item_to_insn_id(item))
+ for stmt_id in sched_item_to_stmt_id(item))
from loopy.kernel.data import IlpBaseTag, ConcurrentTag, VectorizeTag
ilp_inames = set(
@@ -1805,7 +1805,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
sched_state = SchedulerState(
kernel=kernel,
loop_nest_around_map=loop_nest_around_map,
- loop_insn_dep_map=find_loop_insn_dep_map(
+ loop_stmt_dep_map=find_loop_stmt_dep_map(
kernel,
loop_nest_with_map=loop_nest_with_map,
loop_nest_around_map=loop_nest_around_map),
@@ -1814,7 +1814,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
vec_inames=vec_inames,
prescheduled_inames=prescheduled_inames,
- prescheduled_insn_ids=prescheduled_insn_ids,
+ prescheduled_stmt_ids=prescheduled_stmt_ids,
# time-varying part
active_inames=(),
@@ -1823,8 +1823,8 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
schedule=(),
- unscheduled_insn_ids=set(insn.id for insn in kernel.instructions),
- scheduled_insn_ids=frozenset(),
+ unscheduled_stmt_ids=set(stmt.id for stmt in kernel.statements),
+ scheduled_stmt_ids=frozenset(),
within_subkernel=kernel.state != kernel_state.SCHEDULED,
may_schedule_global_barriers=True,
@@ -1833,7 +1833,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
# ilp and vec are not parallel for the purposes of the scheduler
parallel_inames=parallel_inames - ilp_inames - vec_inames,
- group_insn_counts=group_insn_counts(kernel),
+ group_stmt_counts=group_stmt_counts(kernel),
active_group_counts={},
uses_of_boostability=[])
@@ -1881,7 +1881,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
debug.stop()
gen_sched = filter_nops_from_schedule(kernel, gen_sched)
- gen_sched = convert_barrier_instructions_to_barriers(
+ gen_sched = convert_barrier_statements_to_barriers(
kernel, gen_sched)
gsize, lsize = kernel.get_grid_size_upper_bounds()
diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py
index 1a0789c2f61e21e4a0371e2a73195c9771245527..9c73fdffe98d78cfb11968ca5964dadeca8f28b4 100644
--- a/loopy/schedule/device_mapping.py
+++ b/loopy/schedule/device_mapping.py
@@ -24,7 +24,7 @@ THE SOFTWARE.
from loopy.diagnostic import LoopyError
from loopy.schedule import (Barrier, CallKernel, EnterLoop, LeaveLoop,
- ReturnFromKernel, RunInstruction)
+ ReturnFromKernel, RunStatement)
from loopy.schedule.tools import get_block_boundaries
@@ -72,7 +72,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen):
while i <= end_idx:
sched_item = schedule[i]
- if isinstance(sched_item, RunInstruction):
+ if isinstance(sched_item, RunStatement):
current_chunk.append(sched_item)
i += 1
@@ -145,7 +145,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen):
new_schedule +
[dummy_return.copy()])
- # Assign names, extra_inames to CallKernel / ReturnFromKernel instructions
+ # Assign names, extra_inames to CallKernel / ReturnFromKernel statements
inames = []
for idx, sched_item in enumerate(new_schedule):
diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py
index f9b08d3434556f912107726f125abbfa110f5676..1801ff8e0c6c68c58ed11298dc6aede526216cd8 100644
--- a/loopy/schedule/tools.py
+++ b/loopy/schedule/tools.py
@@ -51,20 +51,20 @@ def get_block_boundaries(schedule):
# {{{ subkernel tools
def temporaries_read_in_subkernel(kernel, subkernel):
- from loopy.kernel.tools import get_subkernel_to_insn_id_map
- insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel]
+ from loopy.kernel.tools import get_subkernel_to_stmt_id_map
+ stmt_ids = get_subkernel_to_stmt_id_map(kernel)[subkernel]
return frozenset(tv
- for insn_id in insn_ids
- for tv in kernel.id_to_insn[insn_id].read_dependency_names()
+ for stmt_id in stmt_ids
+ for tv in kernel.id_to_stmt[stmt_id].read_dependency_names()
if tv in kernel.temporary_variables)
def temporaries_written_in_subkernel(kernel, subkernel):
- from loopy.kernel.tools import get_subkernel_to_insn_id_map
- insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel]
+ from loopy.kernel.tools import get_subkernel_to_stmt_id_map
+ stmt_ids = get_subkernel_to_stmt_id_map(kernel)[subkernel]
return frozenset(tv
- for insn_id in insn_ids
- for tv in kernel.id_to_insn[insn_id].write_dependency_names()
+ for stmt_id in stmt_ids
+ for tv in kernel.id_to_stmt[stmt_id].write_dependency_names()
if tv in kernel.temporary_variables)
# }}}
@@ -75,7 +75,7 @@ def temporaries_written_in_subkernel(kernel, subkernel):
def add_extra_args_to_schedule(kernel):
"""
Fill the `extra_args` fields in all the :class:`loopy.schedule.CallKernel`
- instructions in the schedule with global temporaries.
+ statements in the schedule with global temporaries.
"""
new_schedule = []
from loopy.schedule import CallKernel
diff --git a/loopy/statistics.py b/loopy/statistics.py
index 88d7ec328345fd4c97d75b449385316f99c2509d..69bf555bc2a8207a0dd8d9ca8fd9cdcd9841c0e0 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -1097,7 +1097,7 @@ def count(kernel, set, space=None):
return add_assumptions_guard(kernel, count)
-def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None):
+def get_unused_hw_axes_factor(knl, stmt, disregard_local_axes, space=None):
# FIXME: Multi-kernel support
gsize, lsize = knl.get_grid_size_upper_bounds()
@@ -1105,7 +1105,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None):
l_used = set()
from loopy.kernel.data import LocalIndexTag, GroupIndexTag
- for iname in knl.insn_inames(insn):
+ for iname in knl.stmt_inames(stmt):
tag = knl.iname_to_tag.get(iname)
if isinstance(tag, LocalIndexTag):
@@ -1135,17 +1135,17 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None):
return add_assumptions_guard(knl, result)
-def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False):
- insn_inames = knl.insn_inames(insn)
+def count_stmt_runs(knl, stmt, count_redundant_work, disregard_local_axes=False):
+ stmt_inames = knl.stmt_inames(stmt)
if disregard_local_axes:
from loopy.kernel.data import LocalIndexTag
- insn_inames = [iname for iname in insn_inames if not
+ stmt_inames = [iname for iname in stmt_inames if not
isinstance(knl.iname_to_tag.get(iname), LocalIndexTag)]
- inames_domain = knl.get_inames_domain(insn_inames)
+ inames_domain = knl.get_inames_domain(stmt_inames)
domain = (inames_domain.project_out_except(
- insn_inames, [dim_type.set]))
+ stmt_inames, [dim_type.set]))
space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT,
set=[], params=knl.outer_params())
@@ -1153,7 +1153,7 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False)
c = count(knl, domain, space=space)
if count_redundant_work:
- unused_fac = get_unused_hw_axes_factor(knl, insn,
+ unused_fac = get_unused_hw_axes_factor(knl, stmt,
disregard_local_axes=disregard_local_axes,
space=space)
return c * unused_fac
@@ -1210,10 +1210,10 @@ def get_op_map(knl, numpy_types=True, count_redundant_work=False):
op_map = ToCountMap()
op_counter = ExpressionOpCounter(knl)
- for insn in knl.instructions:
- ops = op_counter(insn.assignee) + op_counter(insn.expression)
- op_map = op_map + ops*count_insn_runs(
- knl, insn,
+ for stmt in knl.statements:
+ ops = op_counter(stmt.assignee) + op_counter(stmt.expression)
+ op_map = op_map + ops*count_stmt_runs(
+ knl, stmt,
count_redundant_work=count_redundant_work)
if numpy_types:
@@ -1296,11 +1296,11 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False):
cache_holder = CacheHolder()
- @memoize_in(cache_holder, "insn_count")
- def get_insn_count(knl, insn_id, uniform=False):
- insn = knl.id_to_insn[insn_id]
- return count_insn_runs(
- knl, insn, disregard_local_axes=uniform,
+ @memoize_in(cache_holder, "stmt_count")
+ def get_stmt_count(knl, stmt_id, uniform=False):
+ stmt = knl.id_to_stmt[stmt_id]
+ return count_stmt_runs(
+ knl, stmt, disregard_local_axes=uniform,
count_redundant_work=count_redundant_work)
knl = infer_unknown_types(knl, expect_completion=True)
@@ -1310,13 +1310,13 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False):
access_counter_g = GlobalMemAccessCounter(knl)
access_counter_l = LocalMemAccessCounter(knl)
- for insn in knl.instructions:
+ for stmt in knl.statements:
access_expr = (
- access_counter_g(insn.expression)
- + access_counter_l(insn.expression)
+ access_counter_g(stmt.expression)
+ + access_counter_l(stmt.expression)
).with_set_attributes(direction="load")
- access_assignee_g = access_counter_g(insn.assignee).with_set_attributes(
+ access_assignee_g = access_counter_g(stmt.assignee).with_set_attributes(
direction="store")
# FIXME: (!!!!) for now, don't count writes to local mem
@@ -1329,7 +1329,7 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False):
access_map = (
access_map
+ ToCountMap({key: val})
- * get_insn_count(knl, insn.id, is_uniform))
+ * get_stmt_count(knl, stmt.id, is_uniform))
#currently not counting stride of local mem access
for key, val in six.iteritems(access_assignee_g.count_map):
@@ -1339,7 +1339,7 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False):
access_map = (
access_map
+ ToCountMap({key: val})
- * get_insn_count(knl, insn.id, is_uniform))
+ * get_stmt_count(knl, stmt.id, is_uniform))
# for now, don't count writes to local mem
if numpy_types:
@@ -1387,7 +1387,7 @@ def get_synchronization_map(knl):
from loopy.preprocess import preprocess_kernel, infer_unknown_types
from loopy.schedule import (EnterLoop, LeaveLoop, Barrier,
- CallKernel, ReturnFromKernel, RunInstruction)
+ CallKernel, ReturnFromKernel, RunStatement)
from operator import mul
knl = infer_unknown_types(knl, expect_completion=True)
knl = preprocess_kernel(knl)
@@ -1424,7 +1424,7 @@ def get_synchronization_map(knl):
result = result + ToCountMap(
{"kernel_launch": get_count_poly(iname_list)})
- elif isinstance(sched_item, (ReturnFromKernel, RunInstruction)):
+ elif isinstance(sched_item, (ReturnFromKernel, RunStatement)):
pass
else:
@@ -1459,24 +1459,24 @@ def gather_access_footprints(kernel, ignore_uncountable=False):
write_footprints = []
read_footprints = []
- for insn in kernel.instructions:
- if not isinstance(insn, MultiAssignmentBase):
+ for stmt in kernel.statements:
+ if not isinstance(stmt, MultiAssignmentBase):
warn_with_kernel(kernel, "count_non_assignment",
- "Non-assignment instruction encountered in "
+ "Non-assignment statement encountered in "
"gather_access_footprints, not counted")
continue
- insn_inames = kernel.insn_inames(insn)
- inames_domain = kernel.get_inames_domain(insn_inames)
- domain = (inames_domain.project_out_except(insn_inames,
+ stmt_inames = kernel.stmt_inames(stmt)
+ inames_domain = kernel.get_inames_domain(stmt_inames)
+ domain = (inames_domain.project_out_except(stmt_inames,
[dim_type.set]))
afg = AccessFootprintGatherer(kernel, domain,
ignore_uncountable=ignore_uncountable)
- for assignee in insn.assignees:
- write_footprints.append(afg(insn.assignees))
- read_footprints.append(afg(insn.expression))
+ for assignee in stmt.assignees:
+ write_footprints.append(afg(stmt.assignees))
+ read_footprints.append(afg(stmt.expression))
write_footprints = AccessFootprintGatherer.combine(write_footprints)
read_footprints = AccessFootprintGatherer.combine(read_footprints)
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 543c2743bb98b09b706c2fdbf9188ed0a85d97f2..8ac520d700d5bca29fbdd26b6790f7d8347ca086 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -595,7 +595,7 @@ def parse_tagged_name(expr):
class ExpansionState(ImmutableRecord):
"""
.. attribute:: kernel
- .. attribute:: instruction
+ .. attribute:: statement
.. attribute:: stack
@@ -608,8 +608,8 @@ class ExpansionState(ImmutableRecord):
"""
@property
- def insn_id(self):
- return self.instruction.id
+ def stmt_id(self):
+ return self.statement.id
def apply_arg_context(self, expr):
from pymbolic.mapper.substitutor import make_subst_func
@@ -651,12 +651,12 @@ class SubstitutionRuleRenamer(IdentityMapper):
return TaggedVariable(new_name, tag)
-def rename_subst_rules_in_instructions(insns, renames):
+def rename_subst_rules_in_statements(stmts, renames):
subst_renamer = SubstitutionRuleRenamer(renames)
return [
- insn.with_transformed_expressions(subst_renamer)
- for insn in insns]
+ stmt.with_transformed_expressions(subst_renamer)
+ for stmt in stmts]
class SubstitutionRuleMappingContext(object):
@@ -766,11 +766,11 @@ class SubstitutionRuleMappingContext(object):
def finish_kernel(self, kernel):
new_substs, renames = self._get_new_substitutions_and_renames()
- new_insns = rename_subst_rules_in_instructions(kernel.instructions, renames)
+ new_stmts = rename_subst_rules_in_statements(kernel.statements, renames)
return kernel.copy(
substitutions=new_substs,
- instructions=new_insns)
+ statements=new_stmts)
class RuleAwareIdentityMapper(IdentityMapper):
@@ -844,30 +844,30 @@ class RuleAwareIdentityMapper(IdentityMapper):
else:
return sym
- def __call__(self, expr, kernel, insn):
- from loopy.kernel.data import InstructionBase
- assert insn is None or isinstance(insn, InstructionBase)
+ def __call__(self, expr, kernel, stmt):
+ from loopy.kernel.data import StatementBase
+ assert stmt is None or isinstance(stmt, StatementBase)
return IdentityMapper.__call__(self, expr,
ExpansionState(
kernel=kernel,
- instruction=insn,
+ statement=stmt,
stack=(),
arg_context={}))
- def map_instruction(self, kernel, insn):
- return insn
+ def map_statement(self, kernel, stmt):
+ return stmt
def map_kernel(self, kernel):
- new_insns = [
+ new_stmts = [
# While subst rules are not allowed in assignees, the mapper
# may perform tasks entirely unrelated to subst rules, so
# we must map assignees, too.
- self.map_instruction(kernel,
- insn.with_transformed_expressions(self, kernel, insn))
- for insn in kernel.instructions]
+ self.map_statement(kernel,
+ stmt.with_transformed_expressions(self, kernel, stmt))
+ for stmt in kernel.statements]
- return kernel.copy(instructions=new_insns)
+ return kernel.copy(statements=new_stmts)
class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
@@ -880,7 +880,7 @@ class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper):
def map_variable(self, expr, expn_state):
if (expr.name in expn_state.arg_context
or not self.within(
- expn_state.kernel, expn_state.instruction, expn_state.stack)):
+ expn_state.kernel, expn_state.statement, expn_state.stack)):
return super(RuleAwareSubstitutionMapper, self).map_variable(
expr, expn_state)
@@ -907,7 +907,7 @@ class RuleAwareSubstitutionRuleExpander(RuleAwareIdentityMapper):
new_stack = expn_state.stack + ((name, tags),)
- if self.within(expn_state.kernel, expn_state.instruction, new_stack):
+ if self.within(expn_state.kernel, expn_state.statement, new_stack):
# expand
rule = self.rules[name]
diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py
index 5800a0236e8ae5f81a63942c31a74822bc2fab96..f34de91504e2a88100f628134f78bd616113ba62 100644
--- a/loopy/target/__init__.py
+++ b/loopy/target/__init__.py
@@ -201,10 +201,10 @@ class ASTBuilderBase(object):
def get_image_arg_decl(self, name, shape, num_target_axes, dtype, is_written):
raise NotImplementedError()
- def emit_assignment(self, codegen_state, insn):
+ def emit_assignment(self, codegen_state, stmt):
raise NotImplementedError()
- def emit_multiple_assignment(self, codegen_state, insn):
+ def emit_multiple_assignment(self, codegen_state, stmt):
raise NotImplementedError()
def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index e54ac0f693c4704c13b8c435e4bc7acaac1b1a47..09d1382417507f91bbecea77593ec04801bbeaf8 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -607,18 +607,18 @@ class CASTBuilder(ASTBuilderBase):
return arg_decl
- def emit_assignment(self, codegen_state, insn):
+ def emit_assignment(self, codegen_state, stmt):
kernel = codegen_state.kernel
ecm = codegen_state.expression_to_code_mapper
- assignee_var_name, = insn.assignee_var_names()
+ assignee_var_name, = stmt.assignee_var_names()
lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
lhs_dtype = lhs_var.dtype
- if insn.atomicity is not None:
+ if stmt.atomicity is not None:
lhs_atomicity = [
- a for a in insn.atomicity if a.var_name == assignee_var_name]
+ a for a in stmt.atomicity if a.var_name == assignee_var_name]
assert len(lhs_atomicity) <= 1
if lhs_atomicity:
lhs_atomicity, = lhs_atomicity
@@ -630,13 +630,13 @@ class CASTBuilder(ASTBuilderBase):
from loopy.kernel.data import AtomicInit, AtomicUpdate
from loopy.expression import dtype_to_type_context
- lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
+ lhs_code = ecm(stmt.assignee, prec=PREC_NONE, type_context=None)
rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
if lhs_atomicity is None:
from cgen import Assign
return Assign(
lhs_code,
- ecm(insn.expression, prec=PREC_NONE,
+ ecm(stmt.expression, prec=PREC_NONE,
type_context=rhs_type_context,
needed_dtype=lhs_dtype))
@@ -647,7 +647,7 @@ class CASTBuilder(ASTBuilderBase):
codegen_state.seen_atomic_dtypes.add(lhs_dtype)
return codegen_state.ast_builder.emit_atomic_update(
codegen_state, lhs_atomicity, lhs_var,
- insn.assignee, insn.expression,
+ stmt.assignee, stmt.expression,
lhs_dtype, rhs_type_context)
else:
@@ -658,16 +658,16 @@ class CASTBuilder(ASTBuilderBase):
lhs_expr, rhs_expr, lhs_dtype):
raise NotImplementedError("atomic updates in %s" % type(self).__name__)
- def emit_tuple_assignment(self, codegen_state, insn):
+ def emit_tuple_assignment(self, codegen_state, stmt):
ecm = codegen_state.expression_to_code_mapper
from cgen import Assign, block_if_necessary
assignments = []
for i, (assignee, parameter) in enumerate(
- zip(insn.assignees, insn.expression.parameters)):
+ zip(stmt.assignees, stmt.expression.parameters)):
lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None)
- assignee_var_name = insn.assignee_var_names()[i]
+ assignee_var_name = stmt.assignee_var_names()[i]
lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
lhs_dtype = lhs_var.dtype
@@ -681,21 +681,21 @@ class CASTBuilder(ASTBuilderBase):
return block_if_necessary(assignments)
- def emit_multiple_assignment(self, codegen_state, insn):
+ def emit_multiple_assignment(self, codegen_state, stmt):
ecm = codegen_state.expression_to_code_mapper
from pymbolic.primitives import Variable
from pymbolic.mapper.stringifier import PREC_NONE
- func_id = insn.expression.function
- parameters = insn.expression.parameters
+ func_id = stmt.expression.function
+ parameters = stmt.expression.parameters
if isinstance(func_id, Variable):
func_id = func_id.name
assignee_var_descriptors = [
codegen_state.kernel.get_var_descriptor(a)
- for a in insn.assignee_var_names()]
+ for a in stmt.assignee_var_names()]
par_dtypes = tuple(ecm.infer_type(par) for par in parameters)
@@ -709,7 +709,7 @@ class CASTBuilder(ASTBuilderBase):
if mangle_result.target_name == "loopy_make_tuple":
# This shorcut avoids actually having to emit a 'make_tuple' function.
- return self.emit_tuple_assignment(codegen_state, insn)
+ return self.emit_tuple_assignment(codegen_state, stmt)
from loopy.expression import dtype_to_type_context
c_parameters = [
@@ -727,10 +727,10 @@ class CASTBuilder(ASTBuilderBase):
from pymbolic import var
for i, (a, tgt_dtype) in enumerate(
- zip(insn.assignees[1:], mangle_result.result_dtypes[1:])):
+ zip(stmt.assignees[1:], mangle_result.result_dtypes[1:])):
if tgt_dtype != ecm.infer_type(a):
raise LoopyError("type mismatch in %d'th (1-based) left-hand "
- "side of instruction '%s'" % (i+1, insn.id))
+ "side of statement '%s'" % (i+1, stmt.id))
c_parameters.append(
# TODO Yuck: The "where-at function": &(...)
var("&")(
@@ -752,7 +752,7 @@ class CASTBuilder(ASTBuilderBase):
assignee_var_descriptors[0].dtype,
result)
- lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None)
+ lhs_code = ecm(stmt.assignees[0], prec=PREC_NONE, type_context=None)
from cgen import Assign
return Assign(
diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py
index 2bdffb5aa69bdc0f72fe12a58faa6d0e78920e0f..484973433120da2596540f8f0279ae7820715805 100644
--- a/loopy/target/cuda.py
+++ b/loopy/target/cuda.py
@@ -244,10 +244,10 @@ class CUDACASTBuilder(CASTBuilder):
from cgen import Extern
fdecl = Extern("C", fdecl)
- from loopy.schedule import get_insn_ids_for_block_at
+ from loopy.schedule import get_stmt_ids_for_block_at
_, local_grid_size = \
- codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs(
- get_insn_ids_for_block_at(
+ codegen_state.kernel.get_grid_sizes_for_stmt_ids_as_exprs(
+ get_stmt_ids_for_block_at(
codegen_state.kernel.schedule, schedule_index))
from loopy.symbolic import get_dependencies
@@ -294,7 +294,7 @@ class CUDACASTBuilder(CASTBuilder):
def emit_barrier(self, kind, comment):
"""
:arg kind: ``"local"`` or ``"global"``
- :return: a :class:`loopy.codegen.GeneratedInstruction`.
+ :return: a :class:`loopy.codegen.GeneratedStatement`.
"""
if kind == "local":
if comment:
diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py
index 35dade90494906b61aad9eb66e7271f2c5d1e180..4d5a605958462690d5489d15973feb97d2b9bc7d 100644
--- a/loopy/target/ispc.py
+++ b/loopy/target/ispc.py
@@ -362,31 +362,31 @@ class ISPCASTBuilder(CASTBuilder):
from cgen.ispc import ISPCUniform
return ISPCUniform(result)
- def emit_assignment(self, codegen_state, insn):
+ def emit_assignment(self, codegen_state, stmt):
kernel = codegen_state.kernel
ecm = codegen_state.expression_to_code_mapper
- assignee_var_name, = insn.assignee_var_names()
+ assignee_var_name, = stmt.assignee_var_names()
lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name)
lhs_dtype = lhs_var.dtype
- if insn.atomicity:
+ if stmt.atomicity:
raise NotImplementedError("atomic ops in ISPC")
from loopy.expression import dtype_to_type_context
from pymbolic.mapper.stringifier import PREC_NONE
rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)
- rhs_code = ecm(insn.expression, prec=PREC_NONE,
+ rhs_code = ecm(stmt.expression, prec=PREC_NONE,
type_context=rhs_type_context,
needed_dtype=lhs_dtype)
- lhs = insn.assignee
+ lhs = stmt.assignee
# {{{ handle streaming stores
- if "!streaming_store" in insn.tags:
+ if "!streaming_store" in stmt.tags:
ary = ecm.find_array(lhs)
from loopy.kernel.array import get_access_info
@@ -455,7 +455,7 @@ class ISPCASTBuilder(CASTBuilder):
isinstance(
kernel.iname_to_tag.get(dep), LocalIndexTag)
and kernel.iname_to_tag.get(dep).axis == 0
- for dep in get_dependencies(insn.expression))
+ for dep in get_dependencies(stmt.expression))
if not rhs_has_programindex:
rhs_code = "broadcast(%s, 0)" % rhs_code
diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py
index a5f7562c41c3ec8eca673904550e078d2a992241..e47a7466a877d80fab273eff7db221d66236ae5c 100644
--- a/loopy/target/opencl.py
+++ b/loopy/target/opencl.py
@@ -413,9 +413,9 @@ class OpenCLCASTBuilder(CASTBuilder):
from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
fdecl = CLKernel(fdecl)
- from loopy.schedule import get_insn_ids_for_block_at
- _, local_sizes = codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs(
- get_insn_ids_for_block_at(
+ from loopy.schedule import get_stmt_ids_for_block_at
+ _, local_sizes = codegen_state.kernel.get_grid_sizes_for_stmt_ids_as_exprs(
+ get_stmt_ids_for_block_at(
codegen_state.kernel.schedule, schedule_index))
from loopy.symbolic import get_dependencies
@@ -453,7 +453,7 @@ class OpenCLCASTBuilder(CASTBuilder):
def emit_barrier(self, kind, comment):
"""
:arg kind: ``"local"`` or ``"global"``
- :return: a :class:`loopy.codegen.GeneratedInstruction`.
+ :return: a :class:`loopy.codegen.GeneratedStatement`.
"""
if kind == "local":
if comment:
diff --git a/loopy/target/python.py b/loopy/target/python.py
index ce04986d3d2a39dcf7126339055d32fa16ffcc25..4c3c33fb5b6a94b078bbba80a01869eaf3054975 100644
--- a/loopy/target/python.py
+++ b/loopy/target/python.py
@@ -291,18 +291,18 @@ class PythonASTBuilderBase(ASTBuilderBase):
from genpy import If
return If(condition_str, ast)
- def emit_assignment(self, codegen_state, insn):
+ def emit_assignment(self, codegen_state, stmt):
ecm = codegen_state.expression_to_code_mapper
- if insn.atomicity:
+ if stmt.atomicity:
raise NotImplementedError("atomic ops in Python")
from pymbolic.mapper.stringifier import PREC_NONE
from genpy import Assign
return Assign(
- ecm(insn.assignee, prec=PREC_NONE, type_context=None),
- ecm(insn.expression, prec=PREC_NONE, type_context=None))
+ ecm(stmt.assignee, prec=PREC_NONE, type_context=None),
+ ecm(stmt.expression, prec=PREC_NONE, type_context=None))
# }}}
diff --git a/loopy/tools.py b/loopy/tools.py
index d6952d54782f113685299641c828907fb7f32a46..022bebabc99edaf9030273d9c0203db966c9c975 100644
--- a/loopy/tools.py
+++ b/loopy/tools.py
@@ -135,8 +135,8 @@ class LoopyEqKeyBuilder(object):
Usage::
kb = LoopyEqKeyBuilder()
- kb.update_for_class(insn.__class__)
- kb.update_for_field("field", insn.field)
+ kb.update_for_class(stmt.__class__)
+ kb.update_for_field("field", stmt.field)
...
key = kb.key()
diff --git a/loopy/transform/arithmetic.py b/loopy/transform/arithmetic.py
index b7f47c38a6a0daf8e4495c16791ef2f955019649..6ec67bf5ac4fe324a3b3ef718711427fadf1c646 100644
--- a/loopy/transform/arithmetic.py
+++ b/loopy/transform/arithmetic.py
@@ -34,9 +34,9 @@ def fold_constants(kernel):
from loopy.symbolic import ConstantFoldingMapper
cfm = ConstantFoldingMapper()
- new_insns = [
- insn.with_transformed_expressions(cfm)
- for insn in kernel.instructions]
+ new_stmts = [
+ stmt.with_transformed_expressions(cfm)
+ for stmt in kernel.statements]
new_substs = dict(
(sub.name,
@@ -44,7 +44,7 @@ def fold_constants(kernel):
for sub in six.itervalues(kernel.substitutions))
return kernel.copy(
- instructions=new_insns,
+ statements=new_stmts,
substitutions=new_substs)
# }}}
@@ -135,8 +135,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
else:
raise ValueError("unexpected type of access_expr")
- def is_assignee(insn):
- return var_name in insn.assignee_var_names()
+ def is_assignee(stmt):
+ return var_name in stmt.assignee_var_names()
def iterate_as(cls, expr):
if isinstance(expr, cls):
@@ -151,16 +151,16 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
from loopy.kernel.data import Assignment
- for insn in kernel.instructions:
- if not is_assignee(insn):
+ for stmt in kernel.statements:
+ if not is_assignee(stmt):
continue
- if not isinstance(insn, Assignment):
+ if not isinstance(stmt, Assignment):
raise LoopyError("'%s' modified by non-single-assignment"
% var_name)
- lhs = insn.assignee
- rhs = insn.expression
+ lhs = stmt.assignee
+ rhs = stmt.expression
if is_zero(rhs):
continue
@@ -182,8 +182,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
for part in iterate_as(Product, term):
if var_name in get_dependencies(part):
raise LoopyError("unexpected dependency on '%s' "
- "in RHS of instruction '%s'"
- % (var_name, insn.id))
+ "in RHS of statement '%s'"
+ % (var_name, stmt.id))
product_parts = set(iterate_as(Product, term))
@@ -211,8 +211,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
for part in iterate_as(Product, term):
if var_name in get_dependencies(part):
raise LoopyError("unexpected dependency on '%s' "
- "in RHS of instruction '%s'"
- % (var_name, insn.id))
+ "in RHS of statement '%s'"
+ % (var_name, stmt.id))
product_parts = set(iterate_as(Product, term))
@@ -235,27 +235,27 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
# {{{ remove common factors
- new_insns = []
+ new_stmts = []
- for insn in kernel.instructions:
- if not isinstance(insn, Assignment) or not is_assignee(insn):
- new_insns.append(insn)
+ for stmt in kernel.statements:
+ if not isinstance(stmt, Assignment) or not is_assignee(stmt):
+ new_stmts.append(stmt)
continue
- index_key = extract_index_key(insn.assignee)
+ index_key = extract_index_key(stmt.assignee)
- lhs = insn.assignee
- rhs = insn.expression
+ lhs = stmt.assignee
+ rhs = stmt.expression
if is_zero(rhs):
- new_insns.append(insn)
+ new_stmts.append(stmt)
continue
index_key = extract_index_key(lhs)
cf_index, unif_result = find_unifiable_cf_index(index_key)
if cf_index is None:
- new_insns.append(insn)
+ new_stmts.append(stmt)
continue
_, my_common_factors = common_factors[cf_index]
@@ -281,8 +281,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
if part not in mapped_my_common_factors
]))
- new_insns.append(
- insn.copy(expression=flattened_sum(new_sum_terms)))
+ new_stmts.append(
+ stmt.copy(expression=flattened_sum(new_sum_terms)))
# }}}
@@ -314,21 +314,21 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
else:
return expr
- insns = new_insns
- new_insns = []
+ stmts = new_stmts
+ new_stmts = []
subm = SubstitutionMapper(find_substitution)
- for insn in insns:
- if not isinstance(insn, Assignment) or is_assignee(insn):
- new_insns.append(insn)
+ for stmt in stmts:
+ if not isinstance(stmt, Assignment) or is_assignee(stmt):
+ new_stmts.append(stmt)
continue
- new_insns.append(insn.with_transformed_expressions(subm))
+ new_stmts.append(stmt.with_transformed_expressions(subm))
# }}}
- return kernel.copy(instructions=new_insns)
+ return kernel.copy(statements=new_stmts)
# }}}
diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py
index e7a86300f9d040cba1688e5bb0f3dcbbd926f783..e74259cc397282623bdc22fe19c00f1c4725c3f6 100644
--- a/loopy/transform/batch.py
+++ b/loopy/transform/batch.py
@@ -168,9 +168,9 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch",
batch_iname_set = frozenset([batch_iname])
kernel = kernel.copy(
- instructions=[
- insn.copy(within_inames=insn.within_inames | batch_iname_set)
- for insn in kernel.instructions])
+ statements=[
+ stmt.copy(within_inames=stmt.within_inames | batch_iname_set)
+ for stmt in kernel.statements])
return kernel
diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py
index 1b059b6a73d3064596b8679fbc87f94287b2d9fe..c0ef05d24c3541004c024b3be07c513ad78bcb7c 100644
--- a/loopy/transform/buffer.py
+++ b/loopy/transform/buffer.py
@@ -52,7 +52,7 @@ class ArrayAccessReplacer(RuleAwareIdentityMapper):
self.array_base_map = array_base_map
self.var_name = var_name
- self.modified_insn_ids = set()
+ self.modified_stmt_ids = set()
self.buf_var = buf_var
@@ -60,28 +60,28 @@ class ArrayAccessReplacer(RuleAwareIdentityMapper):
result = None
if expr.name == self.var_name and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack):
result = self.map_array_access((), expn_state)
if result is None:
return super(ArrayAccessReplacer, self).map_variable(expr, expn_state)
else:
- self.modified_insn_ids.add(expn_state.insn_id)
+ self.modified_stmt_ids.add(expn_state.stmt_id)
return result
def map_subscript(self, expr, expn_state):
result = None
if expr.aggregate.name == self.var_name and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack):
result = self.map_array_access(expr.index_tuple, expn_state)
if result is None:
return super(ArrayAccessReplacer, self).map_subscript(expr, expn_state)
else:
- self.modified_insn_ids.add(expn_state.insn_id)
+ self.modified_stmt_ids.add(expn_state.stmt_id)
return result
def map_array_access(self, index, expn_state):
@@ -153,7 +153,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
being buffered).
:arg store_expression: Either *None*, *False*, or an expression involving
variables 'base' and 'buffer' (without array indices).
- (*None* indicates that a default storage instruction should be used,
+ (*None* indicates that a default storage statement should be used,
*False* indicates that no storing of the temporary should occur
at all.)
:arg within: If not None, limit the action of the transformation to
@@ -259,14 +259,14 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
within_inames = set()
access_descriptors = []
- for insn in kernel.instructions:
- if not within(kernel, insn.id, ()):
+ for stmt in kernel.statements:
+ if not within(kernel, stmt.id, ()):
continue
from pymbolic.primitives import Variable, Subscript
from loopy.symbolic import LinearSubscript
- for assignee in insn.assignees:
+ for assignee in stmt.assignees:
if isinstance(assignee, Variable):
assignee_name = assignee.name
index = ()
@@ -289,7 +289,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
- buffer_inames_set)
access_descriptors.append(
AccessDescriptor(
- identifier=insn.id,
+ identifier=stmt.id,
storage_axis_exprs=index))
# {{{ find fetch/store inames
@@ -384,11 +384,11 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
# }}}
- new_insns = []
+ new_stmts = []
buf_var = var(buf_var_name)
- # {{{ generate init instruction
+ # {{{ generate init statement
buf_var_init = buf_var
if non1_init_inames:
@@ -419,9 +419,9 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
"base": init_base,
}))(init_expression)
- init_insn_id = kernel.make_unique_instruction_id(based_on="init_"+var_name)
+ init_stmt_id = kernel.make_unique_statement_id(based_on="init_"+var_name)
from loopy.kernel.data import Assignment
- init_instruction = Assignment(id=init_insn_id,
+ init_statement = Assignment(id=init_stmt_id,
assignee=buf_var_init,
expression=init_expression,
within_inames=(
@@ -439,14 +439,14 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
kernel = rule_mapping_context.finish_kernel(aar.map_kernel(kernel))
did_write = False
- for insn_id in aar.modified_insn_ids:
- insn = kernel.id_to_insn[insn_id]
- if buf_var_name in insn.assignee_var_names():
+ for stmt_id in aar.modified_stmt_ids:
+ stmt = kernel.id_to_stmt[stmt_id]
+ if buf_var_name in stmt.assignee_var_names():
did_write = True
- # {{{ add init_insn_id to depends_on
+ # {{{ add init_stmt_id to depends_on
- new_insns = []
+ new_stmts = []
def none_to_empty_set(s):
if s is None:
@@ -454,19 +454,19 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
else:
return s
- for insn in kernel.instructions:
- if insn.id in aar.modified_insn_ids:
- new_insns.append(
- insn.copy(
+ for stmt in kernel.statements:
+ if stmt.id in aar.modified_stmt_ids:
+ new_stmts.append(
+ stmt.copy(
depends_on=(
- none_to_empty_set(insn.depends_on)
- | frozenset([init_insn_id]))))
+ none_to_empty_set(stmt.depends_on)
+ | frozenset([init_stmt_id]))))
else:
- new_insns.append(insn)
+ new_stmts.append(stmt)
# }}}
- # {{{ generate store instruction
+ # {{{ generate store statement
buf_var_store = buf_var
if non1_store_inames:
@@ -498,10 +498,10 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
if store_expression is not False:
from loopy.kernel.data import Assignment
- store_instruction = Assignment(
- id=kernel.make_unique_instruction_id(based_on="store_"+var_name),
- depends_on=frozenset(aar.modified_insn_ids),
- no_sync_with=frozenset([(init_insn_id, "any")]),
+ store_statement = Assignment(
+ id=kernel.make_unique_statement_id(based_on="store_"+var_name),
+ depends_on=frozenset(aar.modified_stmt_ids),
+ no_sync_with=frozenset([(init_stmt_id, "any")]),
assignee=store_target,
expression=store_expression,
within_inames=(
@@ -512,16 +512,16 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
# }}}
- new_insns.append(init_instruction)
+ new_stmts.append(init_statement)
if did_write:
- new_insns.append(store_instruction)
+ new_stmts.append(store_statement)
else:
for iname in store_inames:
del new_iname_to_tag[iname]
kernel = kernel.copy(
domains=new_kernel_domains,
- instructions=new_insns,
+ statements=new_stmts,
temporary_variables=new_temporary_variables)
from loopy import tag_inames
diff --git a/loopy/transform/data.py b/loopy/transform/data.py
index 575311b11716f5a52e4713aa51922eb348c839d9..e2c1a5080a2e04854248b0fb2d6bf5f8f4dbadc9 100644
--- a/loopy/transform/data.py
+++ b/loopy/transform/data.py
@@ -227,7 +227,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
:arg fetch_outer_inames: The inames within which the fetch
- instruction is nested. If *None*, make an educated guess.
+ statement is nested. If *None*, make an educated guess.
This function internally uses :func:`extract_subst` and :func:`precompute`.
"""
@@ -446,8 +446,8 @@ def remove_unused_arguments(knl):
exp_knl = lp.expand_subst(knl)
refd_vars = set(knl.all_params())
- for insn in exp_knl.instructions:
- refd_vars.update(insn.dependency_names())
+ for stmt in exp_knl.statements:
+ refd_vars.update(stmt.dependency_names())
from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag
from loopy.symbolic import get_dependencies
@@ -512,19 +512,19 @@ def alias_temporaries(knl, names, base_name_prefix=None,
names_set = set(names)
if synchronize_for_exclusive_use:
- new_insns = []
- for insn in knl.instructions:
- temp_deps = insn.dependency_names() & names_set
+ new_stmts = []
+ for stmt in knl.statements:
+ temp_deps = stmt.dependency_names() & names_set
if not temp_deps:
- new_insns.append(insn)
+ new_stmts.append(stmt)
continue
if len(temp_deps) > 1:
- raise LoopyError("Instruction {insn} refers to multiple of the "
+ raise LoopyError("Statement {stmt} refers to multiple of the "
"temporaries being aliased, namely '{temps}'. Cannot alias."
.format(
- insn=insn.id,
+ stmt=stmt.id,
temps=", ".join(temp_deps)))
temp_name, = temp_deps
@@ -534,13 +534,13 @@ def alias_temporaries(knl, names, base_name_prefix=None,
frozenset(group_names[:temp_idx])
| frozenset(group_names[temp_idx+1:]))
- new_insns.append(
- insn.copy(
- groups=insn.groups | frozenset([group_name]),
+ new_stmts.append(
+ stmt.copy(
+ groups=stmt.groups | frozenset([group_name]),
conflicts_with_groups=(
- insn.conflicts_with_groups | other_group_names)))
+ stmt.conflicts_with_groups | other_group_names)))
else:
- new_insns = knl.instructions
+ new_stmts = knl.statements
new_temporary_variables = {}
for tv in six.itervalues(knl.temporary_variables):
@@ -556,7 +556,7 @@ def alias_temporaries(knl, names, base_name_prefix=None,
new_temporary_variables[tv.name] = tv
return knl.copy(
- instructions=new_insns,
+ statements=new_stmts,
temporary_variables=new_temporary_variables)
# }}}
@@ -624,7 +624,7 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False):
kernel.substitutions, var_name_gen)
smap = RuleAwareSubstitutionMapper(rule_mapping_context,
make_subst_func(subst_dict),
- within=lambda knl, insn, stack: True)
+ within=lambda knl, stmt, stack: True)
kernel = smap.map_kernel(kernel)
@@ -683,7 +683,7 @@ def set_temporary_scope(kernel, temp_var_names, scope):
# {{{ reduction_arg_to_subst_rule
-def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=None):
+def reduction_arg_to_subst_rule(knl, inames, stmt_match=None, subst_rule_name=None):
if isinstance(inames, str):
inames = [s.strip() for s in inames.split(",")]
@@ -731,15 +731,15 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No
from loopy.kernel.data import MultiAssignmentBase
- new_insns = []
- for insn in knl.instructions:
- if not isinstance(insn, MultiAssignmentBase):
- new_insns.append(insn)
+ new_stmts = []
+ for stmt in knl.statements:
+ if not isinstance(stmt, MultiAssignmentBase):
+ new_stmts.append(stmt)
else:
- new_insns.append(insn.copy(expression=cb_mapper(insn.expression)))
+ new_stmts.append(stmt.copy(expression=cb_mapper(stmt.expression)))
return knl.copy(
- instructions=new_insns,
+ statements=new_stmts,
substitutions=substs)
# }}}
diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py
index d4dcb3701f4f23a5b1c66b1559bf6c4879425902..e762082b667d0eabad75fe363b79cf190caf84e2 100644
--- a/loopy/transform/diff.py
+++ b/loopy/transform/diff.py
@@ -168,12 +168,12 @@ class DifferentiationContext(object):
self.imported_outputs = set()
self.output_to_diff_output = {}
- self.generate_instruction_id = self.kernel.get_instruction_id_generator()
+ self.generate_statement_id = self.kernel.get_statement_id_generator()
self.new_args = []
self.new_temporary_variables = {}
- self.new_instructions = []
- self.imported_instructions = set()
+ self.new_statements = []
+ self.imported_statements = set()
self.new_domains = []
self.rule_mapping_context = SubstitutionRuleMappingContext(
@@ -189,7 +189,7 @@ class DifferentiationContext(object):
knl = knl.copy(
args=new_args,
temporary_variables=new_temp_vars,
- instructions=self.new_instructions,
+ statements=self.new_statements,
domains=knl.domains + self.new_domains)
del new_args
@@ -226,24 +226,24 @@ class DifferentiationContext(object):
# }}}
- def import_instruction_and_deps(self, insn_id):
- if insn_id in self.imported_instructions:
+ def import_statement_and_deps(self, stmt_id):
+ if stmt_id in self.imported_statements:
return
- insn = self.kernel.id_to_insn[insn_id]
- self.new_instructions.append(insn)
- self.imported_instructions.add(insn_id)
+ stmt = self.kernel.id_to_stmt[stmt_id]
+ self.new_statements.append(stmt)
+ self.imported_statements.add(stmt_id)
id_map = RuleAwareIdentityMapper(self.rule_mapping_context)
- if isinstance(insn, lp.Assignment):
- id_map(insn.expression, self.kernel, insn)
+ if isinstance(stmt, lp.Assignment):
+ id_map(stmt.expression, self.kernel, stmt)
else:
raise RuntimeError("do not know how to deal with "
- "instruction of type %s" % type(insn))
+ "statement of type %s" % type(stmt))
- for dep in insn.depends_on:
- self.import_instruction_and_deps(dep)
+ for dep in stmt.depends_on:
+ self.import_statement_and_deps(dep)
def import_output_var(self, var_name):
writers = self.kernel.writer_map().get(var_name, [])
@@ -255,8 +255,8 @@ class DifferentiationContext(object):
if not writers:
return
- insn_id, = writers
- self.import_instruction_and_deps(insn_id)
+ stmt_id, = writers
+ self.import_statement_and_deps(stmt_id)
def get_diff_var(self, var_name):
"""
@@ -279,7 +279,7 @@ class DifferentiationContext(object):
% var_name)
orig_writer_id, = writers
- orig_writer_insn = self.kernel.id_to_insn[orig_writer_id]
+ orig_writer_stmt = self.kernel.id_to_stmt[orig_writer_id]
diff_inames = self.add_diff_inames()
diff_iname_exprs = tuple(var(diname) for diname in diff_inames)
@@ -289,32 +289,32 @@ class DifferentiationContext(object):
diff_mapper = LoopyDiffMapper(self.rule_mapping_context, self,
diff_inames)
- diff_expr = diff_mapper(orig_writer_insn.expression,
- self.kernel, orig_writer_insn)
+ diff_expr = diff_mapper(orig_writer_stmt.expression,
+ self.kernel, orig_writer_stmt)
if not diff_expr:
return None
- assert isinstance(orig_writer_insn, lp.Assignment)
- if isinstance(orig_writer_insn.assignee, p.Subscript):
- lhs_ind = orig_writer_insn.assignee.index_tuple
- elif isinstance(orig_writer_insn.assignee, p.Variable):
+ assert isinstance(orig_writer_stmt, lp.Assignment)
+ if isinstance(orig_writer_stmt.assignee, p.Subscript):
+ lhs_ind = orig_writer_stmt.assignee.index_tuple
+ elif isinstance(orig_writer_stmt.assignee, p.Variable):
lhs_ind = ()
else:
raise LoopyError(
"Unrecognized LHS type in differentiation: %s"
- % type(orig_writer_insn.assignee).__name__)
+ % type(orig_writer_stmt.assignee).__name__)
- new_insn_id = self.generate_instruction_id()
- insn = lp.Assignment(
- id=new_insn_id,
+ new_stmt_id = self.generate_statement_id()
+ stmt = lp.Assignment(
+ id=new_stmt_id,
assignee=var(new_var_name)[
lhs_ind + diff_iname_exprs],
expression=diff_expr,
within_inames=(
- orig_writer_insn.within_inames | frozenset(diff_inames)))
+ orig_writer_stmt.within_inames | frozenset(diff_inames)))
- self.new_instructions.append(insn)
+ self.new_statements.append(stmt)
# }}}
@@ -383,7 +383,7 @@ def diff_kernel(knl, diff_outputs, by, diff_iname_prefix="diff_i",
var_name_gen = knl.get_var_name_generator()
- # {{{ differentiate instructions
+ # {{{ differentiate statements
diff_context = DifferentiationContext(
knl, var_name_gen, by, diff_iname_prefix=diff_iname_prefix,
diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py
index 77c2d3adecb6db4c0b77c9eb32983c9c04067c43..b9be8dd1830554c0df066e1a2586137e735bb388 100644
--- a/loopy/transform/fusion.py
+++ b/loopy/transform/fusion.py
@@ -209,10 +209,10 @@ def _fuse_two_kernels(knla, knlb):
knlb = _apply_renames_in_exprs(knlb, b_var_renames)
from pymbolic.imperative.transform import \
- fuse_instruction_streams_with_unique_ids
- new_instructions, old_b_id_to_new_b_id = \
- fuse_instruction_streams_with_unique_ids(
- knla.instructions, knlb.instructions)
+ fuse_statement_streams_with_unique_ids
+ new_statements, old_b_id_to_new_b_id = \
+ fuse_statement_streams_with_unique_ids(
+ knla.statements, knlb.statements)
# {{{ fuse assumptions
@@ -238,7 +238,7 @@ def _fuse_two_kernels(knla, knlb):
from loopy.kernel import LoopKernel
return LoopKernel(
domains=new_domains,
- instructions=new_instructions,
+ statements=new_statements,
args=new_args,
name="%s_and_%s" % (knla.name, knlb.name),
preambles=_ordered_merge_lists(knla.preambles, knlb.preambles),
@@ -321,10 +321,10 @@ def fuse_kernels(kernels, suffixes=None, data_flow=None):
succeed.
* Temporaries are automatically renamed to remain uniquely associated
- with each instruction stream.
+ with each statement stream.
- * The resulting kernel will contain all instructions from each entry
- of *kernels*. Clashing instruction IDs will be renamed to ensure
+ * The resulting kernel will contain all statements from each entry
+ of *kernels*. Clashing statement IDs will be renamed to ensure
uniqueness.
.. versionchanged:: 2016.2
@@ -368,44 +368,44 @@ def fuse_kernels(kernels, suffixes=None, data_flow=None):
# }}}
- kernel_insn_ids = []
+ kernel_stmt_ids = []
result = None
for knlb in kernels:
if result is None:
result = knlb
- kernel_insn_ids.append([
- insn.id for insn in knlb.instructions])
+ kernel_stmt_ids.append([
+ stmt.id for stmt in knlb.statements])
else:
result, old_b_id_to_new_b_id = _fuse_two_kernels(
knla=result,
knlb=knlb)
- kernel_insn_ids.append([
- old_b_id_to_new_b_id[insn.id]
- for insn in knlb.instructions])
+ kernel_stmt_ids.append([
+ old_b_id_to_new_b_id[stmt.id]
+ for stmt in knlb.statements])
# {{{ realize data_flow dependencies
- id_to_insn = result.id_to_insn.copy()
+ id_to_stmt = result.id_to_stmt.copy()
for var_name, from_kernel, to_kernel in data_flow:
from_writer_ids = frozenset(
- insn_id
- for insn_id in kernel_insn_ids[from_kernel]
- if var_name in id_to_insn[insn_id].assignee_var_names())
+ stmt_id
+ for stmt_id in kernel_stmt_ids[from_kernel]
+ if var_name in id_to_stmt[stmt_id].assignee_var_names())
- for insn_id in kernel_insn_ids[to_kernel]:
- insn = id_to_insn[insn_id]
- if var_name in insn.read_dependency_names():
- insn = insn.copy(depends_on=insn.depends_on | from_writer_ids)
+ for stmt_id in kernel_stmt_ids[to_kernel]:
+ stmt = id_to_stmt[stmt_id]
+ if var_name in stmt.read_dependency_names():
+ stmt = stmt.copy(depends_on=stmt.depends_on | from_writer_ids)
- id_to_insn[insn_id] = insn
+ id_to_stmt[stmt_id] = stmt
- result = result.copy(instructions=[
- id_to_insn[insn_id]
- for insn_ids in kernel_insn_ids
- for insn_id in insn_ids])
+ result = result.copy(statements=[
+ id_to_stmt[stmt_id]
+ for stmt_ids in kernel_stmt_ids
+ for stmt_id in stmt_ids])
# }}}
diff --git a/loopy/transform/ilp.py b/loopy/transform/ilp.py
index 0ac71d603ebe8b5150fb854dd3978676dd9d98c3..0c86f6dc364ad79ee87399e6d8a8ccca4e25bdd7 100644
--- a/loopy/transform/ilp.py
+++ b/loopy/transform/ilp.py
@@ -77,12 +77,12 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None):
# {{{ find variables that need extra indices
for tv in six.itervalues(kernel.temporary_variables):
- for writer_insn_id in wmap.get(tv.name, []):
- writer_insn = kernel.id_to_insn[writer_insn_id]
+ for writer_stmt_id in wmap.get(tv.name, []):
+ writer_stmt = kernel.id_to_stmt[writer_stmt_id]
if iname is None:
ilp_inames = frozenset(iname
- for iname in kernel.insn_inames(writer_insn)
+ for iname in kernel.stmt_inames(writer_stmt)
if isinstance(
kernel.iname_to_tag.get(iname),
(IlpBaseTag, VectorizeTag)))
@@ -97,7 +97,7 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None):
ilp_inames = frozenset([iname])
referenced_ilp_inames = (ilp_inames
- & writer_insn.write_dependency_names())
+ & writer_stmt.write_dependency_names())
new_ilp_inames = ilp_inames - referenced_ilp_inames
@@ -106,10 +106,10 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None):
if tv.name in var_to_new_ilp_inames:
if new_ilp_inames != set(var_to_new_ilp_inames[tv.name]):
- raise LoopyError("instruction '%s' requires adding "
+ raise LoopyError("statement '%s' requires adding "
"indices for ILP inames '%s' on var '%s', but previous "
- "instructions required inames '%s'"
- % (writer_insn_id, ", ".join(new_ilp_inames),
+ "statements required inames '%s'"
+ % (writer_stmt_id, ", ".join(new_ilp_inames),
", ".join(var_to_new_ilp_inames[tv.name])))
continue
@@ -167,30 +167,30 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None):
(var_name, tuple(var(iname) for iname in inames))
for var_name, inames in six.iteritems(var_to_new_ilp_inames))
- new_insns = []
+ new_stmts = []
- for insn in kernel.instructions:
+ for stmt in kernel.statements:
eiii = ExtraInameIndexInserter(var_to_extra_iname)
- new_insn = insn.with_transformed_expressions(eiii)
- if not eiii.seen_ilp_inames <= insn.within_inames:
+ new_stmt = stmt.with_transformed_expressions(eiii)
+ if not eiii.seen_ilp_inames <= stmt.within_inames:
from loopy.diagnostic import warn_with_kernel
warn_with_kernel(
kernel,
"implicit_ilp_iname",
- "Instruction '%s': touched variable that (for ILP) "
- "required iname(s) '%s', but that the instruction was not "
+ "Statement '%s': touched variable that (for ILP) "
+ "required iname(s) '%s', but that the statement was not "
"previously within the iname(s). Previously, this would "
- "implicitly promote the instruction, but that behavior is "
+ "implicitly promote the statement, but that behavior is "
"deprecated and will stop working in 2018.1."
- % (insn.id, ", ".join(
- eiii.seen_ilp_inames - insn.within_inames)))
+ % (stmt.id, ", ".join(
+ eiii.seen_ilp_inames - stmt.within_inames)))
- new_insns.append(new_insn)
+ new_stmts.append(new_stmt)
return kernel.copy(
temporary_variables=new_temp_vars,
- instructions=new_insns)
+ statements=new_stmts)
# }}}
@@ -198,14 +198,14 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None):
# {{{ realize_ilp
def realize_ilp(kernel, iname):
- """Instruction-level parallelism (as realized by the loopy iname
+ """Statement-level parallelism (as realized by the loopy iname
tag ``"ilp"``) provides the illusion that multiple concurrent
- program instances execute in lockstep within a single instruction
+ program instances execute in lockstep within a single statement
stream.
- To do so, storage that is private to each instruction stream needs to be
+ To do so, storage that is private to each statement stream needs to be
duplicated so that each program instance receives its own copy. Storage
- that is written to in an instruction using an ILP iname but whose left-hand
+ that is written to in a statement using an ILP iname but whose left-hand
side indices do not contain said ILP iname is marked for duplication.
This storage duplication is carried out automatically at code generation
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index 22fd7b3bb2c643bc3c1309f4e3fdb89438ae7d2b..105ba7bd825c945797a47ef7f23c038b9a33f9b4 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -70,7 +70,7 @@ __doc__ = """
.. autofunction:: make_reduction_inames_unique
-.. autofunction:: add_inames_to_insn
+.. autofunction:: add_inames_to_stmt
"""
@@ -137,7 +137,7 @@ class _InameSplitter(RuleAwareIdentityMapper):
and self.split_iname not in expn_state.arg_context
and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack)):
new_inames = list(expr.inames)
new_inames.remove(self.split_iname)
@@ -155,7 +155,7 @@ class _InameSplitter(RuleAwareIdentityMapper):
and self.split_iname not in expn_state.arg_context
and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack)):
return self.replacement_index
else:
@@ -246,20 +246,20 @@ def _split_iname_backend(kernel, split_iname,
# {{{ update within_inames
- new_insns = []
- for insn in kernel.instructions:
- if split_iname in insn.within_inames:
+ new_stmts = []
+ for stmt in kernel.statements:
+ if split_iname in stmt.within_inames:
new_within_inames = (
- (insn.within_inames.copy()
+ (stmt.within_inames.copy()
- frozenset([split_iname]))
| frozenset([outer_iname, inner_iname]))
else:
- new_within_inames = insn.within_inames
+ new_within_inames = stmt.within_inames
- insn = insn.copy(
+ stmt = stmt.copy(
within_inames=new_within_inames)
- new_insns.append(insn)
+ new_stmts.append(stmt)
# }}}
@@ -279,7 +279,7 @@ def _split_iname_backend(kernel, split_iname,
kernel = kernel.copy(
domains=new_domains,
iname_slab_increments=iname_slab_increments,
- instructions=new_insns,
+ statements=new_stmts,
applied_iname_rewrites=applied_iname_rewrites,
loop_priority=frozenset(new_priorities))
@@ -458,7 +458,7 @@ class _InameJoiner(RuleAwareSubstitutionMapper):
- set(expn_state.arg_context))
if overlap and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack):
if overlap != expr_inames:
raise LoopyError(
@@ -563,14 +563,14 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None):
return frozenset(result)
- new_insns = [
- insn.copy(
- within_inames=subst_within_inames(insn.within_inames))
- for insn in kernel.instructions]
+ new_stmts = [
+ stmt.copy(
+ within_inames=subst_within_inames(stmt.within_inames))
+ for stmt in kernel.statements]
kernel = (kernel
.copy(
- instructions=new_insns,
+ statements=new_stmts,
domains=domch.get_domains_with(new_domain),
applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict]
))
@@ -725,7 +725,7 @@ class _InameDuplicator(RuleAwareIdentityMapper):
if (set(expr.inames) & self.old_inames_set
and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack)):
new_inames = tuple(
self.old_to_new.get(iname, iname)
@@ -747,21 +747,21 @@ class _InameDuplicator(RuleAwareIdentityMapper):
or expr.name in expn_state.arg_context
or not self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack)):
return super(_InameDuplicator, self).map_variable(expr, expn_state)
else:
from pymbolic import var
return var(new_name)
- def map_instruction(self, kernel, insn):
- if not self.within(kernel, insn, ()):
- return insn
+ def map_statement(self, kernel, stmt):
+ if not self.within(kernel, stmt, ()):
+ return stmt
new_fid = frozenset(
self.old_to_new.get(iname, iname)
- for iname in insn.within_inames)
- return insn.copy(within_inames=new_fid)
+ for iname in stmt.within_inames)
+ return stmt.copy(within_inames=new_fid)
def duplicate_inames(knl, inames, within, new_inames=None, suffix=None,
@@ -854,28 +854,28 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None,
# {{{ iname duplication for schedulability
-def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])):
- # Remove common inames of the current insn_deps, as they are not relevant
+def _get_iname_duplication_options(stmt_deps, old_common_inames=frozenset([])):
+ # Remove common inames of the current stmt_deps, as they are not relevant
# for splitting.
- common = frozenset([]).union(*insn_deps).intersection(*insn_deps)
+ common = frozenset([]).union(*stmt_deps).intersection(*stmt_deps)
# If common inames were found, we reduce the problem and go into recursion
if common:
- # Remove the common inames from the instruction dependencies
- insn_deps = (
- frozenset(dep - common for dep in insn_deps)
+ # Remove the common inames from the statement dependencies
+ stmt_deps = (
+ frozenset(dep - common for dep in stmt_deps)
-
frozenset([frozenset([])]))
# Join the common inames with those previously found
common = common.union(old_common_inames)
# Go into recursion
- for option in _get_iname_duplication_options(insn_deps, common):
+ for option in _get_iname_duplication_options(stmt_deps, common):
yield option
# Do not yield anything beyond here!
return
- # Try finding a partitioning of the remaining inames, such that all instructions
+ # Try finding a partitioning of the remaining inames, such that all statements
# use only inames from one of the disjoint sets from the partitioning.
def join_sets_if_not_disjoint(sets):
for s1 in sets:
@@ -888,7 +888,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])):
return sets, True
- partitioning = insn_deps
+ partitioning = stmt_deps
stop = False
while not stop:
partitioning, stop = join_sets_if_not_disjoint(partitioning)
@@ -897,7 +897,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])):
# subproblems
if len(partitioning) > 1:
for part in partitioning:
- working_set = frozenset(s for s in insn_deps if s.issubset(part))
+ working_set = frozenset(s for s in stmt_deps if s.issubset(part))
for option in _get_iname_duplication_options(working_set,
old_common_inames):
yield option
@@ -907,19 +907,19 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])):
# There are splitting options for all inames
for iname in inames:
- iname_insns = frozenset(
- insn for insn in insn_deps if frozenset([iname]).issubset(insn))
+ iname_stmts = frozenset(
+ stmt for stmt in stmt_deps if frozenset([iname]).issubset(stmt))
import itertools as it
- # For a given iname, the set of instructions containing this iname
+ # For a given iname, the set of statements containing this iname
# is inspected. For each element of the power set without the
# empty and the full set, one duplication option is generated.
- for insns_to_dup in it.chain.from_iterable(
- it.combinations(iname_insns, l)
- for l in range(1, len(iname_insns))):
+ for stmts_to_dup in it.chain.from_iterable(
+ it.combinations(iname_stmts, l)
+ for l in range(1, len(iname_stmts))):
yield (
iname,
- tuple(insn.union(old_common_inames) for insn in insns_to_dup))
+ tuple(stmt.union(old_common_inames) for stmt in stmts_to_dup))
# If partitioning was empty, we have recursed successfully and yield nothing
@@ -946,31 +946,31 @@ def get_iname_duplication_options(knl, use_boostable_into=False):
\"\"\")
In the example, there are four possibilities to resolve the problem:
- * duplicating i in instruction i3
- * duplicating i in instruction i1 and i3
- * duplicating j in instruction i2
- * duplicating i in instruction i2 and i3
+ * duplicating i in statement i3
+ * duplicating i in statement i1 and i3
+ * duplicating j in statement i2
+ * duplicating i in statement i2 and i3
Use :func:`has_schedulable_iname_nesting` to decide, whether an iname needs to be
duplicated in a given kernel.
"""
# First we extract the minimal necessary information from the kernel
if use_boostable_into:
- insn_deps = (
- frozenset(insn.within_inames.union(
- insn.boostable_into if insn.boostable_into is not None
+ stmt_deps = (
+ frozenset(stmt.within_inames.union(
+ stmt.boostable_into if stmt.boostable_into is not None
else frozenset([]))
- for insn in knl.instructions)
+ for stmt in knl.statements)
-
frozenset([frozenset([])]))
else:
- insn_deps = (
- frozenset(insn.within_inames for insn in knl.instructions)
+ stmt_deps = (
+ frozenset(stmt.within_inames for stmt in knl.statements)
-
frozenset([frozenset([])]))
# Get the duplication options as a tuple of iname and a set
- for iname, insns in _get_iname_duplication_options(insn_deps):
+ for iname, stmts in _get_iname_duplication_options(stmt_deps):
# Check whether this iname has a parallel tag and discard it if so
from loopy.kernel.data import ConcurrentTag
if (iname in knl.iname_to_tag
@@ -987,7 +987,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False):
from warnings import warn
from loopy.diagnostic import LoopyWarning
warn("Kernel '%s' required the deprecated 'boostable_into' "
- "instruction attribute in order to be schedulable!" % knl.name,
+ "statement attribute in order to be schedulable!" % knl.name,
LoopyWarning)
# Return to avoid yielding the duplication
@@ -998,10 +998,10 @@ def get_iname_duplication_options(knl, use_boostable_into=False):
# loopy.duplicate_inames
from loopy.match import Id, Or
within = Or(tuple(
- Id(insn.id) for insn in knl.instructions
- if insn.within_inames in insns))
+ Id(stmt.id) for stmt in knl.statements
+ if stmt.within_inames in stmts))
- # Only yield the result if an instruction matched. With
+ # Only yield the result if a statement matched. With
# use_boostable_into=True this is not always true.
if within.children:
@@ -1085,18 +1085,18 @@ def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None):
knl = rule_mapping_context.finish_kernel(
smap.map_kernel(knl))
- new_instructions = []
- for insn in knl.instructions:
- if (old_iname in insn.within_inames
- and within(knl, insn, ())):
- insn = insn.copy(
+ new_statements = []
+ for stmt in knl.statements:
+ if (old_iname in stmt.within_inames
+ and within(knl, stmt, ())):
+ stmt = stmt.copy(
within_inames=(
- (insn.within_inames - frozenset([old_iname]))
+ (stmt.within_inames - frozenset([old_iname]))
| frozenset([new_iname])))
- new_instructions.append(insn)
+ new_statements.append(stmt)
- knl = knl.copy(instructions=new_instructions)
+ knl = knl.copy(statements=new_statements)
else:
knl = duplicate_inames(
@@ -1136,10 +1136,10 @@ def remove_unused_inames(knl, inames=None):
inames = set(inames)
used_inames = set()
- for insn in exp_knl.instructions:
+ for stmt in exp_knl.statements:
used_inames.update(
- exp_knl.insn_inames(insn.id)
- | insn.reduction_inames())
+ exp_knl.stmt_inames(stmt.id)
+ | stmt.reduction_inames())
unused_inames = inames - used_inames
@@ -1184,7 +1184,7 @@ class _ReductionSplitter(RuleAwareIdentityMapper):
if (self.inames <= set(expr.inames)
and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack)):
leftover_inames = set(expr.inames) - self.inames
@@ -1451,43 +1451,43 @@ def affine_map_inames(kernel, old_inames, new_inames, equations):
# }}}
- # {{{ switch iname refs in instructions
+ # {{{ switch iname refs in statements
- def fix_iname_set(insn_id, inames):
+ def fix_iname_set(stmt_id, inames):
if old_inames_set <= inames:
return (inames - old_inames_set) | new_inames_set
elif old_inames_set & inames:
- raise LoopyError("instruction '%s' uses only a part (%s), not all, "
+ raise LoopyError("statement '%s' uses only a part (%s), not all, "
"of the old inames"
- % (insn_id, ", ".join(old_inames_set & inames)))
+ % (stmt_id, ", ".join(old_inames_set & inames)))
else:
return inames
- new_instructions = [
- insn.copy(within_inames=fix_iname_set(
- insn.id, insn.within_inames))
- for insn in kernel.instructions]
+ new_statements = [
+ stmt.copy(within_inames=fix_iname_set(
+ stmt.id, stmt.within_inames))
+ for stmt in kernel.statements]
# }}}
- return kernel.copy(domains=new_domains, instructions=new_instructions)
+ return kernel.copy(domains=new_domains, statements=new_statements)
# }}}
# {{{ find unused axes
-def find_unused_axis_tag(kernel, kind, insn_match=None):
+def find_unused_axis_tag(kernel, kind, stmt_match=None):
"""For one of the hardware-parallel execution tags, find an unused
axis.
- :arg insn_match: An instruction match as understood by
+ :arg stmt_match: A statement match as understood by
:func:`loopy.match.parse_match`.
:arg kind: may be "l" or "g", or the corresponding tag class name
:returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag`
- that is not being used within the instructions matched by
- *insn_match*.
+ that is not being used within the statements matched by
+ *stmt_match*.
"""
used_axes = set()
@@ -1505,11 +1505,11 @@ def find_unused_axis_tag(kernel, kind, insn_match=None):
raise LoopyError("invlaid tag kind: %s" % kind)
from loopy.match import parse_match
- match = parse_match(insn_match)
- insns = [insn for insn in kernel.instructions if match(kernel, insn)]
+ match = parse_match(stmt_match)
+ stmts = [stmt for stmt in kernel.statements if match(kernel, stmt)]
- for insn in insns:
- for iname in kernel.insn_inames(insn):
+ for stmt in stmts:
+ for iname in kernel.stmt_inames(stmt):
dim_tag = kernel.iname_to_tag.get(iname)
if isinstance(dim_tag, kind):
@@ -1557,7 +1557,7 @@ class _ReductionInameUniquifier(RuleAwareIdentityMapper):
def map_reduction(self, expr, expn_state):
within = self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack)
for iname in expr.inames:
@@ -1645,19 +1645,19 @@ def make_reduction_inames_unique(kernel, inames=None, within=None):
# }}}
-# {{{ add_inames_to_insn
+# {{{ add_inames_to_stmt
-def add_inames_to_insn(knl, inames, insn_match):
+def add_inames_to_stmt(knl, inames, stmt_match):
"""
:arg inames: a frozenset of inames that will be added to the
- instructions matched by *insn_match*, or a comma-separated
+ statements matched by *stmt_match*, or a comma-separated
string that parses to such a tuple.
- :arg insn_match: An instruction match as understood by
+ :arg stmt_match: A statement match as understood by
:func:`loopy.match.parse_match`.
:returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag`
- that is not being used within the instructions matched by
- *insn_match*.
+ that is not being used within the statements matched by
+ *stmt_match*.
.. versionadded:: 2016.3
"""
@@ -1669,18 +1669,18 @@ def add_inames_to_insn(knl, inames, insn_match):
raise TypeError("'inames' must be a frozenset")
from loopy.match import parse_match
- match = parse_match(insn_match)
+ match = parse_match(stmt_match)
- new_instructions = []
+ new_statements = []
- for insn in knl.instructions:
- if match(knl, insn):
- new_instructions.append(
- insn.copy(within_inames=insn.within_inames | inames))
+ for stmt in knl.statements:
+ if match(knl, stmt):
+ new_statements.append(
+ stmt.copy(within_inames=stmt.within_inames | inames))
else:
- new_instructions.append(insn)
+ new_statements.append(stmt)
- return knl.copy(instructions=new_instructions)
+ return knl.copy(statements=new_statements)
# }}}
diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py
deleted file mode 100644
index 37c5d85a1ade5c8f7fadb2c6a785cf7cea3dde40..0000000000000000000000000000000000000000
--- a/loopy/transform/instruction.py
+++ /dev/null
@@ -1,339 +0,0 @@
-from __future__ import division, absolute_import
-
-__copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
-
-__license__ = """
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-"""
-
-import six # noqa
-
-from loopy.diagnostic import LoopyError
-
-
-# {{{ find_instructions
-
-def find_instructions(kernel, insn_match):
- from loopy.match import parse_match
- match = parse_match(insn_match)
- return [insn for insn in kernel.instructions if match(kernel, insn)]
-
-# }}}
-
-
-# {{{ map_instructions
-
-def map_instructions(kernel, insn_match, f):
- from loopy.match import parse_match
- match = parse_match(insn_match)
-
- new_insns = []
-
- for insn in kernel.instructions:
- if match(kernel, insn):
- new_insns.append(f(insn))
- else:
- new_insns.append(insn)
-
- return kernel.copy(instructions=new_insns)
-
-# }}}
-
-
-# {{{ set_instruction_priority
-
-def set_instruction_priority(kernel, insn_match, priority):
- """Set the priority of instructions matching *insn_match* to *priority*.
-
- *insn_match* may be any instruction id match understood by
- :func:`loopy.match.parse_match`.
- """
-
- def set_prio(insn):
- return insn.copy(priority=priority)
-
- return map_instructions(kernel, insn_match, set_prio)
-
-# }}}
-
-
-# {{{ add_dependency
-
-def add_dependency(kernel, insn_match, depends_on):
- """Add the instruction dependency *dependency* to the instructions matched
- by *insn_match*.
-
- *insn_match* and *depends_on* may be any instruction id match understood by
- :func:`loopy.match.parse_match`.
-
- .. versionchanged:: 2016.3
-
- Third argument renamed to *depends_on* for clarity, allowed to
- be not just ID but also match expression.
- """
-
- if isinstance(depends_on, str) and depends_on in kernel.id_to_insn:
- added_deps = frozenset([depends_on])
- else:
- added_deps = frozenset(
- dep.id for dep in find_instructions(kernel, depends_on))
-
- if not added_deps:
- raise LoopyError("no instructions found matching '%s' "
- "(to add as dependencies)" % depends_on)
-
- matched = [False]
-
- def add_dep(insn):
- new_deps = insn.depends_on
- matched[0] = True
- if new_deps is None:
- new_deps = added_deps
- else:
- new_deps = new_deps | added_deps
-
- return insn.copy(depends_on=new_deps)
-
- result = map_instructions(kernel, insn_match, add_dep)
-
- if not matched[0]:
- raise LoopyError("no instructions found matching '%s' "
- "(to which dependencies would be added)" % insn_match)
-
- return result
-
-# }}}
-
-
-# {{{ remove_instructions
-
-def remove_instructions(kernel, insn_ids):
- """Return a new kernel with instructions in *insn_ids* removed.
-
- Dependencies across (one, for now) deleted isntructions are propagated.
- Behavior is undefined for now for chains of dependencies within the
- set of deleted instructions.
-
- This also updates *no_sync_with* for all instructions.
- """
-
- if not insn_ids:
- return kernel
-
- assert isinstance(insn_ids, set)
- id_to_insn = kernel.id_to_insn
-
- new_insns = []
- for insn in kernel.instructions:
- if insn.id in insn_ids:
- continue
-
- # transitively propagate dependencies
- # (only one level for now)
- if insn.depends_on is None:
- depends_on = frozenset()
- else:
- depends_on = insn.depends_on
-
- new_deps = depends_on - insn_ids
-
- for dep_id in depends_on & insn_ids:
- new_deps = new_deps | id_to_insn[dep_id].depends_on
-
- # update no_sync_with
-
- new_no_sync_with = frozenset((insn_id, scope)
- for insn_id, scope in insn.no_sync_with
- if insn_id not in insn_ids)
-
- new_insns.append(
- insn.copy(depends_on=new_deps, no_sync_with=new_no_sync_with))
-
- return kernel.copy(
- instructions=new_insns)
-
-# }}}
-
-
-# {{{ replace_instruction_ids
-
-def replace_instruction_ids(kernel, replacements):
- new_insns = []
-
- for insn in kernel.instructions:
- changed = False
- new_depends_on = []
- new_no_sync_with = []
-
- for dep in insn.depends_on:
- if dep in replacements:
- new_depends_on.extend(replacements[dep])
- changed = True
- else:
- new_depends_on.append(dep)
-
- for insn_id, scope in insn.no_sync_with:
- if insn_id in replacements:
- new_no_sync_with.extend(
- (repl, scope) for repl in replacements[insn_id])
- changed = True
- else:
- new_no_sync_with.append((insn_id, scope))
-
- new_insns.append(
- insn.copy(
- depends_on=frozenset(new_depends_on),
- no_sync_with=frozenset(new_no_sync_with))
- if changed else insn)
-
- return kernel.copy(instructions=new_insns)
-
-# }}}
-
-
-# {{{ tag_instructions
-
-def tag_instructions(kernel, new_tag, within=None):
- from loopy.match import parse_match
- within = parse_match(within)
-
- new_insns = []
- for insn in kernel.instructions:
- if within(kernel, insn):
- new_insns.append(
- insn.copy(tags=insn.tags | frozenset([new_tag])))
- else:
- new_insns.append(insn)
-
- return kernel.copy(instructions=new_insns)
-
-# }}}
-
-
-# {{{ add nosync
-
-def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False):
- """Add a *no_sync_with* directive between *source* and *sink*.
- *no_sync_with* is only added if *sink* depends on *source* or
- if the instruction pair is in a conflicting group.
-
- This function does not check for the presence of a memory dependency.
-
- :arg kernel: The kernel
- :arg source: Either a single instruction id, or any instruction id
- match understood by :func:`loopy.match.parse_match`.
- :arg sink: Either a single instruction id, or any instruction id
- match understood by :func:`loopy.match.parse_match`.
- :arg scope: A valid *no_sync_with* scope. See
- :attr:`loopy.InstructionBase.no_sync_with` for allowable scopes.
- :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with*
- to both the source and sink instructions, otherwise the directive
- is only added to the sink instructions.
- :arg force: A :class:`bool`. If *True*, add a *no_sync_with* directive
- even without the presence of a dependency edge or conflicting
- instruction group.
-
- :return: The updated kernel
- """
-
- if isinstance(source, str) and source in kernel.id_to_insn:
- sources = frozenset([source])
- else:
- sources = frozenset(
- source.id for source in find_instructions(kernel, source))
-
- if isinstance(sink, str) and sink in kernel.id_to_insn:
- sinks = frozenset([sink])
- else:
- sinks = frozenset(
- sink.id for sink in find_instructions(kernel, sink))
-
- def insns_in_conflicting_groups(insn1_id, insn2_id):
- insn1 = kernel.id_to_insn[insn1_id]
- insn2 = kernel.id_to_insn[insn2_id]
- return (
- bool(insn1.groups & insn2.conflicts_with_groups)
- or
- bool(insn2.groups & insn1.conflicts_with_groups))
-
- from collections import defaultdict
- nosync_to_add = defaultdict(set)
-
- for sink in sinks:
- for source in sources:
-
- needs_nosync = force or (
- source in kernel.recursive_insn_dep_map()[sink]
- or insns_in_conflicting_groups(source, sink))
-
- if not needs_nosync:
- continue
-
- nosync_to_add[sink].add((source, scope))
- if bidirectional:
- nosync_to_add[source].add((sink, scope))
-
- new_instructions = list(kernel.instructions)
-
- for i, insn in enumerate(new_instructions):
- if insn.id in nosync_to_add:
- new_instructions[i] = insn.copy(no_sync_with=insn.no_sync_with
- | frozenset(nosync_to_add[insn.id]))
-
- return kernel.copy(instructions=new_instructions)
-
-# }}}
-
-
-# {{{ uniquify_instruction_ids
-
-def uniquify_instruction_ids(kernel):
- """Converts any ids that are :class:`loopy.UniqueName` or *None* into unique
- strings.
-
- This function does *not* deduplicate existing instruction ids.
- """
-
- from loopy.kernel.creation import UniqueName
-
- insn_ids = set(
- insn.id for insn in kernel.instructions
- if insn.id is not None and not isinstance(insn.id, UniqueName))
-
- from pytools import UniqueNameGenerator
- insn_id_gen = UniqueNameGenerator(insn_ids)
-
- new_instructions = []
-
- for insn in kernel.instructions:
- if insn.id is None:
- new_instructions.append(
- insn.copy(id=insn_id_gen("insn")))
- elif isinstance(insn.id, UniqueName):
- new_instructions.append(
- insn.copy(id=insn_id_gen(insn.id.name)))
- else:
- new_instructions.append(insn)
-
- return kernel.copy(instructions=new_instructions)
-
-# }}}
-
-
-# vim: foldmethod=marker
diff --git a/loopy/transform/padding.py b/loopy/transform/padding.py
index d695e359558894c201f67e4013b25f5f45c19d82..a22b1db93955e3ea15072c1f2bfff3ed75f8aba4 100644
--- a/loopy/transform/padding.py
+++ b/loopy/transform/padding.py
@@ -83,7 +83,7 @@ def split_array_dim(kernel, arrays_and_axes, count, auto_split_inames=True,
elif len(rest) == 2:
return rest
else:
- raise RuntimeError("split instruction '%s' not understood" % rest)
+ raise RuntimeError("split statement '%s' not understood" % rest)
if isinstance(arrays_and_axes, tuple):
arrays_and_axes = [arrays_and_axes]
diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py
index 6077332c4fc4322ac7ffb02ade4a0e24c7066245..85fc34840aee0851f1f37186c4208ed186acd215 100644
--- a/loopy/transform/precompute.py
+++ b/loopy/transform/precompute.py
@@ -84,7 +84,7 @@ class RuleInvocationGatherer(RuleAwareIdentityMapper):
process_me = process_me and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack)
if not process_me:
@@ -136,7 +136,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
access_descriptors, array_base_map,
storage_axis_names, storage_axis_sources,
non1_storage_axis_names,
- temporary_name, compute_insn_id, compute_dep_id,
+ temporary_name, compute_stmt_id, compute_dep_id,
compute_read_variables):
super(RuleInvocationReplacer, self).__init__(rule_mapping_context)
@@ -152,18 +152,18 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
self.non1_storage_axis_names = non1_storage_axis_names
self.temporary_name = temporary_name
- self.compute_insn_id = compute_insn_id
+ self.compute_stmt_id = compute_stmt_id
self.compute_dep_id = compute_dep_id
self.compute_read_variables = compute_read_variables
- self.compute_insn_depends_on = set()
+ self.compute_stmt_depends_on = set()
def map_substitution(self, name, tag, arguments, expn_state):
if not (
name == self.subst_name
and self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack)
and (self.subst_tag is None or self.subst_tag == tag)):
return super(RuleInvocationReplacer, self).map_substitution(
@@ -222,34 +222,34 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper):
return new_outer_expr
def map_kernel(self, kernel):
- new_insns = []
+ new_stmts = []
- excluded_insn_ids = set([self.compute_insn_id, self.compute_dep_id])
+ excluded_stmt_ids = set([self.compute_stmt_id, self.compute_dep_id])
- for insn in kernel.instructions:
+ for stmt in kernel.statements:
self.replaced_something = False
- insn = insn.with_transformed_expressions(self, kernel, insn)
+ stmt = stmt.with_transformed_expressions(self, kernel, stmt)
if self.replaced_something:
- insn = insn.copy(
+ stmt = stmt.copy(
depends_on=(
- insn.depends_on
+ stmt.depends_on
| frozenset([self.compute_dep_id])))
- for dep in insn.depends_on:
- if dep in excluded_insn_ids:
+ for dep in stmt.depends_on:
+ if dep in excluded_stmt_ids:
continue
- dep_insn = kernel.id_to_insn[dep]
- if (frozenset(dep_insn.assignee_var_names())
+ dep_stmt = kernel.id_to_stmt[dep]
+ if (frozenset(dep_stmt.assignee_var_names())
& self.compute_read_variables):
- self.compute_insn_depends_on.update(
- insn.depends_on - excluded_insn_ids)
+ self.compute_stmt_depends_on.update(
+ stmt.depends_on - excluded_stmt_ids)
- new_insns.append(insn)
+ new_stmts.append(stmt)
- return kernel.copy(instructions=new_insns)
+ return kernel.copy(statements=new_stmts)
# }}}
@@ -260,7 +260,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
storage_axis_to_tag={}, default_tag="l.auto", dtype=None,
fetch_bounding_box=False,
temporary_scope=None, temporary_is_local=None,
- compute_insn_id=None):
+ compute_stmt_id=None):
"""Precompute the expression described in the substitution rule determined by
*subst_use* and store it in a temporary array. A precomputation needs two
things to operate, a list of *sweep_inames* (order irrelevant) and an
@@ -325,10 +325,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
May also equivalently be a comma-separated string.
:arg precompute_outer_inames: A :class:`frozenset` of inames within which
- the compute instruction is nested. If *None*, make an educated guess.
+ the compute statement is nested. If *None*, make an educated guess.
May also be specified as a comma-separated string.
- :arg compute_insn_id: The ID of the instruction generated to perform the
+ :arg compute_stmt_id: The ID of the statement generated to perform the
precomputation.
If `storage_axes` is not specified, it defaults to the arrangement
@@ -473,11 +473,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
del rule_mapping_context
import loopy as lp
- for insn in kernel.instructions:
- if isinstance(insn, lp.MultiAssignmentBase):
- for assignee in insn.assignees:
- invg(assignee, kernel, insn)
- invg(insn.expression, kernel, insn)
+ for stmt in kernel.statements:
+ if isinstance(stmt, lp.MultiAssignmentBase):
+ for assignee in stmt.assignees:
+ invg(assignee, kernel, stmt)
+ invg(stmt.expression, kernel, stmt)
access_descriptors = invg.access_descriptors
if not access_descriptors:
@@ -754,7 +754,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
kernel = kernel.copy(domains=new_kernel_domains)
- # {{{ set up compute insn
+ # {{{ set up compute stmt
if temporary_name is None:
temporary_name = var_name_gen(based_on=c_subst_name)
@@ -765,7 +765,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
assignee = assignee[
tuple(var(iname) for iname in non1_storage_axis_names)]
- # {{{ process substitutions on compute instruction
+ # {{{ process substitutions on compute statement
storage_axis_subst_dict = {}
@@ -792,29 +792,29 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
# }}}
from loopy.kernel.data import Assignment
- if compute_insn_id is None:
- compute_insn_id = kernel.make_unique_instruction_id(based_on=c_subst_name)
+ if compute_stmt_id is None:
+ compute_stmt_id = kernel.make_unique_statement_id(based_on=c_subst_name)
- compute_insn = Assignment(
- id=compute_insn_id,
+ compute_stmt = Assignment(
+ id=compute_stmt_id,
assignee=assignee,
expression=compute_expression,
# within_inames determined below
)
- compute_dep_id = compute_insn_id
- added_compute_insns = [compute_insn]
+ compute_dep_id = compute_stmt_id
+ added_compute_stmts = [compute_stmt]
if temporary_scope == temp_var_scope.GLOBAL:
- barrier_insn_id = kernel.make_unique_instruction_id(
+ barrier_stmt_id = kernel.make_unique_statement_id(
based_on=c_subst_name+"_barrier")
- from loopy.kernel.instruction import BarrierInstruction
- barrier_insn = BarrierInstruction(
- id=barrier_insn_id,
- depends_on=frozenset([compute_insn_id]),
+ from loopy.kernel.statement import BarrierStatement
+ barrier_stmt = BarrierStatement(
+ id=barrier_stmt_id,
+ depends_on=frozenset([compute_stmt_id]),
kind="global")
- compute_dep_id = barrier_insn_id
+ compute_dep_id = barrier_stmt_id
- added_compute_insns.append(barrier_insn)
+ added_compute_stmts.append(barrier_stmt)
# }}}
@@ -828,58 +828,58 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
access_descriptors, abm,
storage_axis_names, storage_axis_sources,
non1_storage_axis_names,
- temporary_name, compute_insn_id, compute_dep_id,
+ temporary_name, compute_stmt_id, compute_dep_id,
compute_read_variables=get_dependencies(expander(compute_expression)))
kernel = invr.map_kernel(kernel)
kernel = kernel.copy(
- instructions=added_compute_insns + kernel.instructions)
+ statements=added_compute_stmts + kernel.statements)
kernel = rule_mapping_context.finish_kernel(kernel)
# }}}
- # {{{ add dependencies to compute insn
+ # {{{ add dependencies to compute stmt
kernel = kernel.copy(
- instructions=[
- insn.copy(depends_on=frozenset(invr.compute_insn_depends_on))
- if insn.id == compute_insn_id
- else insn
- for insn in kernel.instructions])
+ statements=[
+ stmt.copy(depends_on=frozenset(invr.compute_stmt_depends_on))
+ if stmt.id == compute_stmt_id
+ else stmt
+ for stmt in kernel.statements])
# }}}
- # {{{ propagate storage iname subst to dependencies of compute instructions
+ # {{{ propagate storage iname subst to dependencies of compute statements
from loopy.kernel.tools import find_recursive_dependencies
compute_deps = find_recursive_dependencies(
- kernel, frozenset([compute_insn_id]))
+ kernel, frozenset([compute_stmt_id]))
# FIXME: Need to verify that there are no outside dependencies
# on compute_deps
prior_storage_axis_names = frozenset(storage_axis_subst_dict)
- new_insns = []
- for insn in kernel.instructions:
- if (insn.id in compute_deps
- and insn.within_inames & prior_storage_axis_names):
- insn = (insn
+ new_stmts = []
+ for stmt in kernel.statements:
+ if (stmt.id in compute_deps
+ and stmt.within_inames & prior_storage_axis_names):
+ stmt = (stmt
.with_transformed_expressions(
- lambda expr: expr_subst_map(expr, kernel, insn))
+ lambda expr: expr_subst_map(expr, kernel, stmt))
.copy(within_inames=frozenset(
storage_axis_subst_dict.get(iname, var(iname)).name
- for iname in insn.within_inames)))
+ for iname in stmt.within_inames)))
- new_insns.append(insn)
+ new_stmts.append(stmt)
else:
- new_insns.append(insn)
+ new_stmts.append(stmt)
- kernel = kernel.copy(instructions=new_insns)
+ kernel = kernel.copy(statements=new_stmts)
# }}}
- # {{{ determine inames for compute insn
+ # {{{ determine inames for compute stmt
if precompute_outer_inames is None:
from loopy.kernel.tools import guess_iname_deps_based_on_var_use
@@ -888,7 +888,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
| frozenset(
(expanding_usage_arg_deps | value_inames)
- sweep_inames_set)
- | guess_iname_deps_based_on_var_use(kernel, compute_insn))
+ | guess_iname_deps_based_on_var_use(kernel, compute_stmt))
else:
if not isinstance(precompute_outer_inames, frozenset):
raise TypeError("precompute_outer_inames must be a frozenset")
@@ -897,11 +897,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
| frozenset(non1_storage_axis_names)
kernel = kernel.copy(
- instructions=[
- insn.copy(within_inames=precompute_outer_inames)
- if insn.id == compute_insn_id
- else insn
- for insn in kernel.instructions])
+ statements=[
+ stmt.copy(within_inames=precompute_outer_inames)
+ if stmt.id == compute_stmt_id
+ else stmt
+ for stmt in kernel.statements])
# }}}
diff --git a/loopy/transform/save.py b/loopy/transform/save.py
index 2ba2338b0af541274cc0362c9f71cec9c2887ffc..39824fbd4dd38d40955257bd5fb909774f1ef221 100644
--- a/loopy/transform/save.py
+++ b/loopy/transform/save.py
@@ -30,7 +30,7 @@ import six
from loopy.kernel.data import auto, temp_var_scope
from pytools import memoize_method, Record
from loopy.schedule import (
- EnterLoop, LeaveLoop, RunInstruction,
+ EnterLoop, LeaveLoop, RunStatement,
CallKernel, ReturnFromKernel, Barrier)
from loopy.schedule.tools import get_block_boundaries
@@ -51,12 +51,12 @@ __doc__ = """
class LivenessResult(dict):
- class InstructionResult(Record):
+ class StatementResult(Record):
__slots__ = ["live_in", "live_out"]
@classmethod
def make_empty(cls, nscheditems):
- return cls((idx, cls.InstructionResult(live_in=set(), live_out=set()))
+ return cls((idx, cls.StatementResult(live_in=set(), live_out=set()))
for idx in range(nscheditems))
@@ -83,7 +83,7 @@ class LivenessAnalysis(object):
# Account for empty loop
loop_end = block_bounds[sched_idx + 1]
after = successors[loop_end] | set([sched_idx + 1])
- elif isinstance(next_item, (LeaveLoop, RunInstruction,
+ elif isinstance(next_item, (LeaveLoop, RunStatement,
CallKernel, ReturnFromKernel, Barrier)):
after = set([sched_idx + 1])
else:
@@ -95,7 +95,7 @@ class LivenessAnalysis(object):
# Account for loop
loop_begin = block_bounds[sched_idx]
after |= set([loop_begin])
- elif not isinstance(item, (EnterLoop, RunInstruction,
+ elif not isinstance(item, (EnterLoop, RunStatement,
CallKernel, ReturnFromKernel, Barrier)):
raise LoopyError("unexpected type of schedule item: {ty}"
.format(ty=type(item).__name__))
@@ -109,13 +109,13 @@ class LivenessAnalysis(object):
kill = dict((idx, set()) for idx in range(len(self.schedule)))
for sched_idx, sched_item in enumerate(self.schedule):
- if not isinstance(sched_item, RunInstruction):
+ if not isinstance(sched_item, RunStatement):
continue
- insn = self.kernel.id_to_insn[sched_item.insn_id]
- for var in insn.assignee_var_names():
+ stmt = self.kernel.id_to_stmt[sched_item.stmt_id]
+ for var in stmt.assignee_var_names():
if var not in self.kernel.temporary_variables:
continue
- if not insn.predicates:
+ if not stmt.predicates:
# Fully kills the liveness only when unconditional.
kill[sched_idx].add(var)
if len(self.kernel.temporary_variables[var].shape) > 0:
@@ -127,7 +127,7 @@ class LivenessAnalysis(object):
# or a full write. Instead, we analyze the access
# footprint later on to determine how much to reload/save.
gen[sched_idx].add(var)
- for var in insn.read_dependency_names():
+ for var in stmt.read_dependency_names():
if var not in self.kernel.temporary_variables:
continue
gen[sched_idx].add(var)
@@ -174,10 +174,10 @@ class LivenessAnalysis(object):
def __getitem__(self, sched_idx):
"""
- :arg insn: An instruction name or instance of
- :class:`loopy.instruction.InstructionBase`
+ :arg stmt: A statement name or instance of
+ :class:`loopy.statement.StatementBase`
- :returns: A :class:`LivenessResult` associated with `insn`
+ :returns: A :class:`LivenessResult` associated with `stmt`
"""
return self.liveness()[sched_idx]
@@ -238,20 +238,20 @@ class TemporarySaver(object):
def __init__(self, kernel):
self.kernel = kernel
self.var_name_gen = kernel.get_var_name_generator()
- self.insn_name_gen = kernel.get_instruction_id_generator()
+ self.stmt_name_gen = kernel.get_statement_id_generator()
# These fields keep track of updates to the kernel.
- self.insns_to_insert = []
- self.insns_to_update = {}
+ self.stmts_to_insert = []
+ self.stmts_to_update = {}
self.extra_args_to_add = {}
self.updated_iname_to_tag = {}
self.updated_temporary_variables = {}
- # temporary name -> save or reload insn ids
+ # temporary name -> save or reload stmt ids
from collections import defaultdict
self.temporary_to_save_ids = defaultdict(set)
self.temporary_to_reload_ids = defaultdict(set)
- self.subkernel_to_newly_added_insn_ids = defaultdict(set)
+ self.subkernel_to_newly_added_stmt_ids = defaultdict(set)
# Maps names of base_storage to the name of the temporary
# representative chosen for saves/reloads
@@ -268,9 +268,9 @@ class TemporarySaver(object):
arg.name for arg in kernel.args
if isinstance(arg, ValueArg)))))
- def find_accessing_instructions_in_subkernel(self, temporary, subkernel):
- # Find all accessing instructions in the subkernel. If base_storage is
- # present, this includes instructions that access aliasing memory.
+ def find_accessing_statements_in_subkernel(self, temporary, subkernel):
+ # Find all accessing statements in the subkernel. If base_storage is
+ # present, this includes statements that access aliasing memory.
aliasing_names = set([temporary])
base_storage = self.kernel.temporary_variables[temporary].base_storage
@@ -278,24 +278,24 @@ class TemporarySaver(object):
if base_storage is not None:
aliasing_names |= self.base_storage_to_temporary_map[base_storage]
- from loopy.kernel.tools import get_subkernel_to_insn_id_map
- accessing_insns_in_subkernel = set()
- subkernel_insns = get_subkernel_to_insn_id_map(self.kernel)[subkernel]
+ from loopy.kernel.tools import get_subkernel_to_stmt_id_map
+ accessing_stmts_in_subkernel = set()
+ subkernel_stmts = get_subkernel_to_stmt_id_map(self.kernel)[subkernel]
for name in aliasing_names:
try:
- accessing_insns_in_subkernel |= (
- self.kernel.reader_map()[name] & subkernel_insns)
+ accessing_stmts_in_subkernel |= (
+ self.kernel.reader_map()[name] & subkernel_stmts)
except KeyError:
pass
try:
- accessing_insns_in_subkernel |= (
- self.kernel.writer_map()[name] & subkernel_insns)
+ accessing_stmts_in_subkernel |= (
+ self.kernel.writer_map()[name] & subkernel_stmts)
except KeyError:
pass
- return frozenset(accessing_insns_in_subkernel)
+ return frozenset(accessing_stmts_in_subkernel)
@property
@memoize_method
@@ -356,14 +356,14 @@ class TemporarySaver(object):
try:
pre_barrier = next(item for item in
self.kernel.schedule[subkernel_start::-1]
- if is_global_barrier(item)).originating_insn_id
+ if is_global_barrier(item)).originating_stmt_id
except StopIteration:
pre_barrier = None
try:
post_barrier = next(item for item in
self.kernel.schedule[subkernel_end:]
- if is_global_barrier(item)).originating_insn_id
+ if is_global_barrier(item)).originating_stmt_id
except StopIteration:
post_barrier = None
@@ -379,7 +379,7 @@ class TemporarySaver(object):
In the case of local temporaries, inames that are tagged
hw-local do not contribute to the global storage shape.
"""
- accessor_insn_ids = frozenset(
+ accessor_stmt_ids = frozenset(
self.kernel.reader_map()[temporary.name]
| self.kernel.writer_map()[temporary.name])
@@ -389,13 +389,13 @@ class TemporarySaver(object):
def _sortedtags(tags):
return sorted(tags, key=lambda tag: tag.axis)
- for insn_id in accessor_insn_ids:
- insn = self.kernel.id_to_insn[insn_id]
+ for stmt_id in accessor_stmt_ids:
+ stmt = self.kernel.id_to_stmt[stmt_id]
my_group_tags = []
my_local_tags = []
- for iname in insn.within_inames:
+ for iname in stmt.within_inames:
tag = self.kernel.iname_to_tag.get(iname)
if tag is None:
@@ -418,25 +418,25 @@ class TemporarySaver(object):
if group_tags is None:
group_tags = _sortedtags(my_group_tags)
local_tags = _sortedtags(my_local_tags)
- group_tags_originating_insn_id = insn_id
+ group_tags_originating_stmt_id = stmt_id
if (
group_tags != _sortedtags(my_group_tags)
or local_tags != _sortedtags(my_local_tags)):
raise LoopyError(
- "inconsistent parallel tags across instructions that access "
- "'%s' (specifically, instruction '%s' has tags '%s' but "
- "instruction '%s' has tags '%s')"
+ "inconsistent parallel tags across statements that access "
+ "'%s' (specifically, statement '%s' has tags '%s' but "
+ "statement '%s' has tags '%s')"
% (temporary.name,
- group_tags_originating_insn_id, group_tags + local_tags,
- insn_id, my_group_tags + my_local_tags))
+ group_tags_originating_stmt_id, group_tags + local_tags,
+ stmt_id, my_group_tags + my_local_tags))
if group_tags is None:
assert local_tags is None
return (), ()
group_sizes, local_sizes = (
- self.kernel.get_grid_sizes_for_insn_ids_as_exprs(accessor_insn_ids))
+ self.kernel.get_grid_sizes_for_stmt_ids_as_exprs(accessor_stmt_ids))
if temporary.scope == lp.temp_var_scope.LOCAL:
# Elide local axes in the save slot for local temporaries.
@@ -506,7 +506,7 @@ class TemporarySaver(object):
self.new_subdomain = new_subdomain
- save_or_load_insn_id = self.insn_name_gen(
+ save_or_load_stmt_id = self.stmt_name_gen(
"{name}.{mode}".format(name=temporary, mode=mode))
def add_subscript_if_subscript_nonempty(agg, subscript=()):
@@ -532,15 +532,15 @@ class TemporarySaver(object):
if mode == "save":
args = reversed(args)
- accessing_insns_in_subkernel = self.find_accessing_instructions_in_subkernel(
+ accessing_stmts_in_subkernel = self.find_accessing_statements_in_subkernel(
temporary, subkernel)
if mode == "save":
- depends_on = accessing_insns_in_subkernel
+ depends_on = accessing_stmts_in_subkernel
update_deps = frozenset()
elif mode == "reload":
depends_on = frozenset()
- update_deps = accessing_insns_in_subkernel
+ update_deps = accessing_stmts_in_subkernel
pre_barrier, post_barrier = self.get_enclosing_global_barrier_pair(subkernel)
@@ -550,11 +550,11 @@ class TemporarySaver(object):
if post_barrier is not None:
update_deps |= set([post_barrier])
- # Create the load / store instruction.
+ # Create the load / store statement.
from loopy.kernel.data import Assignment
- save_or_load_insn = Assignment(
+ save_or_load_stmt = Assignment(
*args,
- id=save_or_load_insn_id,
+ id=save_or_load_stmt_id,
within_inames=(
self.subkernel_to_surrounding_inames[subkernel]
| frozenset(hw_inames + dim_inames)),
@@ -564,18 +564,18 @@ class TemporarySaver(object):
boostable_into=frozenset())
if mode == "save":
- self.temporary_to_save_ids[temporary].add(save_or_load_insn_id)
+ self.temporary_to_save_ids[temporary].add(save_or_load_stmt_id)
else:
- self.temporary_to_reload_ids[temporary].add(save_or_load_insn_id)
+ self.temporary_to_reload_ids[temporary].add(save_or_load_stmt_id)
- self.subkernel_to_newly_added_insn_ids[subkernel].add(save_or_load_insn_id)
+ self.subkernel_to_newly_added_stmt_ids[subkernel].add(save_or_load_stmt_id)
- self.insns_to_insert.append(save_or_load_insn)
+ self.stmts_to_insert.append(save_or_load_stmt)
- for insn_id in update_deps:
- insn = self.insns_to_update.get(insn_id, self.kernel.id_to_insn[insn_id])
- self.insns_to_update[insn_id] = insn.copy(
- depends_on=insn.depends_on | frozenset([save_or_load_insn_id]))
+ for stmt_id in update_deps:
+ stmt = self.stmts_to_update.get(stmt_id, self.kernel.id_to_stmt[stmt_id])
+ self.stmts_to_update[stmt_id] = stmt.copy(
+ depends_on=stmt.depends_on | frozenset([save_or_load_stmt_id]))
self.updated_temporary_variables[promoted_temporary.name] = (
promoted_temporary.as_kernel_temporary(self.kernel))
@@ -584,17 +584,17 @@ class TemporarySaver(object):
@memoize_method
def finish(self):
- new_instructions = []
+ new_statements = []
- insns_to_insert = dict((insn.id, insn) for insn in self.insns_to_insert)
+ stmts_to_insert = dict((stmt.id, stmt) for stmt in self.stmts_to_insert)
- for orig_insn in self.kernel.instructions:
- if orig_insn.id in self.insns_to_update:
- new_instructions.append(self.insns_to_update[orig_insn.id])
+ for orig_stmt in self.kernel.statements:
+ if orig_stmt.id in self.stmts_to_update:
+ new_statements.append(self.stmts_to_update[orig_stmt.id])
else:
- new_instructions.append(orig_insn)
- new_instructions.extend(
- sorted(insns_to_insert.values(), key=lambda insn: insn.id))
+ new_statements.append(orig_stmt)
+ new_statements.extend(
+ sorted(stmts_to_insert.values(), key=lambda stmt: stmt.id))
self.updated_iname_to_tag.update(self.kernel.iname_to_tag)
self.updated_temporary_variables.update(self.kernel.temporary_variables)
@@ -606,22 +606,22 @@ class TemporarySaver(object):
kernel = self.kernel.copy(
domains=new_domains,
- instructions=new_instructions,
+ statements=new_statements,
iname_to_tag=self.updated_iname_to_tag,
temporary_variables=self.updated_temporary_variables,
- overridden_get_grid_sizes_for_insn_ids=None)
+ overridden_get_grid_sizes_for_stmt_ids=None)
# Add nosync directives to any saves or reloads that were added with a
# potential dependency chain.
from loopy.kernel.tools import get_subkernels
for subkernel in get_subkernels(kernel):
- relevant_insns = self.subkernel_to_newly_added_insn_ids[subkernel]
+ relevant_stmts = self.subkernel_to_newly_added_stmt_ids[subkernel]
from itertools import product
for temporary in self.temporary_to_reload_ids:
for source, sink in product(
- relevant_insns & self.temporary_to_reload_ids[temporary],
- relevant_insns & self.temporary_to_save_ids[temporary]):
+ relevant_stmts & self.temporary_to_reload_ids[temporary],
+ relevant_stmts & self.temporary_to_save_ids[temporary]):
kernel = lp.add_nosync(kernel, "global", source, sink)
from loopy.kernel.tools import assign_automatic_axes
@@ -662,7 +662,7 @@ class TemporarySaver(object):
+ len(promoted_temporary.hw_dims))
for dim_idx, dim_size in enumerate(promoted_temporary.non_hw_dims):
- new_iname = self.insn_name_gen("{name}_{mode}_axis_{dim}_{sk}".
+ new_iname = self.stmt_name_gen("{name}_{mode}_axis_{dim}_{sk}".
format(name=orig_temporary.name,
mode=mode,
dim=dim_idx,
@@ -689,7 +689,7 @@ class TemporarySaver(object):
# Add hardware dims.
for hw_iname_idx, (hw_tag, dim) in enumerate(
zip(promoted_temporary.hw_tags, promoted_temporary.hw_dims)):
- new_iname = self.insn_name_gen("{name}_{mode}_hw_dim_{dim}_{sk}".
+ new_iname = self.stmt_name_gen("{name}_{mode}_hw_dim_{dim}_{sk}".
format(name=orig_temporary.name,
mode=mode,
dim=hw_iname_idx,
@@ -721,7 +721,7 @@ class TemporarySaver(object):
def save_and_reload_temporaries(knl):
"""
- Add instructions to save and reload temporary variables that are live
+ Add statements to save and reload temporary variables that are live
across kernel calls.
The basic code transformation turns schedule segments::
diff --git a/loopy/transform/statement.py b/loopy/transform/statement.py
new file mode 100644
index 0000000000000000000000000000000000000000..afea0430e891d1313ebb195ba10c41729c7d6bf5
--- /dev/null
+++ b/loopy/transform/statement.py
@@ -0,0 +1,339 @@
+from __future__ import division, absolute_import
+
+__copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import six # noqa
+
+from loopy.diagnostic import LoopyError
+
+
+# {{{ find_statements
+
+def find_statements(kernel, stmt_match):
+ from loopy.match import parse_match
+ match = parse_match(stmt_match)
+ return [stmt for stmt in kernel.statements if match(kernel, stmt)]
+
+# }}}
+
+
+# {{{ map_statements
+
+def map_statements(kernel, stmt_match, f):
+ from loopy.match import parse_match
+ match = parse_match(stmt_match)
+
+ new_stmts = []
+
+ for stmt in kernel.statements:
+ if match(kernel, stmt):
+ new_stmts.append(f(stmt))
+ else:
+ new_stmts.append(stmt)
+
+ return kernel.copy(statements=new_stmts)
+
+# }}}
+
+
+# {{{ set_statement_priority
+
+def set_statement_priority(kernel, stmt_match, priority):
+ """Set the priority of statements matching *stmt_match* to *priority*.
+
+ *stmt_match* may be any statement id match understood by
+ :func:`loopy.match.parse_match`.
+ """
+
+ def set_prio(stmt):
+ return stmt.copy(priority=priority)
+
+ return map_statements(kernel, stmt_match, set_prio)
+
+# }}}
+
+
+# {{{ add_dependency
+
+def add_dependency(kernel, stmt_match, depends_on):
+ """Add the statement dependency *dependency* to the statements matched
+ by *stmt_match*.
+
+ *stmt_match* and *depends_on* may be any statement id match understood by
+ :func:`loopy.match.parse_match`.
+
+ .. versionchanged:: 2016.3
+
+ Third argument renamed to *depends_on* for clarity, allowed to
+ be not just ID but also match expression.
+ """
+
+ if isinstance(depends_on, str) and depends_on in kernel.id_to_stmt:
+ added_deps = frozenset([depends_on])
+ else:
+ added_deps = frozenset(
+ dep.id for dep in find_statements(kernel, depends_on))
+
+ if not added_deps:
+ raise LoopyError("no statements found matching '%s' "
+ "(to add as dependencies)" % depends_on)
+
+ matched = [False]
+
+ def add_dep(stmt):
+ new_deps = stmt.depends_on
+ matched[0] = True
+ if new_deps is None:
+ new_deps = added_deps
+ else:
+ new_deps = new_deps | added_deps
+
+ return stmt.copy(depends_on=new_deps)
+
+ result = map_statements(kernel, stmt_match, add_dep)
+
+ if not matched[0]:
+ raise LoopyError("no statements found matching '%s' "
+ "(to which dependencies would be added)" % stmt_match)
+
+ return result
+
+# }}}
+
+
+# {{{ remove_statements
+
+def remove_statements(kernel, stmt_ids):
+ """Return a new kernel with statements in *stmt_ids* removed.
+
+ Dependencies across (one, for now) deleted statements are propagated.
+ Behavior is undefined for now for chains of dependencies within the
+ set of deleted statements.
+
+ This also updates *no_sync_with* for all statements.
+ """
+
+ if not stmt_ids:
+ return kernel
+
+ assert isinstance(stmt_ids, set)
+ id_to_stmt = kernel.id_to_stmt
+
+ new_stmts = []
+ for stmt in kernel.statements:
+ if stmt.id in stmt_ids:
+ continue
+
+ # transitively propagate dependencies
+ # (only one level for now)
+ if stmt.depends_on is None:
+ depends_on = frozenset()
+ else:
+ depends_on = stmt.depends_on
+
+ new_deps = depends_on - stmt_ids
+
+ for dep_id in depends_on & stmt_ids:
+ new_deps = new_deps | id_to_stmt[dep_id].depends_on
+
+ # update no_sync_with
+
+ new_no_sync_with = frozenset((stmt_id, scope)
+ for stmt_id, scope in stmt.no_sync_with
+ if stmt_id not in stmt_ids)
+
+ new_stmts.append(
+ stmt.copy(depends_on=new_deps, no_sync_with=new_no_sync_with))
+
+ return kernel.copy(
+ statements=new_stmts)
+
+# }}}
+
+
+# {{{ replace_statement_ids
+
+def replace_statement_ids(kernel, replacements):
+ new_stmts = []
+
+ for stmt in kernel.statements:
+ changed = False
+ new_depends_on = []
+ new_no_sync_with = []
+
+ for dep in stmt.depends_on:
+ if dep in replacements:
+ new_depends_on.extend(replacements[dep])
+ changed = True
+ else:
+ new_depends_on.append(dep)
+
+ for stmt_id, scope in stmt.no_sync_with:
+ if stmt_id in replacements:
+ new_no_sync_with.extend(
+ (repl, scope) for repl in replacements[stmt_id])
+ changed = True
+ else:
+ new_no_sync_with.append((stmt_id, scope))
+
+ new_stmts.append(
+ stmt.copy(
+ depends_on=frozenset(new_depends_on),
+ no_sync_with=frozenset(new_no_sync_with))
+ if changed else stmt)
+
+ return kernel.copy(statements=new_stmts)
+
+# }}}
+
+
+# {{{ tag_statements
+
+def tag_statements(kernel, new_tag, within=None):
+ from loopy.match import parse_match
+ within = parse_match(within)
+
+ new_stmts = []
+ for stmt in kernel.statements:
+ if within(kernel, stmt):
+ new_stmts.append(
+ stmt.copy(tags=stmt.tags | frozenset([new_tag])))
+ else:
+ new_stmts.append(stmt)
+
+ return kernel.copy(statements=new_stmts)
+
+# }}}
+
+
+# {{{ add nosync
+
+def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False):
+ """Add a *no_sync_with* directive between *source* and *sink*.
+ *no_sync_with* is only added if *sink* depends on *source* or
+ if the statement pair is in a conflicting group.
+
+ This function does not check for the presence of a memory dependency.
+
+ :arg kernel: The kernel
+ :arg source: Either a single statement id, or any statement id
+ match understood by :func:`loopy.match.parse_match`.
+ :arg sink: Either a single statement id, or any statement id
+ match understood by :func:`loopy.match.parse_match`.
+ :arg scope: A valid *no_sync_with* scope. See
+ :attr:`loopy.StatementBase.no_sync_with` for allowable scopes.
+ :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with*
+ to both the source and sink statements, otherwise the directive
+ is only added to the sink statements.
+ :arg force: A :class:`bool`. If *True*, add a *no_sync_with* directive
+ even without the presence of a dependency edge or conflicting
+ statement group.
+
+ :return: The updated kernel
+ """
+
+ if isinstance(source, str) and source in kernel.id_to_stmt:
+ sources = frozenset([source])
+ else:
+ sources = frozenset(
+ source.id for source in find_statements(kernel, source))
+
+ if isinstance(sink, str) and sink in kernel.id_to_stmt:
+ sinks = frozenset([sink])
+ else:
+ sinks = frozenset(
+ sink.id for sink in find_statements(kernel, sink))
+
+ def stmts_in_conflicting_groups(stmt1_id, stmt2_id):
+ stmt1 = kernel.id_to_stmt[stmt1_id]
+ stmt2 = kernel.id_to_stmt[stmt2_id]
+ return (
+ bool(stmt1.groups & stmt2.conflicts_with_groups)
+ or
+ bool(stmt2.groups & stmt1.conflicts_with_groups))
+
+ from collections import defaultdict
+ nosync_to_add = defaultdict(set)
+
+ for sink in sinks:
+ for source in sources:
+
+ needs_nosync = force or (
+ source in kernel.recursive_stmt_dep_map()[sink]
+ or stmts_in_conflicting_groups(source, sink))
+
+ if not needs_nosync:
+ continue
+
+ nosync_to_add[sink].add((source, scope))
+ if bidirectional:
+ nosync_to_add[source].add((sink, scope))
+
+ new_statements = list(kernel.statements)
+
+ for i, stmt in enumerate(new_statements):
+ if stmt.id in nosync_to_add:
+ new_statements[i] = stmt.copy(no_sync_with=stmt.no_sync_with
+ | frozenset(nosync_to_add[stmt.id]))
+
+ return kernel.copy(statements=new_statements)
+
+# }}}
+
+
+# {{{ uniquify_statement_ids
+
+def uniquify_statement_ids(kernel):
+ """Converts any ids that are :class:`loopy.UniqueName` or *None* into unique
+ strings.
+
+ This function does *not* deduplicate existing statement ids.
+ """
+
+ from loopy.kernel.creation import UniqueName
+
+ stmt_ids = set(
+ stmt.id for stmt in kernel.statements
+ if stmt.id is not None and not isinstance(stmt.id, UniqueName))
+
+ from pytools import UniqueNameGenerator
+ stmt_id_gen = UniqueNameGenerator(stmt_ids)
+
+ new_statements = []
+
+ for stmt in kernel.statements:
+ if stmt.id is None:
+ new_statements.append(
+ stmt.copy(id=stmt_id_gen("stmt")))
+ elif isinstance(stmt.id, UniqueName):
+ new_statements.append(
+ stmt.copy(id=stmt_id_gen(stmt.id.name)))
+ else:
+ new_statements.append(stmt)
+
+ return kernel.copy(statements=new_statements)
+
+# }}}
+
+
+# vim: foldmethod=marker
diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py
index 79ceff9fdf1e2c4b3b544e8ae85f8194b36ec444..0fb706e2dd53be12fb066fb8bf64ec10d3108d16 100644
--- a/loopy/transform/subst.py
+++ b/loopy/transform/subst.py
@@ -40,7 +40,7 @@ logger = logging.getLogger(__name__)
class ExprDescriptor(ImmutableRecord):
- __slots__ = ["insn", "expr", "unif_var_dict"]
+ __slots__ = ["stmt", "expr", "unif_var_dict"]
# {{{ extract_subst
@@ -128,7 +128,7 @@ def extract_subst(kernel, subst_name, template, parameters=()):
expr_descriptors.append(
ExprDescriptor(
- insn=insn,
+ stmt=stmt,
expr=expr,
unif_var_dict=dict((lhs.name, rhs)
for lhs, rhs in urec.equations)))
@@ -140,8 +140,8 @@ def extract_subst(kernel, subst_name, template, parameters=()):
CallbackMapper, WalkMapper, IdentityMapper)
dfmapper = CallbackMapper(gather_exprs, WalkMapper())
- for insn in kernel.instructions:
- dfmapper(insn.expression)
+ for stmt in kernel.statements:
+ dfmapper(stmt.expression)
for sr in six.itervalues(kernel.substitutions):
dfmapper(sr.expression)
@@ -151,7 +151,7 @@ def extract_subst(kernel, subst_name, template, parameters=()):
if not expr_descriptors:
raise RuntimeError("no expressions matching '%s'" % template)
- # {{{ substitute rule into instructions
+ # {{{ substitute rule into statements
def replace_exprs(expr, mapper):
found = False
@@ -175,11 +175,11 @@ def extract_subst(kernel, subst_name, template, parameters=()):
cbmapper = CallbackMapper(replace_exprs, IdentityMapper())
- new_insns = []
+ new_stmts = []
- for insn in kernel.instructions:
- new_expr = cbmapper(insn.expression)
- new_insns.append(insn.copy(expression=new_expr))
+ for stmt in kernel.statements:
+ new_expr = cbmapper(stmt.expression)
+ new_stmts.append(stmt.copy(expression=new_expr))
from loopy.kernel.data import SubstitutionRule
new_substs = {
@@ -196,7 +196,7 @@ def extract_subst(kernel, subst_name, template, parameters=()):
# }}}
return kernel.copy(
- instructions=new_insns,
+ statements=new_stmts,
substitutions=new_substs)
# }}}
@@ -205,14 +205,14 @@ def extract_subst(kernel, subst_name, template, parameters=()):
# {{{ assignment_to_subst
class AssignmentToSubstChanger(RuleAwareIdentityMapper):
- def __init__(self, rule_mapping_context, lhs_name, definition_insn_ids,
+ def __init__(self, rule_mapping_context, lhs_name, definition_stmt_ids,
usage_to_definition, extra_arguments, within):
self.var_name_gen = rule_mapping_context.make_unique_var_name
super(AssignmentToSubstChanger, self).__init__(rule_mapping_context)
self.lhs_name = lhs_name
- self.definition_insn_ids = definition_insn_ids
+ self.definition_stmt_ids = definition_stmt_ids
self.usage_to_definition = usage_to_definition
from pymbolic import var
@@ -220,18 +220,18 @@ class AssignmentToSubstChanger(RuleAwareIdentityMapper):
self.within = within
- self.definition_insn_id_to_subst_name = {}
+ self.definition_stmt_id_to_subst_name = {}
self.saw_unmatched_usage_sites = {}
- for def_id in self.definition_insn_ids:
+ for def_id in self.definition_stmt_ids:
self.saw_unmatched_usage_sites[def_id] = False
- def get_subst_name(self, def_insn_id):
+ def get_subst_name(self, def_stmt_id):
try:
- return self.definition_insn_id_to_subst_name[def_insn_id]
+ return self.definition_stmt_id_to_subst_name[def_stmt_id]
except KeyError:
subst_name = self.var_name_gen(self.lhs_name+"_subst")
- self.definition_insn_id_to_subst_name[def_insn_id] = subst_name
+ self.definition_stmt_id_to_subst_name[def_stmt_id] = subst_name
return subst_name
def map_variable(self, expr, expn_state):
@@ -255,16 +255,16 @@ class AssignmentToSubstChanger(RuleAwareIdentityMapper):
expr, expn_state)
def transform_access(self, index, expn_state):
- my_insn_id = expn_state.insn_id
+ my_stmt_id = expn_state.stmt_id
- if my_insn_id in self.definition_insn_ids:
+ if my_stmt_id in self.definition_stmt_ids:
return None
- my_def_id = self.usage_to_definition[my_insn_id]
+ my_def_id = self.usage_to_definition[my_stmt_id]
if not self.within(
expn_state.kernel,
- expn_state.instruction,
+ expn_state.statement,
expn_state.stack):
self.saw_unmatched_usage_sites[my_def_id] = True
return None
@@ -314,31 +314,31 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
from loopy.kernel.creation import apply_single_writer_depencency_heuristic
dep_kernel = apply_single_writer_depencency_heuristic(dep_kernel)
- id_to_insn = dep_kernel.id_to_insn
+ id_to_stmt = dep_kernel.id_to_stmt
- def get_relevant_definition_insn_id(usage_insn_id):
- insn = id_to_insn[usage_insn_id]
+ def get_relevant_definition_stmt_id(usage_stmt_id):
+ stmt = id_to_stmt[usage_stmt_id]
def_id = set()
- for dep_id in insn.depends_on:
- dep_insn = id_to_insn[dep_id]
- if lhs_name in dep_insn.write_dependency_names():
- if lhs_name in dep_insn.read_dependency_names():
- raise LoopyError("instruction '%s' both reads *and* "
+ for dep_id in stmt.depends_on:
+ dep_stmt = id_to_stmt[dep_id]
+ if lhs_name in dep_stmt.write_dependency_names():
+ if lhs_name in dep_stmt.read_dependency_names():
+ raise LoopyError("statement '%s' both reads *and* "
"writes '%s'--cannot transcribe to substitution "
"rule" % (dep_id, lhs_name))
def_id.add(dep_id)
else:
- rec_result = get_relevant_definition_insn_id(dep_id)
+ rec_result = get_relevant_definition_stmt_id(dep_id)
if rec_result is not None:
def_id.add(rec_result)
if len(def_id) > 1:
raise LoopyError("more than one write to '%s' found in "
"depdendencies of '%s'--definition cannot be resolved "
- "(writer instructions ids: %s)"
- % (lhs_name, usage_insn_id, ", ".join(def_id)))
+ "(writer statements ids: %s)"
+ % (lhs_name, usage_stmt_id, ", ".join(def_id)))
if not def_id:
return None
@@ -349,26 +349,26 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
usage_to_definition = {}
- for insn in dep_kernel.instructions:
- if lhs_name not in insn.read_dependency_names():
+ for stmt in dep_kernel.statements:
+ if lhs_name not in stmt.read_dependency_names():
continue
- def_id = get_relevant_definition_insn_id(insn.id)
+ def_id = get_relevant_definition_stmt_id(stmt.id)
if def_id is None:
raise LoopyError("no write to '%s' found in dependency tree "
"of '%s'--definition cannot be resolved"
- % (lhs_name, insn.id))
+ % (lhs_name, stmt.id))
- usage_to_definition[insn.id] = def_id
+ usage_to_definition[stmt.id] = def_id
- definition_insn_ids = set()
- for insn in kernel.instructions:
- if lhs_name in insn.write_dependency_names():
- definition_insn_ids.add(insn.id)
+ definition_stmt_ids = set()
+ for stmt in kernel.statements:
+ if lhs_name in stmt.write_dependency_names():
+ definition_stmt_ids.add(stmt.id)
# }}}
- if not definition_insn_ids:
+ if not definition_stmt_ids:
raise LoopyError("no assignments to variable '%s' found"
% lhs_name)
@@ -378,7 +378,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
rule_mapping_context = SubstitutionRuleMappingContext(
kernel.substitutions, kernel.get_var_name_generator())
tts = AssignmentToSubstChanger(rule_mapping_context,
- lhs_name, definition_insn_ids,
+ lhs_name, definition_stmt_ids,
usage_to_definition, extra_arguments, within)
kernel = rule_mapping_context.finish_kernel(tts.map_kernel(kernel))
@@ -388,27 +388,27 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
# {{{ create new substitution rules
new_substs = kernel.substitutions.copy()
- for def_id, subst_name in six.iteritems(tts.definition_insn_id_to_subst_name):
- def_insn = kernel.id_to_insn[def_id]
+ for def_id, subst_name in six.iteritems(tts.definition_stmt_id_to_subst_name):
+ def_stmt = kernel.id_to_stmt[def_id]
from loopy.kernel.data import Assignment
- assert isinstance(def_insn, Assignment)
+ assert isinstance(def_stmt, Assignment)
from pymbolic.primitives import Variable, Subscript
- if isinstance(def_insn.assignee, Subscript):
- indices = def_insn.assignee.index_tuple
- elif isinstance(def_insn.assignee, Variable):
+ if isinstance(def_stmt.assignee, Subscript):
+ indices = def_stmt.assignee.index_tuple
+ elif isinstance(def_stmt.assignee, Variable):
indices = ()
else:
raise LoopyError(
"Unrecognized LHS type: %s"
- % type(def_insn.assignee).__name__)
+ % type(def_stmt.assignee).__name__)
arguments = []
for i in indices:
if not isinstance(i, Variable):
- raise LoopyError("In defining instruction '%s': "
+ raise LoopyError("In defining statement '%s': "
"asignee index '%s' is not a plain variable. "
"Perhaps use loopy.affine_map_inames() "
"to perform substitution." % (def_id, i))
@@ -418,7 +418,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
new_substs[subst_name] = SubstitutionRule(
name=subst_name,
arguments=tuple(arguments) + extra_arguments,
- expression=def_insn.expression)
+ expression=def_stmt.expression)
# }}}
@@ -450,11 +450,11 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None,
# }}}
import loopy as lp
- kernel = lp.remove_instructions(
+ kernel = lp.remove_statements(
kernel,
set(
- insn_id
- for insn_id, still_used in six.iteritems(
+ stmt_id
+ for stmt_id, still_used in six.iteritems(
tts.saw_unmatched_usage_sites)
if not still_used))
diff --git a/loopy/type_inference.py b/loopy/type_inference.py
index 409cbbc5ebd5feb13b04eeba1671f639663bfcf1..b38ab7fa57ea6e90d6f0d592b4b9f66d1efa603f 100644
--- a/loopy/type_inference.py
+++ b/loopy/type_inference.py
@@ -418,17 +418,17 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander):
type_inf_mapper = type_inf_mapper.copy()
- for writer_insn_id in kernel.writer_map().get(var_name, []):
- writer_insn = kernel.id_to_insn[writer_insn_id]
- if not isinstance(writer_insn, lp.MultiAssignmentBase):
+ for writer_stmt_id in kernel.writer_map().get(var_name, []):
+ writer_stmt = kernel.id_to_stmt[writer_stmt_id]
+ if not isinstance(writer_stmt, lp.MultiAssignmentBase):
continue
- expr = subst_expander(writer_insn.expression)
+ expr = subst_expander(writer_stmt.expression)
debug(" via expr %s", expr)
- if isinstance(writer_insn, lp.Assignment):
+ if isinstance(writer_stmt, lp.Assignment):
result = type_inf_mapper(expr, return_dtype_set=True)
- elif isinstance(writer_insn, lp.CallInstruction):
+ elif isinstance(writer_stmt, lp.CallStatement):
return_dtype_set = type_inf_mapper(expr, return_tuple=True,
return_dtype_set=True)
@@ -437,7 +437,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander):
result_i = None
found = False
for assignee, comp_dtype_set in zip(
- writer_insn.assignee_var_names(), return_dtype_set):
+ writer_stmt.assignee_var_names(), return_dtype_set):
if assignee == var_name:
found = True
result_i = comp_dtype_set
@@ -526,8 +526,8 @@ def infer_unknown_types(kernel, expect_completion=False):
dep_graph = dict(
(written_var, set(
read_var
- for insn_id in writer_map.get(written_var, [])
- for read_var in kernel.id_to_insn[insn_id].read_dependency_names()
+ for stmt_id in writer_map.get(written_var, [])
+ for read_var in kernel.id_to_stmt[stmt_id].read_dependency_names()
if read_var in names_for_type_inference))
for written_var in names_for_type_inference)
diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py
index a2cba7c5766f5e66ab0e87608ee07b3dff5269ad..ba5c7ecaa65784d9e7b3c0f9c0c0dbf4ca5e075b 100644
--- a/proto-tests/test_fem_assembly.py
+++ b/proto-tests/test_fem_assembly.py
@@ -51,7 +51,7 @@ def test_laplacian_stiffness(ctx_factory):
# This (mostly) reproduces Figure 3.1.
knl = lp.tag_inames(knl, {"dx_axis": "unr"})
- return knl, ["K", "i", "j", "q", "ax_b_insn"]
+ return knl, ["K", "i", "j", "q", "ax_b_stmt"]
def variant_pg4(knl):
# This (mostly) reproduces the unlabeled code snippet on pg. 4.
@@ -60,7 +60,7 @@ def test_laplacian_stiffness(ctx_factory):
Ncloc = 16
knl = lp.split_iname(knl, "K", Ncloc,
outer_iname="Ko", inner_iname="Kloc")
- return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_insn"]
+ return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_stmt"]
def variant_fig32(knl):
# This (mostly) reproduces Figure 3.2.
@@ -71,7 +71,7 @@ def test_laplacian_stiffness(ctx_factory):
knl = lp.precompute(knl, "dPsi", np.float32, ["i", "q", "dx_axis"],
default_tag=None)
knl = lp.tag_inames(knl, {"dx_axis": "unr", "dxi": "unr"})
- return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_insn"]
+ return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_stmt"]
def variant_fig33(knl):
# This is meant to (mostly) reproduce Figure 3.3.
@@ -97,7 +97,7 @@ def test_laplacian_stiffness(ctx_factory):
outer_iname="Ko", inner_iname="Kloc",
outer_tag="g.0")
knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"})
- return knl, ["K", "i", "j", "q", "ax_b_insn"]
+ return knl, ["K", "i", "j", "q", "ax_b_stmt"]
def variant_simple_gpu_prefetch(knl):
# This adds prefetching to the GPU variant above.
@@ -116,7 +116,7 @@ def test_laplacian_stiffness(ctx_factory):
knl = lp.add_prefetch(knl, "DPsi", [0, 1, 2])
knl = lp.add_prefetch(knl, "jacInv", [0, 1, 3])
knl = lp.add_prefetch(knl, "jacDet", [1])
- return knl, ["K", "i", "j", "q", "ax_b_insn"]
+ return knl, ["K", "i", "j", "q", "ax_b_stmt"]
# Plug in variant name here
# |
diff --git a/stmt-compat-fixes.patch b/stmt-compat-fixes.patch
new file mode 100644
index 0000000000000000000000000000000000000000..7cd61941ee308878de6ebf28f67857abd682abf5
--- /dev/null
+++ b/stmt-compat-fixes.patch
@@ -0,0 +1,92 @@
+diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
+index 68fcca1..6d788df 100644
+--- a/loopy/kernel/__init__.py
++++ b/loopy/kernel/__init__.py
+@@ -186,7 +186,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
+
+ # {{{ constructor
+
+- def __init__(self, domains, statements, args=[], schedule=None,
++ def __init__(self, domains, statements=None, args=[], schedule=None,
+ name="loopy_kernel",
+ preambles=[],
+ preamble_generators=[],
+@@ -213,7 +213,12 @@ class LoopKernel(ImmutableRecordWithoutPickling):
+ state=kernel_state.INITIAL,
+ target=None,
+
+- overridden_get_grid_sizes_for_stmt_ids=None):
++ overridden_get_grid_sizes_for_stmt_ids=None,
++
++ # compat
++ instructions=None,
++ overridden_get_grid_sizes_for_insn_ids=None,
++ ):
+ """
+ :arg overridden_get_grid_sizes_for_stmt_ids: A callable. When kernels get
+ intersected in slab decomposition, their grid sizes shouldn't
+@@ -224,6 +229,23 @@ class LoopKernel(ImmutableRecordWithoutPickling):
+ from loopy.kernel.tools import SetOperationCacheManager
+ cache_manager = SetOperationCacheManager()
+
++ if instructions is not None and statements is not None:
++ raise TypeError("may not specify both instructions and statements")
++ elif instructions is None and statements is None:
++ raise TypeError(
++ "must specify exactly one of instructions and statements")
++ elif instructions is not None:
++ statements = instructions
++
++ if (overridden_get_grid_sizes_for_insn_ids is not None
++ and overridden_get_grid_sizes_for_stmt_ids is not None):
++ raise TypeError("may not specify both "
++ "overridden_get_grid_sizes_for_insn_ids "
++ "and overridden_get_grid_sizes_for_stmt_ids")
++ elif overridden_get_grid_sizes_for_insn_ids is not None:
++ overridden_get_grid_sizes_for_stmt_ids = \
++ overridden_get_grid_sizes_for_insn_ids
++
+ # {{{ process assumptions
+
+ if assumptions is None:
+@@ -1094,7 +1117,8 @@ class LoopKernel(ImmutableRecordWithoutPickling):
+ "tags",
+ "variables",
+ "rules",
+- "statements",
++ "instructions",
++ "statements",
+ "Dependencies",
+ "schedule",
+ ])
+@@ -1171,7 +1195,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
+ for rule_name in natsorted(six.iterkeys(kernel.substitutions)):
+ lines.append(str(kernel.substitutions[rule_name]))
+
+- if "statements" in what:
++ if "instructions" in what or "statements" in what:
+ lines.extend(sep)
+ if show_labels:
+ lines.append("STATEMENTS:")
+@@ -1387,6 +1411,21 @@ class LoopKernel(ImmutableRecordWithoutPickling):
+
+ # }}}
+
++ # {{{ "insn" compat goop
++
++ @property
++ def id_to_insn(self):
++ return self.id_to_stmt
++
++ @property
++ def instructions(self):
++ return self.statements
++
++ def get_instruction_id_generator(self, based_on="insn"):
++ return self.get_statement_id_generator(based_on)
++
++ # }}}
++
+ # }}}
+
+ # vim: foldmethod=marker
diff --git a/test/test_diff.py b/test/test_diff.py
index 95471f9b126fd6b763530d115c21509d14d2ba47..c4d752349d36396ca7de1cdd60b01dc657c011bd 100644
--- a/test/test_diff.py
+++ b/test/test_diff.py
@@ -65,7 +65,7 @@ def test_diff(ctx_factory):
dknl, diff_map = diff_kernel(knl, "z", "x")
dknl = lp.remove_unused_arguments(dknl)
- dknl = lp.add_inames_to_insn(dknl, "diff_i0", "writes:a_dx or writes:a")
+ dknl = lp.add_inames_to_stmt(dknl, "diff_i0", "writes:a_dx or writes:a")
print(dknl)
diff --git a/test/test_fortran.py b/test/test_fortran.py
index 6e05aa6adba66ce0a1896527249d321de104c512..4fe79c2bf4da553d9c4d92cff9dc8fd3c28f0360 100644
--- a/test/test_fortran.py
+++ b/test/test_fortran.py
@@ -267,7 +267,7 @@ def test_tagged(ctx_factory):
knl, = lp.parse_fortran(fortran_src)
- assert sum(1 for insn in lp.find_instructions(knl, "tag:input")) == 2
+ assert sum(1 for stmt in lp.find_statements(knl, "tag:input")) == 2
@pytest.mark.parametrize("buffer_inames", [
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 772d536d1e00fedc0b7abcd2f8c05350fe3b633e..e7c6897578f5b483fa90bc4c69dadb1ce428a625 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -290,7 +290,7 @@ def test_rank_one(ctx_factory):
knl = lp.add_prefetch(knl, "a")
knl = lp.add_prefetch(knl, "b")
knl = lp.prioritize_loops(knl, ["i", "j"])
- knl = lp.add_inames_to_insn(knl, "i", "writes:b_fetch")
+ knl = lp.add_inames_to_stmt(knl, "i", "writes:b_fetch")
return knl
def variant_2(knl):
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 704fd391f33ab9f3a24b3cc2b534a5b61bd3e90b..8fcba1e2162549c691b0662fa774ab5711ef6c0a 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -628,14 +628,14 @@ def test_vector_ilp_with_prefetch(ctx_factory):
assert len(list(re.finditer("barrier", code))) == 1
-def test_c_instruction(ctx_factory):
+def test_c_statement(ctx_factory):
#logging.basicConfig(level=logging.DEBUG)
ctx = ctx_factory()
knl = lp.make_kernel(
"{[i,j]: 0<=i,jt = 1 {id=insn1,nosync=insn1}
- t = 2 {id=insn2,nosync=insn1:insn2}
- t = 3 {id=insn3,nosync=insn1@local:insn2@global:insn3@any}
- t = 4 {id=insn4,nosync_query=id:insn*@local}
- t = 5 {id=insn5,nosync_query=id:insn1}
+ <>t = 1 {id=stmt1,nosync=stmt1}
+ t = 2 {id=stmt2,nosync=stmt1:stmt2}
+ t = 3 {id=stmt3,nosync=stmt1@local:stmt2@global:stmt3@any}
+ t = 4 {id=stmt4,nosync_query=id:stmt*@local}
+ t = 5 {id=stmt5,nosync_query=id:stmt1}
""",
options=lp.Options(allow_terminal_colors=False))
kernel_str = str(knl)
print(kernel_str)
- assert "id=insn1, no_sync_with=insn1@any" in kernel_str
- assert "id=insn2, no_sync_with=insn1@any:insn2@any" in kernel_str
- assert "id=insn3, no_sync_with=insn1@local:insn2@global:insn3@any" in kernel_str
- assert "id=insn4, no_sync_with=insn1@local:insn2@local:insn3@local:insn5@local" in kernel_str # noqa
- assert "id=insn5, no_sync_with=insn1@any" in kernel_str
+ assert "id=stmt1, no_sync_with=stmt1@any" in kernel_str
+ assert "id=stmt2, no_sync_with=stmt1@any:stmt2@any" in kernel_str
+ assert "id=stmt3, no_sync_with=stmt1@local:stmt2@global:stmt3@any" in kernel_str
+ assert "id=stmt4, no_sync_with=stmt1@local:stmt2@local:stmt3@local:stmt5@local" in kernel_str # noqa
+ assert "id=stmt5, no_sync_with=stmt1@any" in kernel_str
def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()):
- from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop)
+ from loopy.schedule import (RunStatement, Barrier, EnterLoop, LeaveLoop)
watch_for_barrier = False
seen_barrier = False
loop_level = 0
for sched_item in knl.schedule:
- if isinstance(sched_item, RunInstruction):
- if sched_item.insn_id == id1:
+ if isinstance(sched_item, RunStatement):
+ if sched_item.stmt_id == id1:
watch_for_barrier = True
- elif sched_item.insn_id == id2:
+ elif sched_item.stmt_id == id2:
assert watch_for_barrier
assert seen_barrier
return
@@ -2313,17 +2313,17 @@ def test_barrier_in_overridden_get_grid_size_expanded_kernel():
vecsize = 16
knl = lp.split_iname(knl, 'i', vecsize, inner_tag='l.0')
- # artifically expand via overridden_get_grid_sizes_for_insn_ids
+ # artificially expand via overridden_get_grid_sizes_for_stmt_ids
class GridOverride(object):
def __init__(self, clean, vecsize=vecsize):
self.clean = clean
self.vecsize = vecsize
- def __call__(self, insn_ids, ignore_auto=True):
- gsize, _ = self.clean.get_grid_sizes_for_insn_ids(insn_ids, ignore_auto)
+ def __call__(self, stmt_ids, ignore_auto=True):
+ gsize, _ = self.clean.get_grid_sizes_for_stmt_ids(stmt_ids, ignore_auto)
return gsize, (self.vecsize,)
- knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=GridOverride(
+ knl = knl.copy(overridden_get_grid_sizes_for_stmt_ids=GridOverride(
knl.copy(), vecsize))
# make sure we can generate the code
lp.generate_code_v2(knl)
@@ -2384,7 +2384,7 @@ def test_global_barrier_order_finding():
assert lp.get_global_barrier_order(knl) == ("top", "yoink", "postloop")
- for insn, barrier in (
+ for stmt, barrier in (
("nop", None),
("top", None),
("wr_z", "top"),
@@ -2392,7 +2392,7 @@ def test_global_barrier_order_finding():
("yoink", "top"),
("postloop", "yoink"),
("zzzv", "postloop")):
- assert lp.find_most_recent_global_barrier(knl, insn) == barrier
+ assert lp.find_most_recent_global_barrier(knl, stmt) == barrier
def test_global_barrier_error_if_unordered():
diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py
index 0de08f5f616937604bc2c93581c5a8a1770164f4..5f0d03e7263388a74213ca2543ca1975f1e7bcbf 100644
--- a/test/test_numa_diff.py
+++ b/test/test_numa_diff.py
@@ -59,8 +59,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa
knl for knl in lp.parse_fortran(source, filename, auto_dependencies=False)
if "KernelR" in knl.name or "KernelS" in knl.name
]
- hsv_r = lp.tag_instructions(hsv_r, "rknl")
- hsv_s = lp.tag_instructions(hsv_s, "sknl")
+ hsv_r = lp.tag_statements(hsv_r, "rknl")
+ hsv_s = lp.tag_statements(hsv_s, "sknl")
hsv = lp.fuse_kernels([hsv_r, hsv_s], ["_r", "_s"])
#hsv = hsv_s
@@ -92,8 +92,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa
# turn the first reads into subst rules
local_prep_var_names = set()
- for insn in lp.find_instructions(hsv, "tag:local_prep"):
- assignee, = insn.assignee_var_names()
+ for stmt in lp.find_statements(hsv, "tag:local_prep"):
+ assignee, = stmt.assignee_var_names()
local_prep_var_names.add(assignee)
hsv = lp.assignment_to_subst(hsv, assignee)
@@ -101,8 +101,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa
hsv = lp.assignment_to_subst(hsv, "JinvD_r")
hsv = lp.assignment_to_subst(hsv, "JinvD_s")
- r_fluxes = lp.find_instructions(hsv, "tag:compute_fluxes and tag:rknl")
- s_fluxes = lp.find_instructions(hsv, "tag:compute_fluxes and tag:sknl")
+ r_fluxes = lp.find_statements(hsv, "tag:compute_fluxes and tag:rknl")
+ s_fluxes = lp.find_statements(hsv, "tag:compute_fluxes and tag:sknl")
if ilp_multiple > 1:
hsv = lp.split_iname(hsv, "k", 2, inner_tag="ilp")
@@ -117,15 +117,15 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa
flux_store_idx = 0
- for rflux_insn, sflux_insn in zip(r_fluxes, s_fluxes):
- for knl_tag, insn, flux_inames, tmps, flux_precomp_inames in [
- ("rknl", rflux_insn, ("j", "n",), rtmps, ("jj", "ii",)),
- ("sknl", sflux_insn, ("i", "n",), stmps, ("ii", "jj",)),
+ for rflux_stmt, sflux_stmt in zip(r_fluxes, s_fluxes):
+ for knl_tag, stmt, flux_inames, tmps, flux_precomp_inames in [
+ ("rknl", rflux_stmt, ("j", "n",), rtmps, ("jj", "ii",)),
+ ("sknl", sflux_stmt, ("i", "n",), stmps, ("ii", "jj",)),
]:
- flux_var, = insn.assignee_var_names()
- print(insn)
+ flux_var, = stmt.assignee_var_names()
+ print(stmt)
- reader, = lp.find_instructions(hsv,
+ reader, = lp.find_statements(hsv,
"tag:{knl_tag} and reads:{flux_var}"
.format(knl_tag=knl_tag, flux_var=flux_var))
diff --git a/test/test_reduction.py b/test/test_reduction.py
index 555b8c0cccd3a5ca32eb438c6cca44a1b0434a73..86b917a420f4df233a97d31a10c7b0fca96e4479 100644
--- a/test/test_reduction.py
+++ b/test/test_reduction.py
@@ -415,7 +415,7 @@ def test_parallel_multi_output_reduction(ctx_factory):
def test_reduction_with_conditional():
# Test whether realization of a reduction inherits predicates
- # of the original instruction. Tested with the CTarget, because
+ # of the original statement. Tested with the CTarget, because
# the PyOpenCL target will hoist the conditional into the host
# code in this minimal example.
knl = lp.make_kernel(
diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py
index 0571e41910020aa0a60cd911a63b6ce2984ed939..7056c25a33011191c9c619909b0487ab1244a33d 100644
--- a/test/test_sem_reagan.py
+++ b/test/test_sem_reagan.py
@@ -94,7 +94,7 @@ def test_tim2d(ctx_factory):
knl = lp.tag_inames(knl, dict(o="unr"))
knl = lp.tag_inames(knl, dict(m="unr"))
- knl = lp.set_instruction_priority(knl, "id:D_fetch", 5)
+ knl = lp.set_statement_priority(knl, "id:D_fetch", 5)
print(knl)
return knl
diff --git a/test/test_transform.py b/test/test_transform.py
index d17f6c707d36214f0f6713da25c8be0691b58d2c..5bd140e0dd72e81875a610050270fe69c4574a67 100644
--- a/test/test_transform.py
+++ b/test/test_transform.py
@@ -210,8 +210,8 @@ def test_extract_subst(ctx_factory):
from loopy.symbolic import parse
- insn, = knl.instructions
- assert insn.expression == parse("bsquare(23) + bsquare(25)")
+ stmt, = knl.statements
+ assert stmt.expression == parse("bsquare(23) + bsquare(25)")
def test_join_inames(ctx_factory):
@@ -405,14 +405,14 @@ def test_precompute_with_preexisting_inames_fail():
def test_add_nosync():
orig_knl = lp.make_kernel("{[i]: 0<=i<10}",
"""
- <>tmp[i] = 10 {id=insn1}
- <>tmp2[i] = 10 {id=insn2}
+ <>tmp[i] = 10 {id=stmt1}
+ <>tmp2[i] = 10 {id=stmt2}
- <>tmp3[2*i] = 0 {id=insn3}
- <>tmp4 = 1 + tmp3[2*i] {id=insn4}
+ <>tmp3[2*i] = 0 {id=stmt3}
+ <>tmp4 = 1 + tmp3[2*i] {id=stmt4}
- <>tmp5[i] = 0 {id=insn5,groups=g1}
- tmp5[i] = 1 {id=insn6,conflicts=g1}
+ <>tmp5[i] = 0 {id=stmt5,groups=g1}
+ tmp5[i] = 1 {id=stmt6,conflicts=g1}
""")
orig_knl = lp.set_temporary_scope(orig_knl, "tmp3", "local")
@@ -420,39 +420,39 @@ def test_add_nosync():
# No dependency present - don't add nosync
knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2")
- assert frozenset() == knl.id_to_insn["insn2"].no_sync_with
+ assert frozenset() == knl.id_to_stmt["stmt2"].no_sync_with
# Dependency present
knl = lp.add_nosync(orig_knl, "local", "writes:tmp3", "reads:tmp3")
- assert frozenset() == knl.id_to_insn["insn3"].no_sync_with
- assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with
+ assert frozenset() == knl.id_to_stmt["stmt3"].no_sync_with
+ assert frozenset([("stmt3", "local")]) == knl.id_to_stmt["stmt4"].no_sync_with
# Bidirectional
knl = lp.add_nosync(
orig_knl, "local", "writes:tmp3", "reads:tmp3", bidirectional=True)
- assert frozenset([("insn4", "local")]) == knl.id_to_insn["insn3"].no_sync_with
- assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with
+ assert frozenset([("stmt4", "local")]) == knl.id_to_stmt["stmt3"].no_sync_with
+ assert frozenset([("stmt3", "local")]) == knl.id_to_stmt["stmt4"].no_sync_with
# Groups
- knl = lp.add_nosync(orig_knl, "local", "insn5", "insn6")
- assert frozenset([("insn5", "local")]) == knl.id_to_insn["insn6"].no_sync_with
+ knl = lp.add_nosync(orig_knl, "local", "stmt5", "stmt6")
+ assert frozenset([("stmt5", "local")]) == knl.id_to_stmt["stmt6"].no_sync_with
-def test_uniquify_instruction_ids():
+def test_uniquify_statement_ids():
i1 = lp.Assignment("b", 1, id=None)
i2 = lp.Assignment("b", 1, id=None)
i3 = lp.Assignment("b", 1, id=lp.UniqueName("b"))
i4 = lp.Assignment("b", 1, id=lp.UniqueName("b"))
- knl = lp.make_kernel("{[i]: i = 1}", []).copy(instructions=[i1, i2, i3, i4])
+ knl = lp.make_kernel("{[i]: i = 1}", []).copy(statements=[i1, i2, i3, i4])
- from loopy.transform.instruction import uniquify_instruction_ids
- knl = uniquify_instruction_ids(knl)
+ from loopy.transform.statement import uniquify_statement_ids
+ knl = uniquify_statement_ids(knl)
- insn_ids = set(insn.id for insn in knl.instructions)
+ stmt_ids = set(stmt.id for stmt in knl.statements)
- assert len(insn_ids) == 4
- assert all(isinstance(id, str) for id in insn_ids)
+ assert len(stmt_ids) == 4
+ assert all(isinstance(id, str) for id in stmt_ids)
if __name__ == "__main__":