From ca62fb99bf671a534122a67e4e6065651962f38e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 16 Oct 2017 19:42:56 -0500 Subject: [PATCH 1/8] Add initial version of insn-to-statement script --- insn-to-statement.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 insn-to-statement.sh diff --git a/insn-to-statement.sh b/insn-to-statement.sh new file mode 100644 index 000000000..b89796cbc --- /dev/null +++ b/insn-to-statement.sh @@ -0,0 +1,8 @@ +set -x +set -e +sed -i s/Instruction/Statement/g $(git ls-files | grep -v compyte) +sed -i s/instruction/statement/g $(git ls-files | grep -v compyte) +sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) +for d in kernel codegen transform; do + git mv loopy/$d/instruction.py loopy/$d/statement.py +done -- GitLab From e2261aa78d0f4ea0386ff323a8357ae321372eb4 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 17 Oct 2017 09:19:03 -0500 Subject: [PATCH 2/8] Add stringify stmt patch --- 0001-fix-stringify.patch | 27 +++++++++++++++++++++++++++ insn-to-statement.sh | 1 + 2 files changed, 28 insertions(+) create mode 100644 0001-fix-stringify.patch diff --git a/0001-fix-stringify.patch b/0001-fix-stringify.patch new file mode 100644 index 000000000..66906dc1c --- /dev/null +++ b/0001-fix-stringify.patch @@ -0,0 +1,27 @@ +diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py +index 481f567..ae94b07 100644 +--- a/loopy/kernel/__init__.py ++++ b/loopy/kernel/__init__.py +@@ -1092,7 +1092,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): + "tags", + "variables", + "rules", +- "statements", ++ "Statements", ++ "instructions", + "Dependencies", + "schedule", + ]) +@@ -1183,10 +1184,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): + for rule_name in natsorted(six.iterkeys(kernel.substitutions)): + lines.append(str(kernel.substitutions[rule_name])) + +- if "statements" in what: ++ if "Statements" in what or "instructions" in what: + lines.extend(sep) + if show_labels: +- 
lines.append("INSTRUCTIONS:") ++ lines.append("STATEMENTS:") + loop_list_width = 35 + + # {{{ topological sort diff --git a/insn-to-statement.sh b/insn-to-statement.sh index b89796cbc..68a8264e4 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -6,3 +6,4 @@ sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py done +patch -p1 < ./0001-fix-stringify.patch -- GitLab From 568e31a2319c0d05ae230dc4c6a367da26175d59 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 17 Oct 2017 10:00:35 -0500 Subject: [PATCH 3/8] Add another stringify fix --- 0002-fix-stringify.patch | 12 ++++++++++++ insn-to-statement.sh | 1 + 2 files changed, 13 insertions(+) create mode 100644 0002-fix-stringify.patch diff --git a/0002-fix-stringify.patch b/0002-fix-stringify.patch new file mode 100644 index 000000000..65801b762 --- /dev/null +++ b/0002-fix-stringify.patch @@ -0,0 +1,12 @@ +diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py +index e71655f..555bb2e 100644 +--- a/loopy/kernel/__init__.py ++++ b/loopy/kernel/__init__.py +@@ -1093,7 +1093,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): + "variables", + "rules", + "Statements", +- "statements", + "Dependencies", + "schedule", + ]) diff --git a/insn-to-statement.sh b/insn-to-statement.sh index 68a8264e4..0a857dac3 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -7,3 +7,4 @@ for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py done patch -p1 < ./0001-fix-stringify.patch +patch -p1 < ./0002-fix-stringify.patch -- GitLab From 286a9c56a26cf07d539be07615bcc60906f30e28 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 17 Oct 2017 11:36:02 -0500 Subject: [PATCH 4/8] Add all-caps spelling to instruction->stmt replace --- insn-to-statement.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/insn-to-statement.sh b/insn-to-statement.sh index 
0a857dac3..4770f10ca 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -2,6 +2,7 @@ set -x set -e sed -i s/Instruction/Statement/g $(git ls-files | grep -v compyte) sed -i s/instruction/statement/g $(git ls-files | grep -v compyte) +sed -i s/INSTRUCTION/STATEMENT/g $(git ls-files | grep -v compyte) sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py -- GitLab From 6777c90dab16a4cdf25e64b94ac69f0f31e50ca3 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 19 Oct 2017 12:50:25 -0500 Subject: [PATCH 5/8] Update insn-to-statement patches --- 0002-fix-stringify.patch | 12 ------------ 0001-fix-stringify.patch => 0003-fix-stringify.patch | 12 ++++-------- insn-to-statement.sh | 3 +-- 3 files changed, 5 insertions(+), 22 deletions(-) delete mode 100644 0002-fix-stringify.patch rename 0001-fix-stringify.patch => 0003-fix-stringify.patch (66%) diff --git a/0002-fix-stringify.patch b/0002-fix-stringify.patch deleted file mode 100644 index 65801b762..000000000 --- a/0002-fix-stringify.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py -index e71655f..555bb2e 100644 ---- a/loopy/kernel/__init__.py -+++ b/loopy/kernel/__init__.py -@@ -1093,7 +1093,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): - "variables", - "rules", - "Statements", -- "statements", - "Dependencies", - "schedule", - ]) diff --git a/0001-fix-stringify.patch b/0003-fix-stringify.patch similarity index 66% rename from 0001-fix-stringify.patch rename to 0003-fix-stringify.patch index 66906dc1c..b9ad0739a 100644 --- a/0001-fix-stringify.patch +++ b/0003-fix-stringify.patch @@ -1,8 +1,8 @@ diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py -index 481f567..ae94b07 100644 +index 68fcca1..d20dddf 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py -@@ -1092,7 +1092,8 @@ class 
LoopKernel(ImmutableRecordWithoutPickling): +@@ -1094,7 +1094,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): "tags", "variables", "rules", @@ -12,7 +12,7 @@ index 481f567..ae94b07 100644 "Dependencies", "schedule", ]) -@@ -1183,10 +1184,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): +@@ -1171,7 +1172,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): for rule_name in natsorted(six.iterkeys(kernel.substitutions)): lines.append(str(kernel.substitutions[rule_name])) @@ -20,8 +20,4 @@ index 481f567..ae94b07 100644 + if "Statements" in what or "instructions" in what: lines.extend(sep) if show_labels: -- lines.append("INSTRUCTIONS:") -+ lines.append("STATEMENTS:") - loop_list_width = 35 - - # {{{ topological sort + lines.append("STATEMENTS:") diff --git a/insn-to-statement.sh b/insn-to-statement.sh index 4770f10ca..a6f2eab0c 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -7,5 +7,4 @@ sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py done -patch -p1 < ./0001-fix-stringify.patch -patch -p1 < ./0002-fix-stringify.patch +patch -p1 < ./0003-fix-stringify.patch -- GitLab From 36d789d0b02fe7cfb39286c69d9d25f318151fdf Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 19 Oct 2017 13:17:07 -0500 Subject: [PATCH 6/8] Update insn-to-statement patches --- 0003-fix-stringify.patch | 23 --------- insn-to-statement.sh | 2 +- stmt-compat-fixes.patch | 100 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 24 deletions(-) delete mode 100644 0003-fix-stringify.patch create mode 100644 stmt-compat-fixes.patch diff --git a/0003-fix-stringify.patch b/0003-fix-stringify.patch deleted file mode 100644 index b9ad0739a..000000000 --- a/0003-fix-stringify.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py -index 68fcca1..d20dddf 100644 ---- a/loopy/kernel/__init__.py -+++ 
b/loopy/kernel/__init__.py -@@ -1094,7 +1094,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): - "tags", - "variables", - "rules", -- "statements", -+ "Statements", -+ "instructions", - "Dependencies", - "schedule", - ]) -@@ -1171,7 +1172,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): - for rule_name in natsorted(six.iterkeys(kernel.substitutions)): - lines.append(str(kernel.substitutions[rule_name])) - -- if "statements" in what: -+ if "Statements" in what or "instructions" in what: - lines.extend(sep) - if show_labels: - lines.append("STATEMENTS:") diff --git a/insn-to-statement.sh b/insn-to-statement.sh index a6f2eab0c..26ca317aa 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -7,4 +7,4 @@ sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py done -patch -p1 < ./0003-fix-stringify.patch +patch -p1 < ./stmt-compat-fixes.patch diff --git a/stmt-compat-fixes.patch b/stmt-compat-fixes.patch new file mode 100644 index 000000000..738a37241 --- /dev/null +++ b/stmt-compat-fixes.patch @@ -0,0 +1,100 @@ +diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py +index 68fcca1..6d788df 100644 +--- a/loopy/kernel/__init__.py ++++ b/loopy/kernel/__init__.py +@@ -186,7 +186,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): + + # {{{ constructor + +- def __init__(self, domains, statements, args=[], schedule=None, ++ def __init__(self, domains, statements=None, args=[], schedule=None, + name="loopy_kernel", + preambles=[], + preamble_generators=[], +@@ -213,7 +213,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): + state=kernel_state.INITIAL, + target=None, + +- overridden_get_grid_sizes_for_stmt_ids=None): ++ overridden_get_grid_sizes_for_stmt_ids=None, ++ ++ # compat ++ instructions=None, ++ overridden_get_grid_sizes_for_insn_ids=None, ++ ): + """ + :arg overridden_get_grid_sizes_for_stmt_ids: A callable. 
When kernels get + intersected in slab decomposition, their grid sizes shouldn't +@@ -224,6 +229,23 @@ class LoopKernel(ImmutableRecordWithoutPickling): + from loopy.kernel.tools import SetOperationCacheManager + cache_manager = SetOperationCacheManager() + ++ if statements is not None and instructions is not None: ++ raise TypeError("may not specify both instructions and statements") ++ elif statements is None and instructions is None: ++ raise TypeError( ++ "must specify exactly one of instructions and statements") ++ elif instructions is not None: ++ statements = instructions ++ ++ if (overridden_get_grid_sizes_for_stmt_ids is not None ++ and overridden_get_grid_sizes_for_insn_ids is not None): ++ raise TypeError("may not specify both " ++ "overridden_get_grid_sizes_for_stmt_ids " ++ "and overridden_get_grid_sizes_for_insn_ids") ++ elif overridden_get_grid_sizes_for_insn_ids is not None: ++ overridden_get_grid_sizes_for_stmt_ids = \ ++ overridden_get_grid_sizes_for_insn_ids ++ + # {{{ process assumptions + + if assumptions is None: +@@ -264,6 +286,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): + assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains) + assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT + ++ + ImmutableRecordWithoutPickling.__init__(self, + domains=domains, + statements=statements, +@@ -1094,7 +1117,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): + "tags", + "variables", + "rules", +- "statements", ++ "Statements", ++ "instructions", + "Dependencies", + "schedule", + ]) +@@ -1171,7 +1195,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): + for rule_name in natsorted(six.iterkeys(kernel.substitutions)): + lines.append(str(kernel.substitutions[rule_name])) + +- if "statements" in what: ++ if "Statements" in what or "instructions" in what: + lines.extend(sep) + if show_labels: + lines.append("STATEMENTS:") +@@ -1387,6 +1411,21 @@ class LoopKernel(ImmutableRecordWithoutPickling): + + # }}} + ++ # {{{ "instruction" 
compat goop ++ ++ @property ++ def id_to_insn(self): ++ return self.id_to_stmt ++ ++ @property ++ def instructions(self): ++ return self.statements ++ ++ def get_instruction_id_generator(self, based_on="insn"): ++ return self.get_statement_id_generator(based_on) ++ ++ # }}} ++ + # }}} + + # vim: foldmethod=marker -- GitLab From 6f8e4fd689b1a97e82bab4adf1ed50e2018d9756 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 19 Oct 2017 13:18:23 -0500 Subject: [PATCH 7/8] Update insn-to-statement patches --- stmt-compat-fixes.patch | 8 -------- 1 file changed, 8 deletions(-) diff --git a/stmt-compat-fixes.patch b/stmt-compat-fixes.patch index 738a37241..7c7c9322e 100644 --- a/stmt-compat-fixes.patch +++ b/stmt-compat-fixes.patch @@ -49,14 +49,6 @@ index 68fcca1..6d788df 100644 # {{{ process assumptions if assumptions is None: -@@ -264,6 +286,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): - assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains) - assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT - -+ - ImmutableRecordWithoutPickling.__init__(self, - domains=domains, - statements=statements, @@ -1094,7 +1117,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): "tags", "variables", -- GitLab From 7533143210099e2ccc48e910e1e2ba77d194228f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 19 Oct 2017 13:18:36 -0500 Subject: [PATCH 8/8] Rename instruction -> statement --- MEMO | 22 +- README.rst | 2 +- doc/images/dep-graph-correct.svg | 10 +- doc/images/dep-graph-incorrect.svg | 10 +- doc/images/dep-graph-nesting.svg | 6 +- doc/misc.rst | 12 +- doc/ref_creation.rst | 2 +- doc/ref_kernel.rst | 96 +-- doc/ref_transform.rst | 12 +- doc/tutorial.rst | 76 +- .../fortran/ipython-integration-demo.ipynb | 8 +- examples/python/ispc-stream-harness.py | 4 +- insn-to-statement.sh | 10 +- loopy/__init__.py | 48 +- loopy/check.py | 310 ++++----- loopy/codegen/bounds.py | 10 +- loopy/codegen/control.py | 46 +- loopy/codegen/loop.py | 16 +- 
loopy/codegen/result.py | 10 +- .../codegen/{instruction.py => statement.py} | 68 +- loopy/diagnostic.py | 2 +- loopy/frontend/fortran/translator.py | 36 +- loopy/kernel/__init__.py | 230 +++--- loopy/kernel/creation.py | 620 ++++++++--------- loopy/kernel/data.py | 12 +- loopy/kernel/{instruction.py => statement.py} | 186 ++--- loopy/kernel/tools.py | 398 +++++------ loopy/loop.py | 4 +- loopy/match.py | 10 +- loopy/maxima.py | 36 +- loopy/options.py | 4 +- loopy/preprocess.py | 654 +++++++++--------- loopy/schedule/__init__.py | 538 +++++++------- loopy/schedule/device_mapping.py | 6 +- loopy/schedule/tools.py | 18 +- loopy/statistics.py | 68 +- loopy/symbolic.py | 42 +- loopy/target/__init__.py | 4 +- loopy/target/c/__init__.py | 36 +- loopy/target/cuda.py | 8 +- loopy/target/ispc.py | 14 +- loopy/target/opencl.py | 8 +- loopy/target/python.py | 8 +- loopy/tools.py | 4 +- loopy/transform/arithmetic.py | 66 +- loopy/transform/batch.py | 6 +- loopy/transform/buffer.py | 68 +- loopy/transform/data.py | 46 +- loopy/transform/diff.py | 60 +- loopy/transform/fusion.py | 54 +- loopy/transform/ilp.py | 44 +- loopy/transform/iname.py | 200 +++--- loopy/transform/instruction.py | 339 --------- loopy/transform/padding.py | 2 +- loopy/transform/precompute.py | 138 ++-- loopy/transform/save.py | 146 ++-- loopy/transform/statement.py | 339 +++++++++ loopy/transform/subst.py | 110 +-- loopy/type_inference.py | 18 +- proto-tests/test_fem_assembly.py | 10 +- stmt-compat-fixes.patch | 34 +- test/test_diff.py | 2 +- test/test_fortran.py | 2 +- test/test_linalg.py | 2 +- test/test_loopy.py | 84 +-- test/test_numa_diff.py | 26 +- test/test_reduction.py | 2 +- test/test_sem_reagan.py | 2 +- test/test_transform.py | 44 +- 69 files changed, 2803 insertions(+), 2765 deletions(-) rename loopy/codegen/{instruction.py => statement.py} (81%) rename loopy/kernel/{instruction.py => statement.py} (88%) delete mode 100644 loopy/transform/instruction.py create mode 100644 
loopy/transform/statement.py diff --git a/MEMO b/MEMO index f4e5c34e4..340a3da17 100644 --- a/MEMO +++ b/MEMO @@ -10,7 +10,7 @@ Things to consider - Depedencies are pointwise for shared loop dimensions and global over non-shared ones (between dependent and ancestor) -- multiple insns could fight over which iname gets local axis 0 +- multiple stmts could fight over which iname gets local axis 0 -> complicated optimization problem - Every loop in loopy is opened at most once. @@ -35,7 +35,7 @@ Things to consider - Loopy as a data model for implementing custom rewritings - We won't generate WAW barrier-needing dependencies - from one instruction to itself. + from one statement to itself. - Loopy is semi-interactive. @@ -45,7 +45,7 @@ Things to consider - Dependency on non-local global writes is ill-formed -- No substitution rules allowed on lhs of insns +- No substitution rules allowed on lhs of stmts To-do ^^^^^ @@ -78,7 +78,7 @@ Fixes: old inames may still be around, so the rewrite may or may not have to be applied. -- Group instructions by dependency/inames for scheduling, to +- Group statements by dependency/inames for scheduling, to increase sched. scalability - What if no universally valid precompute base index expression is found? @@ -109,7 +109,7 @@ Future ideas - Check for unordered (no-dependency) writes to the same location -- Vanilla C string instructions? +- Vanilla C string statements? - Barriers for data exchanged via global vars? @@ -183,7 +183,7 @@ Dealt with - Add dependencies after the fact -- Scalar insn priority +- Scalar stmt priority - ScalarArg is a bad name -> renamed to ValueArg @@ -209,8 +209,8 @@ Dealt with -> pending better prefetch spec - Prefetch by sample access -- How is intra-instruction ordering of ILP loops going to be determined? - (taking into account that it could vary even per-instruction?) +- How is intra-statement ordering of ILP loops going to be determined? + (taking into account that it could vary even per-statement?) 
- Sharing of checks across ILP instances @@ -257,7 +257,7 @@ Dealt with property. - Just touching a variable written to by a non-idempotent - instruction makes that instruction also not idempotent + statement makes that statement also not idempotent -> Idempotent renamed to boostable. -> Done. @@ -274,7 +274,7 @@ Dealt with - Slab decomposition for ILP -> I don't think that's possible. -- It is hard to understand error messages that referred to instructions that +- It is hard to understand error messages that referred to statements that are generated during preprocessing. -> Expose preprocessing to the user so she can inspect the preprocessed @@ -314,7 +314,7 @@ Dealt with - Make syntax for iname dependencies -- make syntax for insn dependencies +- make syntax for stmt dependencies - Implement get_problems() diff --git a/README.rst b/README.rst index 0e551fbed..f58a75de1 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ It can capture the following types of optimizations: * Loopy Unrolling * Loop tiling with efficient handling of boundary cases * Prefetching/copy optimizations -* Instruction level parallelism +* Statement level parallelism * and many more Loopy targets array-type computations, such as the following: diff --git a/doc/images/dep-graph-correct.svg b/doc/images/dep-graph-correct.svg index 397cb2d10..0bd743391 100644 --- a/doc/images/dep-graph-correct.svg +++ b/doc/images/dep-graph-correct.svg @@ -32,15 +32,15 @@ out[(j, i)] <- a[(i, j)] - -insn - + +stmt + out[(ii, jj)] <- 2*out[(ii, jj)] - -transpose->insn + +transpose->stmt diff --git a/doc/images/dep-graph-incorrect.svg b/doc/images/dep-graph-incorrect.svg index 363080aef..d072248af 100644 --- a/doc/images/dep-graph-incorrect.svg +++ b/doc/images/dep-graph-incorrect.svg @@ -24,15 +24,15 @@ out[(j, i)] <- a[(i, j)] - -insn - + +stmt + out[(i, j)] <- 2*out[(i, j)] - -transpose->insn + +transpose->stmt diff --git a/doc/images/dep-graph-nesting.svg b/doc/images/dep-graph-nesting.svg index 
72cb9c463..a50ca0509 100644 --- a/doc/images/dep-graph-nesting.svg +++ b/doc/images/dep-graph-nesting.svg @@ -17,9 +17,9 @@ i - -insn - + +stmt + a[(i, j)] <- 0 diff --git a/doc/misc.rst b/doc/misc.rst index 9db3b85a7..4dba9c7c1 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -138,11 +138,11 @@ This example is included in the :mod:`loopy` distribution as What this does is find nearby "centers" satisfying some criteria for an array of points ("targets"). -Specifying dependencies for groups of instructions is cumbersome. Help? +Specifying dependencies for groups of statements is cumbersome. Help? ----------------------------------------------------------------------- -You can now specify instruction ID prefixes and dependencies for groups -of instructions, like this:: +You can now specify statement ID prefixes and dependencies for groups +of statements, like this:: with {id_prefix=init_m} <> m[0] = ... @@ -253,7 +253,7 @@ This list is always growing, but here are a few pointers: Separated array axes must have a fixed size. (See either :func:`loopy.split_array_axis`.) -* Realization of Instruction-level parallelism +* Realization of Statement-level parallelism Use :func:`loopy.tag_inames` with the ``"ilp"`` tag. ILP loops must have a fixed size. (See either @@ -284,7 +284,7 @@ This list is always growing, but here are a few pointers: Uh-oh. I got a scheduling error. Any hints? ------------------------------------------- -* Make sure that dependencies between instructions are as +* Make sure that dependencies between statements are as you intend. Use :func:`loopy.show_dependency_graph` to check. @@ -304,7 +304,7 @@ Uh-oh. I got a scheduling error. Any hints? * Make sure that your loops are correctly nested. - Print the kernel to make sure all instructions are within + Print the kernel to make sure all statements are within the set of inames you intend them to be in. * One iname is one for loop. 
diff --git a/doc/ref_creation.rst b/doc/ref_creation.rst index 92eff09c9..9cc02be74 100644 --- a/doc/ref_creation.rst +++ b/doc/ref_creation.rst @@ -6,7 +6,7 @@ Reference: Creating Kernels =========================== -From Loop Domains and Instructions +From Loop Domains and Statements ---------------------------------- .. autofunction:: make_kernel diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 3f01b0764..cbf881354 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -17,7 +17,7 @@ Example:: A kernel's iteration domain is given by a list of :class:`islpy.BasicSet` instances (which parametrically represent multi-dimensional sets of tuples of integers). They define the integer values of the loop variables -for which instructions (see below) will be executed. +for which statements (see below) will be executed. It is written in :ref:`isl-syntax`. :mod:`loopy` calls the loop variables *inames*. In this case, *i* is the sole iname. The loop domain is given as a conjunction of affine equality @@ -46,7 +46,7 @@ inside of the 'l' loop. The idea is that domains form a forest (a collection of trees), and a "sub-forest" is extracted that covers all the inames for each -instruction. Each individual sub-tree is then checked for branching, +statement. Each individual sub-tree is then checked for branching, which is ill-formed. It is declared ill-formed because intersecting, in the above case, the l, i, and j domains could result in restrictions from the i domain affecting the j domain by way of how i affects l--which would @@ -59,7 +59,7 @@ Inames Loops are (by default) entered exactly once. This is necessary to preserve dependency semantics--otherwise e.g. a fetch could happen inside one loop nest, -and then the instruction using that fetch could be inside a wholly different +and then the statement using that fetch could be inside a wholly different loop nest. .. 
_isl-syntax: @@ -134,7 +134,7 @@ Tag Meaning ``"l.N"`` Local (intra-group) axis N ("local") ``"g.N"`` Group-number axis N ("group") ``"unr"`` Unroll -``"ilp"`` | ``"ilp.unr"`` Unroll using instruction-level parallelism +``"ilp"`` | ``"ilp.unr"`` Unroll using statement-level parallelism ``"ilp.seq"`` Realize parallel iname as innermost loop ``"like.INAME"`` Can be used when tagging inames to tag like another ``"unused.g"`` | ``"unused.l"`` Can be to tag as the next unused group/local axis @@ -147,18 +147,18 @@ Tag Meaning * Restricts loops to be innermost * Duplicates reduction storage for any reductions nested around ILP usage * Causes a loop (unrolled or not) to be opened/generated for each - involved instruction + involved statement .. }}} -.. _instructions: +.. _statements: -Instructions +Statements ------------ .. {{{ -.. autoclass:: InstructionBase +.. autoclass:: StatementBase .. _assignments: @@ -172,18 +172,18 @@ Assignment objects Textual Assignment Syntax ^^^^^^^^^^^^^^^^^^^^^^^^^ -The general syntax of an instruction is a simple assignment:: +The general syntax of an statement is a simple assignment:: LHS[i,j,k] = EXPRESSION Several extensions of this syntax are defined, as discussed below. They may be combined freely. -You can also use an instruction to declare a new temporary variable. (See +You can also use an statement to declare a new temporary variable. (See :ref:`temporaries`.) See :ref:`types` for what types are acceptable. If the ``LHS`` has a subscript, bounds on the indices are inferred (which must be constants at the time of kernel creation) and the declared temporary is -created as an array. Instructions declaring temporaries have the following +created as an array. Statements declaring temporaries have the following form:: LHS[i,j,k] = EXPRESSION @@ -193,31 +193,31 @@ automatically. 
This uses the following syntax:: <> LHS[i,j,k] = EXPRESSION -Lastly, each instruction may optionally have a number of attributes +Lastly, each statement may optionally have a number of attributes specified, using the following format:: LHS[i,j,k] = EXPRESSION {attr1,attr2=value1:value2} These are usually key-value pairs. The following attributes are recognized: -* ``id=value`` sets the instruction's identifier to ``value``. ``value`` +* ``id=value`` sets the statement's identifier to ``value``. ``value`` must be unique within the kernel. This identifier is used to refer to the - instruction after it has been created, such as from ``dep`` attributes + statement after it has been created, such as from ``dep`` attributes (see below) or from :mod:`context matches `. -* ``id_prefix=value`` also sets the instruction's identifier, however +* ``id_prefix=value`` also sets the statement's identifier, however uniqueness is ensured by loopy itself, by appending further components (often numbers) to the given ``id_prefix``. -* ``inames=i:j:k`` forces the instruction to reside within the loops over +* ``inames=i:j:k`` forces the statement to reside within the loops over :ref:`inames` ``i``, ``j`` and ``k`` (and only those). .. note:: - The default for the inames that the instruction depends on is - the inames used in the instruction itself plus the common + The default for the inames that the statement depends on is + the inames used in the statement itself plus the common subset of inames shared by writers of all variables read by the - instruction. + statement. You can add a plus sign ("``+``") to the front of this option value to indicate that you would like the inames you specify here @@ -232,9 +232,9 @@ These are usually key-value pairs. The following attributes are recognized: This is a shortcut for calling :func:`loopy.duplicate_inames` later (once the kernel is created). 
-* ``dep=id1:id2`` creates a dependency of this instruction on the - instructions with identifiers ``id1`` and ``id2``. The meaning of this - dependency is that the code generated for this instruction is required to +* ``dep=id1:id2`` creates a dependency of this statement on the + statements with identifiers ``id1`` and ``id2``. The meaning of this + dependency is that the code generated for this statement is required to appear textually after all of these dependees' generated code. Identifiers here are allowed to be wildcards as defined by the Python @@ -246,14 +246,14 @@ These are usually key-value pairs. The following attributes are recognized: Since specifying all possible dependencies is cumbersome and error-prone, :mod:`loopy` employs a heuristic to automatically find dependencies. Specifically, :mod:`loopy` will automatically add - a dependency to an instruction reading a variable if there is - exactly one instruction writing that variable. ("Variable" here may + a dependency to an statement reading a variable if there is + exactly one statement writing that variable. ("Variable" here may mean either temporary variable or kernel argument.) If each variable in a kernel is only written once, then this heuristic should be able to compute all required dependencies. - Conversely, if a variable is written by two different instructions, + Conversely, if a variable is written by two different statements, all ordering around that variable needs to be specified explicitly. It is recommended to use :func:`get_dot_dependency_graph` to visualize the dependency graph of possible orderings. @@ -262,14 +262,14 @@ These are usually key-value pairs. The following attributes are recognized: heuristic and indicate that the specified list of dependencies is exhaustive. -* ``dep_query=...`` provides an alternative way of specifying instruction +* ``dep_query=...`` provides an alternative way of specifying statement dependencies. 
The given string is parsed as a match expression object by :func:`loopy.match.parse_match`. Upon kernel generation, this match - expression is used to match instructions in the kernel and add them as + expression is used to match statements in the kernel and add them as dependencies. * ``nosync=id1:id2`` prescribes that no barrier synchronization is necessary - for the instructions with identifiers ``id1`` and ``id2``, even if a + for the statements with identifiers ``id1`` and ``id2``, even if a dependency chain exists and variables are accessed in an apparently racy way. @@ -287,8 +287,8 @@ These are usually key-value pairs. The following attributes are recognized: * `any` As an example, ``nosync=id1@local:id2@global`` prescribes that no local - synchronization is needed with instruction ``id1`` and no global - synchronization is needed with instruction ``id2``. + synchronization is needed with statement ``id1`` and no global + synchronization is needed with statement ``id2``. ``nosync=id1@any`` has the same effect as ``nosync=id1``. @@ -296,25 +296,25 @@ These are usually key-value pairs. The following attributes are recognized: just like ``dep_query`` and ``dep``. As with ``nosync``, ``nosync_query`` accepts an optional `@scope` suffix. -* ``priority=integer`` sets the instructions priority to the value - ``integer``. Instructions with higher priority will be scheduled sooner, +* ``priority=integer`` sets the statements priority to the value + ``integer``. Statements with higher priority will be scheduled sooner, if possible. Note that the scheduler may still schedule a lower-priority - instruction ahead of a higher-priority one if loop orders or dependencies + statement ahead of a higher-priority one if loop orders or dependencies require it. 
-* ``if=variable1:variable2`` Only execute this instruction if all condition +* ``if=variable1:variable2`` Only execute this statement if all condition variables (which must be scalar variables) evaluate to ``true`` (as defined by C). -* ``tags=tag1:tag2`` Apply tags to this instruction that can then be used +* ``tags=tag1:tag2`` Apply tags to this statement that can then be used for :ref:`context-matching`. -* ``groups=group1:group2`` Make this instruction part of the given - instruction groups. See :class:`InstructionBase.groups`. +* ``groups=group1:group2`` Make this statement part of the given + statement groups. See :class:`StatementBase.groups`. -* ``conflicts_grp=group1:group2`` Make this instruction conflict with the - given instruction groups. See - :class:`InstructionBase.conflicts_with_groups`. +* ``conflicts_grp=group1:group2`` Make this statement conflict with the + given statement groups. See + :class:`StatementBase.conflicts_with_groups`. * ``atomic`` The update embodied by the assignment is carried out atomically. See :attr:`Assignment.atomicity` for precise semantics. @@ -340,15 +340,15 @@ Loopy's expressions are a slight superset of the expressions supported by TODO: Functions TODO: Reductions -Function Call Instructions +Function Call Statements ^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: CallInstruction +.. autoclass:: CallStatement -C Block Instructions +C Block Statements ^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: CInstruction +.. autoclass:: CStatement Atomic Operations ^^^^^^^^^^^^^^^^^ @@ -363,15 +363,15 @@ Atomic Operations .. autoclass:: AtomicUpdate -No-Op Instruction +No-Op Statement ^^^^^^^^^^^^^^^^^ -.. autoclass:: NoOpInstruction +.. autoclass:: NoOpStatement -Barrier Instructions +Barrier Statements ^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: BarrierInstruction +.. autoclass:: BarrierStatement .. 
}}} diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index d293e3ebe..a3e43c1fb 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -59,18 +59,18 @@ Padding Data .. autofunction:: add_padding -Manipulating Instructions +Manipulating Statements ------------------------- -.. autofunction:: set_instruction_priority +.. autofunction:: set_statement_priority .. autofunction:: add_dependency -.. autofunction:: remove_instructions +.. autofunction:: remove_statements -.. autofunction:: replace_instruction_ids +.. autofunction:: replace_statement_ids -.. autofunction:: tag_instructions +.. autofunction:: tag_statements .. autofunction:: add_nosync @@ -135,7 +135,7 @@ Setting options Matching contexts ----------------- -TODO: Matching instruction tags +TODO: Matching statement tags .. automodule:: loopy.match diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 8b8538725..20b3610e0 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -88,7 +88,7 @@ The parts that you see here are the two main components of a loopy kernel: passed to the kernel by the user that, in this case, determines the length of the vector being multiplied. -* The **instructions** to be executed. These are generally scalar +* The **statements** to be executed. These are generally scalar assignments between array elements, consisting of a left hand side and a right hand side. See :ref:`assignments` for the full syntax of an assignment. @@ -121,9 +121,9 @@ always see loopy's view of a kernel by printing it. INAME IMPLEMENTATION TAGS: i: None --------------------------------------------------------------------------- - INSTRUCTIONS: + STATEMENTS: for i - out[i] = 2*a[i] {id=insn} + out[i] = 2*a[i] {id=stmt} end i --------------------------------------------------------------------------- @@ -132,7 +132,7 @@ than there was in the input. Most of this comes from default values that loopy assumes to cover common use cases. These defaults can all be overridden. 
-We've seen the domain and the instructions above, and we'll discuss the +We've seen the domain and the statements above, and we'll discuss the 'iname-to-tag-map' in :ref:`implementing-inames`. The remaining big chunk of added information is in the 'arguments' section, where we observe the following: @@ -307,13 +307,13 @@ that: ``i==17``. Your program is only correct if it produces a valid result irrespective of this ordering. -* In addition, there is (by default) no ordering between instructions - either. In other words, loopy is free to execute the instructions above +* In addition, there is (by default) no ordering between statements + either. In other words, loopy is free to execute the statements above in any order whatsoever. Reading the above two rules, you'll notice that our transpose-and-multiply kernel is incorrect, because it only computes the desired result if the -first instruction completes before the second one. To fix this, we declare +first statement completes before the second one. To fix this, we declare an explicit dependency: .. doctest:: @@ -327,8 +327,8 @@ an explicit dependency: ... """) ``{id=transpose}`` assigns the identifier *transpose* to the first -instruction, and ``{dep=transpose}`` declares a dependency of the second -instruction on the first. Looking at loopy's view of this kernel, we see +statement, and ``{dep=transpose}`` declares a dependency of the second +statement on the first. Looking at loopy's view of this kernel, we see that these dependencies show up there, too: .. doctest:: @@ -340,14 +340,14 @@ that these dependencies show up there, too: ... --------------------------------------------------------------------------- DEPENDENCIES: (use loopy.show_dependency_graph to visualize) - insn : transpose + stmt : transpose --------------------------------------------------------------------------- These dependencies are in a ``dependent : prerequisite`` format that should be familiar if you have previously dealt with Makefiles. 
For larger kernels, these dependency lists can become quite verbose, and there
is an increasing risk that required dependencies are missed. To help catch these,
-loopy can also show a statement dependency graph, using
:func:`loopy.show_dependency_graph`:

.. image:: images/dep-graph-incorrect.svg
@@ -360,16 +360,16 @@ graph will open in a browser window.

Since manually notating lots of dependencies is cumbersome, loopy has
a heuristic:

-    If a variable is written by exactly one instruction, then all
-    instructions reading that variable will automatically depend on the
-    writing instruction.
+    If a variable is written by exactly one statement, then all
+    statements reading that variable will automatically depend on the
+    writing statement.

The intent of this heuristic is to cover the common case of a
precomputed result being stored and used many times. Generally, these
dependencies are *in addition* to any manual dependencies added via
``{dep=...}``. It is possible (but rare) that the heuristic adds
undesired dependencies. In this case, ``{dep=*...}`` (i.e. a leading
asterisk) to
-prevent the heuristic from adding dependencies for this instruction.
+prevent the heuristic from adding dependencies for this statement.

Loops and dependencies
~~~~~~~~~~~~~~~~~~~~~~
@@ -395,7 +395,7 @@ Let us take a look at the generated code for the above kernel:
      }
    }

-While our requested instruction ordering has been obeyed, something is
+While our requested statement ordering has been obeyed, something is
still not right:

.. doctest::
@@ -404,7 +404,7 @@ still not right:
    False

For the kernel to perform the desired computation, *all
-instances* (loop iterations) of the first instruction need to be completed,
+instances* (loop iterations) of the first statement need to be completed,
not just the one for the current values of *(i, j)*.
Dependencies in loopy act *within* the largest common set of shared @@ -960,7 +960,7 @@ Consider the following example: a_temp[lid(0)] = a[16 * gid(0) + lid(0)]; acc_k = 0.0f; } - barrier(CLK_LOCAL_MEM_FENCE) /* for a_temp (insn_0_k_update depends on insn) */; + barrier(CLK_LOCAL_MEM_FENCE) /* for a_temp (stmt_0_k_update depends on stmt) */; if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0) { for (int k = 0; k <= 15; ++k) @@ -971,8 +971,8 @@ Consider the following example: Observe that *a_temp* was automatically placed in local memory, because it is written in parallel across values of the group-local iname -*i_inner*. In addition, :mod:`loopy` has emitted a barrier instruction to -achieve the :ref:`ordering` specified by the instruction dependencies. +*i_inner*. In addition, :mod:`loopy` has emitted a barrier statement to +achieve the :ref:`ordering` specified by the statement dependencies. (The ``priority=10`` attribute was added to make the output of the test deterministic.) @@ -1045,7 +1045,7 @@ earlier: acc_k = 0.0f; if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0) a_fetch[lid(0)] = a[16 * gid(0) + lid(0)]; - barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (insn_k_update depends on a_fetch_rule) */; + barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (stmt_k_update depends on a_fetch_rule) */; if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0) { for (int k = 0; k <= 15; ++k) @@ -1108,7 +1108,7 @@ work item: :mod:`loopy` supports two kinds of barriers: * *Local barriers* ensure consistency of local memory accesses to items within - *the same* work group. This synchronizes with all instructions in the work + *the same* work group. This synchronizes with all statements in the work group. * *Global barriers* ensure consistency of global memory accesses @@ -1123,7 +1123,7 @@ all work items reach the same barrier, the kernel will hang during execution. 
Barrier insertion ~~~~~~~~~~~~~~~~~ -By default, :mod:`loopy` inserts local barriers between two instructions when it +By default, :mod:`loopy` inserts local barriers between two statements when it detects that a dependency involving local memory may occur across work items. To see this in action, take a look at the section on :ref:`local_temporaries`. @@ -1156,11 +1156,11 @@ this, :mod:`loopy` will complain that global barrier needs to be inserted: >>> cgr = lp.generate_code_v2(knl) Traceback (most recent call last): ... - MissingBarrierError: Dependency 'rotate depends on maketmp' (for variable 'arr') requires synchronization by a global barrier (add a 'no_sync_with' instruction option to state that no synchronization is needed) + MissingBarrierError: Dependency 'rotate depends on maketmp' (for variable 'arr') requires synchronization by a global barrier (add a 'no_sync_with' statement option to state that no synchronization is needed) -The syntax for a inserting a global barrier instruction is +The syntax for a inserting a global barrier statement is ``... gbarrier``. :mod:`loopy` also supports manually inserting local -barriers. The syntax for a local barrier instruction is ``... lbarrier``. +barriers. The syntax for a local barrier statement is ``... lbarrier``. Saving temporaries across global barriers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1173,7 +1173,7 @@ of how to use :func:`loopy.save_and_reload_temporaries` which is helpful for that purpose. Let us start with an example. Consider the kernel from above with a -``... gbarrier`` instruction that has already been inserted. +``... gbarrier`` statement that has already been inserted. .. doctest:: @@ -1202,7 +1202,7 @@ Here is what happens when we try to generate code for the kernel: MissingDefinitionError: temporary variable 'tmp' gets used in subkernel 'rotate_v2_0' without a definition (maybe you forgot to call loopy.save_and_reload_temporaries?) 
This happens due to the kernel splitting done by :mod:`loopy`. The splitting -happens when the instruction schedule is generated. To see the schedule, we +happens when the statement schedule is generated. To see the schedule, we should call :func:`loopy.get_one_scheduled_kernel`: >>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) @@ -1222,7 +1222,7 @@ should call :func:`loopy.get_one_scheduled_kernel`: 6: RETURN FROM KERNEL rotate_v2_0 --------------------------------------------------------------------------- -As the error message suggests, taking a look at the generated instruction +As the error message suggests, taking a look at the generated statement schedule will show that while ``tmp`` is assigned in the first kernel, the assignment to ``tmp`` is not seen by the second kernel. Because the temporary is in private memory, it does not persist across calls to device kernels (the same @@ -1231,13 +1231,13 @@ goes for local temporaries). :mod:`loopy` provides a function called :func:`loopy.save_and_reload_temporaries` for the purpose of handling the task of saving and restoring temporary values across global barriers. This -function adds instructions to the kernel without scheduling them. That means +function adds statements to the kernel without scheduling them. That means that :func:`loopy.get_one_scheduled_kernel` needs to be called one more time to -put those instructions into the schedule. +put those statements into the schedule. >>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) >>> knl = lp.save_and_reload_temporaries(knl) - >>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added instructions + >>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added statements >>> print(knl) --------------------------------------------------------------------------- KERNEL: rotate_v2 @@ -1461,7 +1461,7 @@ sign that something is amiss: >>> evt, (out,) = knl(queue, a=a_mat_dev) Traceback (most recent call last): ... 
- WriteRaceConditionWarning: in kernel transpose: instruction 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel attribute to disable) + WriteRaceConditionWarning: in kernel transpose: statement 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel attribute to disable) When we ask to see the code, the issue becomes apparent: @@ -1495,7 +1495,7 @@ Barriers ~~~~~~~~ :mod:`loopy` may infer the need for a barrier when it is not necessary. The -``no_sync_with`` instruction attribute can be used to resolve this. +``no_sync_with`` statement attribute can be used to resolve this. See also :func:`loopy.add_nosync`. @@ -1868,16 +1868,16 @@ Now to make things more interesting, we'll create a kernel with barriers: for (int j = 0; j <= 9; ++j) for (int i = 0; i <= 49; ++i) { - barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn rev-depends on insn_0) */; + barrier(CLK_LOCAL_MEM_FENCE) /* for c (stmt rev-depends on stmt_0) */; c[990 * i + 99 * j + lid(0) + 1] = 2 * a[980 * i + 98 * j + lid(0) + 1]; - barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn_0 depends on insn) */; + barrier(CLK_LOCAL_MEM_FENCE) /* for c (stmt_0 depends on stmt) */; e[980 * i + 98 * j + lid(0) + 1] = c[990 * i + 99 * j + 1 + lid(0) + 1] + c[990 * i + 99 * j + -1 + lid(0) + 1]; } } } -In this kernel, when a thread performs the second instruction it uses data -produced by *different* threads during the first instruction. 
Because of this, +In this kernel, when a thread performs the second statement it uses data +produced by *different* threads during the first statement. Because of this, barriers are required for correct execution, so loopy inserts them. Now we'll count the barriers using :func:`loopy.get_synchronization_map`: diff --git a/examples/fortran/ipython-integration-demo.ipynb b/examples/fortran/ipython-integration-demo.ipynb index c2b34f1d1..27347bd94 100644 --- a/examples/fortran/ipython-integration-demo.ipynb +++ b/examples/fortran/ipython-integration-demo.ipynb @@ -82,8 +82,8 @@ "INAME IMPLEMENTATION TAGS:\n", "i: None\n", "---------------------------------------------------------------------------\n", - "INSTRUCTIONS:\n", - "[i] out[i] <- a # insn0\n", + "STATEMENTS:\n", + "[i] out[i] <- a # stmt0\n", "---------------------------------------------------------------------------\n" ] } @@ -167,8 +167,8 @@ "i_inner: l.0\n", "i_outer: g.0\n", "---------------------------------------------------------------------------\n", - "INSTRUCTIONS:\n", - "[i_inner,i_outer] out[i_inner + i_outer*128] <- a # insn0\n", + "STATEMENTS:\n", + "[i_inner,i_outer] out[i_inner + i_outer*128] <- a # stmt0\n", "---------------------------------------------------------------------------\n" ] } diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py index fa581d426..edf49c8aa 100644 --- a/examples/python/ispc-stream-harness.py +++ b/examples/python/ispc-stream-harness.py @@ -59,10 +59,10 @@ def main(): with open("tasksys.cpp", "r") as ts_file: tasksys_source = ts_file.read() - def make_knl(name, insn, vars): + def make_knl(name, stmt, vars): knl = lp.make_kernel( "{[i]: 0<=i helps find segfaults - result = Block([printf_insn, result]) + result = Block([printf_stmt, result]) # }}} return result -def generate_call_code(codegen_state, insn): +def generate_call_code(codegen_state, stmt): kernel = codegen_state.kernel # {{{ vectorization handling if 
codegen_state.vectorization_info: - if insn.atomicity: + if stmt.atomicity: raise Unvectorizable("atomic operation") # }}} result = codegen_state.ast_builder.emit_multiple_assignment( - codegen_state, insn) + codegen_state, stmt) # {{{ tracing @@ -243,11 +243,11 @@ def generate_call_code(codegen_state, insn): return result -def generate_c_instruction_code(codegen_state, insn): +def generate_c_statement_code(codegen_state, stmt): kernel = codegen_state.kernel if codegen_state.vectorization_info is not None: - raise Unvectorizable("C instructions cannot be vectorized") + raise Unvectorizable("C statements cannot be vectorized") body = [] @@ -255,7 +255,7 @@ def generate_c_instruction_code(codegen_state, insn): from cgen import Initializer, Block, Line from pymbolic.primitives import Variable - for name, iname_expr in insn.iname_exprs: + for name, iname_expr in stmt.iname_exprs: if (isinstance(iname_expr, Variable) and name not in codegen_state.var_subst_map): # No need, the bare symbol will work @@ -270,7 +270,7 @@ def generate_c_instruction_code(codegen_state, insn): if body: body.append(Line()) - body.extend(Line(l) for l in insn.code.split("\n")) + body.extend(Line(l) for l in stmt.code.split("\n")) return Block(body) diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py index 512e4ac86..1c30de8a3 100644 --- a/loopy/diagnostic.py +++ b/loopy/diagnostic.py @@ -100,7 +100,7 @@ class MissingDefinitionError(LoopyError): pass -class UnscheduledInstructionError(LoopyError): +class UnscheduledStatementError(LoopyError): pass diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index e801d09dc..391ec1414 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -113,12 +113,12 @@ class Scope(object): self.active_loopy_inames = set() - self.instructions = [] + self.statements = [] self.temporary_variables = [] self.used_names = set() - self.previous_instruction_id = None + self.previous_statement_id 
= None def known_names(self): return (self.used_names @@ -205,12 +205,12 @@ class F2LoopyTranslator(FTreeWalkerBase): self.scope_stack = [] - self.insn_id_counter = 0 + self.stmt_id_counter = 0 self.condition_id_counter = 0 self.kernels = [] - self.instruction_tags = [] + self.statement_tags = [] self.conditions = [] self.conditions_data = [] @@ -220,23 +220,23 @@ class F2LoopyTranslator(FTreeWalkerBase): self.block_nest = [] - def add_expression_instruction(self, lhs, rhs): + def add_expression_statement(self, lhs, rhs): scope = self.scope_stack[-1] - new_id = intern("insn%d" % self.insn_id_counter) - self.insn_id_counter += 1 + new_id = intern("stmt%d" % self.stmt_id_counter) + self.stmt_id_counter += 1 from loopy.kernel.data import Assignment - insn = Assignment( + stmt = Assignment( lhs, rhs, within_inames=frozenset( scope.active_loopy_inames), id=new_id, predicates=frozenset(self.conditions), - tags=tuple(self.instruction_tags)) + tags=tuple(self.statement_tags)) - scope.previous_instruction_id = new_id - scope.instructions.append(insn) + scope.previous_statement_id = new_id + scope.statements.append(stmt) # {{{ map_XXX functions @@ -413,7 +413,7 @@ class F2LoopyTranslator(FTreeWalkerBase): rhs = scope.process_expression_for_loopy(self.parse_expr(node, node.expr)) - self.add_expression_instruction(lhs, rhs) + self.add_expression_statement(lhs, rhs) def map_Allocate(self, node): raise NotImplementedError("allocate") @@ -464,7 +464,7 @@ class F2LoopyTranslator(FTreeWalkerBase): from pymbolic import var cond_var = var(cond_name) - self.add_expression_instruction( + self.add_expression_statement( cond_var, self.parse_expr(node, node.expr)) cond_expr = cond_var @@ -646,16 +646,16 @@ class F2LoopyTranslator(FTreeWalkerBase): if begin_tag_match: tag = begin_tag_match.group(1) - if tag in self.instruction_tags: + if tag in self.statement_tags: raise TranslationError("nested begin tag for tag '%s'" % tag) - self.instruction_tags.append(tag) + 
self.statement_tags.append(tag) elif end_tag_match: tag = end_tag_match.group(1) - if tag not in self.instruction_tags: + if tag not in self.statement_tags: raise TranslationError( "end tag without begin tag for tag '%s'" % tag) - self.instruction_tags.remove(tag) + self.statement_tags.remove(tag) elif faulty_loopy_pragma_match is not None: from warnings import warn @@ -710,7 +710,7 @@ class F2LoopyTranslator(FTreeWalkerBase): knl = lp.make_kernel( sub.index_sets, - sub.instructions, + sub.statements, kernel_data, name=sub.subprogram_name, default_order="F", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index bdef1133e..9005fcbfa 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -109,10 +109,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): a list of :class:`islpy.BasicSet` instances representing the :ref:`domain-tree`. - .. attribute:: instructions + .. attribute:: statements - A list of :class:`InstructionBase` instances, e.g. - :class:`Assignment`. See :ref:`instructions`. + A list of :class:`StatementBase` instances, e.g. + :class:`Assignment`. See :ref:`statements`. .. attribute:: args @@ -186,7 +186,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ constructor - def __init__(self, domains, instructions, args=[], schedule=None, + def __init__(self, domains, statements=None, args=[], schedule=None, name="loopy_kernel", preambles=[], preamble_generators=[], @@ -213,9 +213,14 @@ class LoopKernel(ImmutableRecordWithoutPickling): state=kernel_state.INITIAL, target=None, - overridden_get_grid_sizes_for_insn_ids=None): + overridden_get_grid_sizes_for_stmt_ids=None, + + # compat + statements=None, + overridden_get_grid_sizes_for_stmt_ids=None, + ): """ - :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get + :arg overridden_get_grid_sizes_for_stmt_ids: A callable. When kernels get intersected in slab decomposition, their grid sizes shouldn't change. 
This provides a way to forward sub-kernel grid size requests. """ @@ -224,6 +229,23 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.tools import SetOperationCacheManager cache_manager = SetOperationCacheManager() + if statements is not None and statements is not None: + raise TypeError("may not specify both statements and statements") + elif statements is None and statements is None: + raise TypeError( + "must specify exactly one of statements and statements") + elif statements is not None: + statements = statements + + if (overridden_get_grid_sizes_for_stmt_ids is not None + and overridden_get_grid_sizes_for_stmt_ids is not None): + raise TypeError("may not specify both " + "overridden_get_grid_sizes_for_stmt_ids " + "and overridden_get_grid_sizes_for_stmt_ids{") + elif overridden_get_grid_sizes_for_stmt_ids is not None: + overridden_get_grid_sizes_for_stmt_ids = \ + overridden_get_grid_sizes_for_stmt_ids + # {{{ process assumptions if assumptions is None: @@ -266,7 +288,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): ImmutableRecordWithoutPickling.__init__(self, domains=domains, - instructions=instructions, + statements=statements, args=args, schedule=schedule, name=name, @@ -288,8 +310,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): options=options, state=state, target=target, - overridden_get_grid_sizes_for_insn_ids=( - overridden_get_grid_sizes_for_insn_ids)) + overridden_get_grid_sizes_for_stmt_ids=( + overridden_get_grid_sizes_for_stmt_ids)) self._kernel_executor_cache = {} @@ -375,17 +397,17 @@ class LoopKernel(ImmutableRecordWithoutPickling): def get_var_name_generator(self): return _UniqueVarNameGenerator(self.all_variable_names()) - def get_instruction_id_generator(self, based_on="insn"): - used_ids = set(insn.id for insn in self.instructions) + def get_statement_id_generator(self, based_on="stmt"): + used_ids = set(stmt.id for stmt in self.statements) return UniqueNameGenerator(used_ids) - def 
make_unique_instruction_id(self, insns=None, based_on="insn", + def make_unique_statement_id(self, stmts=None, based_on="stmt", extra_used_ids=set()): - if insns is None: - insns = self.instructions + if stmts is None: + stmts = self.statements - used_ids = set(insn.id for insn in insns) | extra_used_ids + used_ids = set(stmt.id for stmt in stmts) | extra_used_ids for id_str in generate_unique_names(based_on): if id_str not in used_ids: @@ -393,9 +415,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): def all_group_names(self): result = set() - for insn in self.instructions: - result.update(insn.groups) - result.update(insn.conflicts_with_groups) + for stmt in self.statements: + result.update(stmt.groups) + result.update(stmt.conflicts_with_groups) return frozenset(result) @@ -417,8 +439,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): @property @memoize_method - def id_to_insn(self): - return dict((insn.id, insn) for insn in self.instructions) + def id_to_stmt(self): + return dict((stmt.id, stmt) for stmt in self.statements) # }}} @@ -659,35 +681,35 @@ class LoopKernel(ImmutableRecordWithoutPickling): return intern_frozenset_of_ids(all_params-all_inames) @memoize_method - def all_insn_inames(self): - """Return a mapping from instruction ids to inames inside which + def all_stmt_inames(self): + """Return a mapping from statement ids to inames inside which they should be run. 
""" result = {} - for insn in self.instructions: - result[insn.id] = insn.within_inames + for stmt in self.statements: + result[stmt.id] = stmt.within_inames return result @memoize_method def all_referenced_inames(self): result = set() - for inames in six.itervalues(self.all_insn_inames()): + for inames in six.itervalues(self.all_stmt_inames()): result.update(inames) return result - def insn_inames(self, insn): - if isinstance(insn, str): - insn = self.id_to_insn[insn] - return insn.within_inames + def stmt_inames(self, stmt): + if isinstance(stmt, str): + stmt = self.id_to_stmt[stmt] + return stmt.within_inames @memoize_method - def iname_to_insns(self): + def iname_to_stmts(self): result = dict( (iname, set()) for iname in self.all_inames()) - for insn in self.instructions: - for iname in self.insn_inames(insn): - result[iname].add(insn.id) + for stmt in self.statements: + for iname in self.stmt_inames(stmt): + result[iname].add(stmt.id) return result @@ -727,31 +749,31 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ dependency wrangling @memoize_method - def recursive_insn_dep_map(self): - """Returns a :class:`dict` mapping an instruction IDs *a* - to all instruction IDs it directly or indirectly depends + def recursive_stmt_dep_map(self): + """Returns a :class:`dict` mapping an statement IDs *a* + to all statement IDs it directly or indirectly depends on. 
""" result = {} - def compute_deps(insn_id): + def compute_deps(stmt_id): try: - return result[insn_id] + return result[stmt_id] except KeyError: pass - insn = self.id_to_insn[insn_id] - insn_result = set(insn.depends_on) + stmt = self.id_to_stmt[stmt_id] + stmt_result = set(stmt.depends_on) - for dep in list(insn.depends_on): - insn_result.update(compute_deps(dep)) + for dep in list(stmt.depends_on): + stmt_result.update(compute_deps(dep)) - result[insn_id] = frozenset(insn_result) - return insn_result + result[stmt_id] = frozenset(stmt_result) + return stmt_result - for insn in self.instructions: - compute_deps(insn.id) + for stmt in self.statements: + compute_deps(stmt.id) return result @@ -762,7 +784,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): @memoize_method def reader_map(self): """ - :return: a dict that maps variable names to ids of insns that read that + :return: a dict that maps variable names to ids of stmts that read that variable. """ result = {} @@ -771,39 +793,39 @@ class LoopKernel(ImmutableRecordWithoutPickling): set(arg.name for arg in self.args) | set(six.iterkeys(self.temporary_variables))) - for insn in self.instructions: - for var_name in insn.read_dependency_names() & admissible_vars: - result.setdefault(var_name, set()).add(insn.id) + for stmt in self.statements: + for var_name in stmt.read_dependency_names() & admissible_vars: + result.setdefault(var_name, set()).add(stmt.id) return result @memoize_method def writer_map(self): """ - :return: a dict that maps variable names to ids of insns that write + :return: a dict that maps variable names to ids of stmts that write to that variable. 
""" result = {} - for insn in self.instructions: - for var_name in insn.assignee_var_names(): - result.setdefault(var_name, set()).add(insn.id) + for stmt in self.statements: + for var_name in stmt.assignee_var_names(): + result.setdefault(var_name, set()).add(stmt.id) return result @memoize_method def get_read_variables(self): result = set() - for insn in self.instructions: - result.update(insn.read_dependency_names()) + for stmt in self.statements: + result.update(stmt.read_dependency_names()) return result @memoize_method def get_written_variables(self): return frozenset( var_name - for insn in self.instructions - for var_name in insn.assignee_var_names()) + for stmt in self.statements + for var_name in stmt.assignee_var_names()) @memoize_method def get_temporary_to_base_storage_map(self): @@ -902,29 +924,29 @@ class LoopKernel(ImmutableRecordWithoutPickling): constants_only=True))) @memoize_method - def get_grid_sizes_for_insn_ids(self, insn_ids, ignore_auto=False): + def get_grid_sizes_for_stmt_ids(self, stmt_ids, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that - could accommodate execution of all instructions whose IDs are given - in *insn_ids*. + could accommodate execution of all statements whose IDs are given + in *stmt_ids*. - :arg insn_ids: a :class:`frozenset` of instruction IDs + :arg stmt_ids: a :class:`frozenset` of statement IDs *global_size* and *local_size* are :class:`islpy.PwAff` objects. 
""" - if self.overridden_get_grid_sizes_for_insn_ids: - return self.overridden_get_grid_sizes_for_insn_ids( - insn_ids, + if self.overridden_get_grid_sizes_for_stmt_ids: + return self.overridden_get_grid_sizes_for_stmt_ids( + stmt_ids, ignore_auto=ignore_auto) - all_inames_by_insns = set() - for insn_id in insn_ids: - all_inames_by_insns |= self.insn_inames(insn_id) + all_inames_by_stmts = set() + for stmt_id in stmt_ids: + all_inames_by_stmts |= self.stmt_inames(stmt_id) - if not all_inames_by_insns <= self.all_inames(): - raise RuntimeError("some inames collected from instructions (%s) " + if not all_inames_by_stmts <= self.all_inames(): + raise RuntimeError("some inames collected from statements (%s) " "are not present in domain (%s)" - % (", ".join(sorted(all_inames_by_insns)), + % (", ".join(sorted(all_inames_by_stmts)), ", ".join(sorted(self.all_inames())))) global_sizes = {} @@ -934,7 +956,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): GroupIndexTag, LocalIndexTag, AutoLocalIndexTagBase) - for iname in all_inames_by_insns: + for iname in all_inames_by_stmts: tag = self.iname_to_tag.get(iname) if isinstance(tag, GroupIndexTag): @@ -995,18 +1017,18 @@ class LoopKernel(ImmutableRecordWithoutPickling): return (to_dim_tuple(global_sizes, "global"), to_dim_tuple(local_sizes, "local", forced_sizes=self.local_sizes)) - def get_grid_sizes_for_insn_ids_as_exprs(self, insn_ids, ignore_auto=False): + def get_grid_sizes_for_stmt_ids_as_exprs(self, stmt_ids, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that - could accommodate execution of all instructions whose IDs are given - in *insn_ids*. + could accommodate execution of all statements whose IDs are given + in *stmt_ids*. 
- :arg insn_ids: a :class:`frozenset` of instruction IDs + :arg stmt_ids: a :class:`frozenset` of statement IDs *global_size* and *local_size* are :mod:`pymbolic` expressions """ - grid_size, group_size = self.get_grid_sizes_for_insn_ids( - insn_ids, ignore_auto) + grid_size, group_size = self.get_grid_sizes_for_stmt_ids( + stmt_ids, ignore_auto) def tup_to_exprs(tup): from loopy.symbolic import pw_aff_to_expr @@ -1016,23 +1038,23 @@ class LoopKernel(ImmutableRecordWithoutPickling): def get_grid_size_upper_bounds(self, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that - could accommodate execution of *all* instructions in the kernel. + could accommodate execution of *all* statements in the kernel. *global_size* and *local_size* are :class:`islpy.PwAff` objects. """ - return self.get_grid_sizes_for_insn_ids( - frozenset(insn.id for insn in self.instructions), + return self.get_grid_sizes_for_stmt_ids( + frozenset(stmt.id for stmt in self.statements), ignore_auto=ignore_auto) def get_grid_size_upper_bounds_as_exprs(self, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that - could accommodate execution of *all* instructions in the kernel. + could accommodate execution of *all* statements in the kernel. 
*global_size* and *local_size* are :mod:`pymbolic` expressions """ - return self.get_grid_sizes_for_insn_ids_as_exprs( - frozenset(insn.id for insn in self.instructions), + return self.get_grid_sizes_for_stmt_ids_as_exprs( + frozenset(stmt.id for stmt in self.statements), ignore_auto=ignore_auto) # }}} @@ -1058,12 +1080,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ nosync sets @memoize_method - def get_nosync_set(self, insn_id, scope): + def get_nosync_set(self, stmt_id, scope): assert scope in ("local", "global") return frozenset( - insn_id - for insn_id, nosync_scope in self.id_to_insn[insn_id].no_sync_with + stmt_id + for stmt_id, nosync_scope in self.id_to_stmt[stmt_id].no_sync_with if nosync_scope == scope or nosync_scope == "any") # }}} @@ -1094,7 +1116,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): "tags", "variables", "rules", - "instructions", + "Statements", + "statements", "Dependencies", "schedule", ]) @@ -1171,18 +1194,18 @@ class LoopKernel(ImmutableRecordWithoutPickling): for rule_name in natsorted(six.iterkeys(kernel.substitutions)): lines.append(str(kernel.substitutions[rule_name])) - if "instructions" in what: + if "Statements" in what or "statements" in what: lines.extend(sep) if show_labels: - lines.append("INSTRUCTIONS:") + lines.append("STATEMENTS:") - from loopy.kernel.tools import stringify_instruction_list - lines.extend(stringify_instruction_list(kernel)) + from loopy.kernel.tools import stringify_statement_list + lines.extend(stringify_statement_list(kernel)) dep_lines = [] - for insn in kernel.instructions: - if insn.depends_on: - dep_lines.append("%s : %s" % (insn.id, ",".join(insn.depends_on))) + for stmt in kernel.statements: + if stmt.depends_on: + dep_lines.append("%s : %s" % (stmt.id, ",".join(stmt.depends_on))) if "Dependencies" in what and dep_lines: lines.extend(sep) @@ -1307,7 +1330,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): hash_fields = ( "domains", - "instructions", + "statements", "args", 
"schedule", "name", @@ -1387,6 +1410,21 @@ class LoopKernel(ImmutableRecordWithoutPickling): # }}} + # {{{ "statement" compat goop + + @property + def id_to_stmt(self): + return self.id_to_stmt + + @property + def statements(self): + return self.statements + + def get_statement_id_generator(self, based_on="stmt"): + return self.get_statement_id_generator(based_on) + + # }}} + # }}} # vim: foldmethod=marker diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index c6618d62f..b49a7d9a9 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -31,7 +31,7 @@ from pymbolic.mapper import CSECachingMapperMixin from loopy.tools import intern_frozenset_of_ids from loopy.symbolic import IdentityMapper, WalkMapper from loopy.kernel.data import ( - InstructionBase, + StatementBase, MultiAssignmentBase, Assignment, SubstitutionRule) from loopy.diagnostic import LoopyError, warn_with_kernel @@ -73,7 +73,7 @@ WORD_RE = re.compile(r"\b([a-zA-Z0-9_]+)\b") BRACE_RE = re.compile(r"\$\{([a-zA-Z0-9_]+)\}") -def expand_defines(insn, defines, single_valued=True): +def expand_defines(stmt, defines, single_valued=True): replacements = [()] processed_defines = set() @@ -83,7 +83,7 @@ def expand_defines(insn, defines, single_valued=True): (WORD_RE, r"\b%s\b"), ]: - for match in find_regexp.finditer(insn): + for match in find_regexp.finditer(stmt): define_name = match.group(1) # {{{ don't process the same define multiple times @@ -118,7 +118,7 @@ def expand_defines(insn, defines, single_valued=True): for rep in replacements] for rep in replacements: - rep_value = insn + rep_value = stmt for pattern, val in rep: rep_value = re.sub(pattern, str(val), rep_value) @@ -147,16 +147,16 @@ def expand_defines_in_expr(expr, defines): # }}} -# {{{ instruction options +# {{{ statement options -def get_default_insn_options_dict(): +def get_default_stmt_options_dict(): return { "depends_on": frozenset(), "depends_on_is_final": False, "no_sync_with": frozenset(), "groups": 
frozenset(), "conflicts_with_groups": frozenset(), - "insn_id": None, + "stmt_id": None, "inames_to_dup": [], "priority": 0, "within_inames_is_final": False, @@ -172,7 +172,7 @@ from collections import namedtuple _NosyncParseResult = namedtuple("_NosyncParseResult", "expr, scope") -def parse_insn_options(opt_dict, options_str, assignee_names=None): +def parse_stmt_options(opt_dict, options_str, assignee_names=None): if options_str is None: return opt_dict @@ -212,10 +212,10 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): raise LoopyError("'id' option may not be specified " "in a 'with' block") - result["insn_id"] = intern(opt_value) + result["stmt_id"] = intern(opt_value) elif opt_key == "id_prefix" and opt_value is not None: - result["insn_id"] = UniqueName(opt_value) + result["stmt_id"] = UniqueName(opt_value) elif opt_key == "priority" and opt_value is not None: if is_with_block: @@ -354,7 +354,7 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): else: raise ValueError( - "unrecognized instruction option '%s' " + "unrecognized statement option '%s' " "(maybe a missing/extraneous =value?)" % opt_key) @@ -363,7 +363,7 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): # }}} -# {{{ parse one instruction +# {{{ parse one statement WITH_OPTIONS_RE = re.compile( r"^" @@ -420,10 +420,10 @@ SUBST_RE = re.compile( r"^\s*(?P.+?)\s*:=\s*(?P.+)\s*$") -def parse_insn(groups, insn_options): +def parse_stmt(groups, stmt_options): """ - :return: a tuple ``(insn, inames_to_dup)``, where insn is a - :class:`Assignment`, a :class:`CallInstruction`, + :return: a tuple ``(stmt, inames_to_dup)``, where stmt is a + :class:`Assignment`, a :class:`CallStatement`, or a :class:`SubstitutionRule` and *inames_to_dup* is None or a list of tuples `(old, new)`. 
""" @@ -488,22 +488,22 @@ def parse_insn(groups, insn_options): temp_var_types = tuple(temp_var_types) del new_lhs - insn_options = parse_insn_options( - insn_options.copy(), + stmt_options = parse_stmt_options( + stmt_options.copy(), groups["options"], assignee_names=assignee_names) - insn_id = insn_options.pop("insn_id", None) - inames_to_dup = insn_options.pop("inames_to_dup", []) + stmt_id = stmt_options.pop("stmt_id", None) + inames_to_dup = stmt_options.pop("inames_to_dup", []) kwargs = dict( id=( - intern(insn_id) - if isinstance(insn_id, str) - else insn_id), - **insn_options) + intern(stmt_id) + if isinstance(stmt_id, str) + else stmt_id), + **stmt_options) - from loopy.kernel.instruction import make_assignment + from loopy.kernel.statement import make_assignment return make_assignment( lhs, rhs, temp_var_types, **kwargs ), inames_to_dup @@ -556,47 +556,47 @@ def parse_subst_rule(groups): # }}} -# {{{ parse_special_insn +# {{{ parse_special_stmt -def parse_special_insn(groups, insn_options): - insn_options = parse_insn_options( - insn_options.copy(), +def parse_special_stmt(groups, stmt_options): + stmt_options = parse_stmt_options( + stmt_options.copy(), groups["options"], assignee_names=()) - del insn_options["atomicity"] + del stmt_options["atomicity"] - insn_id = insn_options.pop("insn_id", None) - inames_to_dup = insn_options.pop("inames_to_dup", []) + stmt_id = stmt_options.pop("stmt_id", None) + inames_to_dup = stmt_options.pop("inames_to_dup", []) kwargs = dict( id=( - intern(insn_id) - if isinstance(insn_id, str) - else insn_id), - **insn_options) + intern(stmt_id) + if isinstance(stmt_id, str) + else stmt_id), + **stmt_options) - from loopy.kernel.instruction import NoOpInstruction, BarrierInstruction - special_insn_kind = groups["kind"] + from loopy.kernel.statement import NoOpStatement, BarrierStatement + special_stmt_kind = groups["kind"] - if special_insn_kind == "gbarrier": - cls = BarrierInstruction + if special_stmt_kind == "gbarrier": + 
cls = BarrierStatement kwargs["kind"] = "global" - elif special_insn_kind == "lbarrier": - cls = BarrierInstruction + elif special_stmt_kind == "lbarrier": + cls = BarrierStatement kwargs["kind"] = "local" - elif special_insn_kind == "nop": - cls = NoOpInstruction + elif special_stmt_kind == "nop": + cls = NoOpStatement else: raise LoopyError( - "invalid kind of special instruction: '%s'" % special_insn_kind) + "invalid kind of special statement: '%s'" % special_stmt_kind) return cls(**kwargs), inames_to_dup # }}} -# {{{ parse_instructions +# {{{ parse_statements _PAREN_PAIRS = { "(": (+1, "("), @@ -619,184 +619,184 @@ def _count_open_paren_symbols(s): return result -def parse_instructions(instructions, defines): - if isinstance(instructions, str): - instructions = [instructions] +def parse_statements(statements, defines): + if isinstance(statements, str): + statements = [statements] substitutions = {} - new_instructions = [] + new_statements = [] # {{{ pass 1: interning, comments, whitespace - for insn in instructions: - if isinstance(insn, SubstitutionRule): - substitutions[insn.name] = insn + for stmt in statements: + if isinstance(stmt, SubstitutionRule): + substitutions[stmt.name] = stmt continue - elif isinstance(insn, InstructionBase): + elif isinstance(stmt, StatementBase): def intern_if_str(s): if isinstance(s, str): return intern(s) else: return s - new_instructions.append( - insn.copy( - id=intern(insn.id) if isinstance(insn.id, str) else insn.id, + new_statements.append( + stmt.copy( + id=intern(stmt.id) if isinstance(stmt.id, str) else stmt.id, depends_on=frozenset(intern_if_str(dep) - for dep in insn.depends_on), - groups=frozenset(intern(grp) for grp in insn.groups), + for dep in stmt.depends_on), + groups=frozenset(intern(grp) for grp in stmt.groups), conflicts_with_groups=frozenset( - intern(grp) for grp in insn.conflicts_with_groups), + intern(grp) for grp in stmt.conflicts_with_groups), within_inames=frozenset( - intern(iname) for iname in 
insn.within_inames), + intern(iname) for iname in stmt.within_inames), )) continue - elif not isinstance(insn, str): - raise TypeError("Instructions must be either an Instruction " + elif not isinstance(stmt, str): + raise TypeError("Statements must be either an Statement " "instance or a parseable string. got '%s' instead." - % type(insn)) + % type(stmt)) - for insn in insn.split("\n"): - comment_start = insn.find("#") + for stmt in stmt.split("\n"): + comment_start = stmt.find("#") if comment_start >= 0: - insn = insn[:comment_start] + stmt = stmt[:comment_start] - insn = insn.strip() - if not insn: + stmt = stmt.strip() + if not stmt: continue - new_instructions.append(insn) + new_statements.append(stmt) # }}} - instructions = new_instructions - new_instructions = [] + statements = new_statements + new_statements = [] # {{{ pass 2: join-by-paren - insn_buffer = None + stmt_buffer = None - for i, insn in enumerate(instructions): - if isinstance(insn, InstructionBase): - if insn_buffer is not None: - raise LoopyError("cannot join instruction lines " + for i, stmt in enumerate(statements): + if isinstance(stmt, StatementBase): + if stmt_buffer is not None: + raise LoopyError("cannot join statement lines " "by paren-like delimiters " - "across InstructionBase instance at instructions index %d" + "across StatementBase instance at statements index %d" % i) - new_instructions.append(insn) + new_statements.append(stmt) else: - if insn_buffer is not None: - insn_buffer = insn_buffer + " " + insn - if _count_open_paren_symbols(insn_buffer) == 0: - new_instructions.append(insn_buffer) - insn_buffer = None + if stmt_buffer is not None: + stmt_buffer = stmt_buffer + " " + stmt + if _count_open_paren_symbols(stmt_buffer) == 0: + new_statements.append(stmt_buffer) + stmt_buffer = None else: - if _count_open_paren_symbols(insn) == 0: - new_instructions.append(insn) + if _count_open_paren_symbols(stmt) == 0: + new_statements.append(stmt) else: - insn_buffer = insn + stmt_buffer 
= stmt - if insn_buffer is not None: - raise LoopyError("unclosed paren-like delimiter at end of 'instructions' " + if stmt_buffer is not None: + raise LoopyError("unclosed paren-like delimiter at end of 'statements' " "while attempting to join lines by paren-like delimiters") # }}} - instructions = new_instructions - new_instructions = [] + statements = new_statements + new_statements = [] # {{{ pass 3: defines - for insn in instructions: - if isinstance(insn, InstructionBase): - new_instructions.append(insn) + for stmt in statements: + if isinstance(stmt, StatementBase): + new_statements.append(stmt) else: - for sub_insn in expand_defines(insn, defines, single_valued=False): - new_instructions.append(sub_insn) + for sub_stmt in expand_defines(stmt, defines, single_valued=False): + new_statements.append(sub_stmt) # }}} - instructions = new_instructions - new_instructions = [] + statements = new_statements + new_statements = [] - inames_to_dup = [] # one for each parsed_instruction + inames_to_dup = [] # one for each parsed_statement # {{{ pass 4: parsing - insn_options_stack = [get_default_insn_options_dict()] + stmt_options_stack = [get_default_stmt_options_dict()] if_predicates_stack = [ {'predicates': frozenset(), - 'insn_predicates': frozenset()}] + 'stmt_predicates': frozenset()}] - for insn in instructions: - if isinstance(insn, InstructionBase): - local_w_inames = insn_options_stack[-1]["within_inames"] + for stmt in statements: + if isinstance(stmt, StatementBase): + local_w_inames = stmt_options_stack[-1]["within_inames"] - if insn.within_inames_is_final: + if stmt.within_inames_is_final: if not ( - local_w_inames <= insn.within_inames): - raise LoopyError("non-parsed instruction '%s' without " + local_w_inames <= stmt.within_inames): + raise LoopyError("non-parsed statement '%s' without " "inames '%s' (but with final iname dependencies) " "found inside 'for'/'with' block for inames " "'%s'" - % (insn.id, - ", ".join(local_w_inames - insn.within_inames), 
- insn_options_stack[-1].within_inames)) + % (stmt.id, + ", ".join(local_w_inames - stmt.within_inames), + stmt_options_stack[-1].within_inames)) else: # not final, add inames from current scope kwargs = {} - if insn.id is None: - kwargs["id"] = insn_options_stack[-1]["insn_id"] + if stmt.id is None: + kwargs["id"] = stmt_options_stack[-1]["stmt_id"] - insn = insn.copy( - within_inames=insn.within_inames | local_w_inames, + stmt = stmt.copy( + within_inames=stmt.within_inames | local_w_inames, within_inames_is_final=( # If it's inside a for/with block, then it's # final now. bool(local_w_inames)), depends_on=( - (insn.depends_on - | insn_options_stack[-1]["depends_on"]) - if insn_options_stack[-1]["depends_on"] is not None - else insn.depends_on), + (stmt.depends_on + | stmt_options_stack[-1]["depends_on"]) + if stmt_options_stack[-1]["depends_on"] is not None + else stmt.depends_on), tags=( - insn.tags - | insn_options_stack[-1]["tags"]), + stmt.tags + | stmt_options_stack[-1]["tags"]), predicates=( - insn.predicates - | insn_options_stack[-1]["predicates"]), + stmt.predicates + | stmt_options_stack[-1]["predicates"]), groups=( - insn.groups - | insn_options_stack[-1]["groups"]), + stmt.groups + | stmt_options_stack[-1]["groups"]), conflicts_with_groups=( - insn.groups - | insn_options_stack[-1]["conflicts_with_groups"]), + stmt.groups + | stmt_options_stack[-1]["conflicts_with_groups"]), **kwargs) - new_instructions.append(insn) + new_statements.append(stmt) inames_to_dup.append([]) del local_w_inames continue - with_options_match = WITH_OPTIONS_RE.match(insn) + with_options_match = WITH_OPTIONS_RE.match(stmt) if with_options_match is not None: - insn_options_stack.append( - parse_insn_options( - insn_options_stack[-1], + stmt_options_stack.append( + parse_stmt_options( + stmt_options_stack[-1], with_options_match.group("options"))) continue - for_match = FOR_RE.match(insn) + for_match = FOR_RE.match(stmt) if for_match is not None: - options = 
insn_options_stack[-1].copy() + options = stmt_options_stack[-1].copy() added_inames = frozenset( iname.strip() for iname in for_match.group("inames").split(",") @@ -809,13 +809,13 @@ def parse_instructions(instructions, defines): | added_inames) options["within_inames_is_final"] = True - insn_options_stack.append(options) + stmt_options_stack.append(options) del options continue - if_match = IF_RE.match(insn) + if_match = IF_RE.match(stmt) if if_match is not None: - options = insn_options_stack[-1].copy() + options = stmt_options_stack[-1].copy() predicate = if_match.group("predicate") if not predicate: raise LoopyError("'if' without predicate encountered") @@ -827,27 +827,27 @@ def parse_instructions(instructions, defines): options.get("predicates", frozenset()) | frozenset([predicate])) - insn_options_stack.append(options) + stmt_options_stack.append(options) #add to the if_stack if_options = options.copy() - if_options['insn_predicates'] = options["predicates"] + if_options['stmt_predicates'] = options["predicates"] if_predicates_stack.append(if_options) del options del predicate continue - elif_match = ELIF_RE.match(insn) - else_match = ELSE_RE.match(insn) + elif_match = ELIF_RE.match(stmt) + else_match = ELSE_RE.match(stmt) if elif_match is not None or else_match is not None: - prev_predicates = insn_options_stack[-1].get( + prev_predicates = stmt_options_stack[-1].get( "predicates", frozenset()) last_if_predicates = if_predicates_stack[-1].get( "predicates", frozenset()) - insn_options_stack.pop() + stmt_options_stack.pop() if_predicates_stack.pop() - outer_predicates = insn_options_stack[-1].get( + outer_predicates = stmt_options_stack[-1].get( "predicates", frozenset()) last_if_predicates = last_if_predicates - outer_predicates @@ -867,8 +867,8 @@ def parse_instructions(instructions, defines): raise LoopyError("'else' without 'if'/'elif' encountered") additional_preds = frozenset() - options = insn_options_stack[-1].copy() - if_options = 
insn_options_stack[-1].copy() + options = stmt_options_stack[-1].copy() + if_options = stmt_options_stack[-1].copy() from pymbolic.primitives import LogicalNot options["predicates"] = ( @@ -881,9 +881,9 @@ def parse_instructions(instructions, defines): ) if_options["predicates"] = additional_preds #hold on to this for comparison / stack popping later - if_options["insn_predicates"] = options["predicates"] + if_options["stmt_predicates"] = options["predicates"] - insn_options_stack.append(options) + stmt_options_stack.append(options) if_predicates_stack.append(if_options) del options @@ -892,53 +892,53 @@ def parse_instructions(instructions, defines): continue - if insn == "end": - obj = insn_options_stack.pop() + if stmt == "end": + obj = stmt_options_stack.pop() #if this object is the end of an if statement - if obj['predicates'] == if_predicates_stack[-1]["insn_predicates"] and\ - if_predicates_stack[-1]["insn_predicates"]: + if obj['predicates'] == if_predicates_stack[-1]["stmt_predicates"] and\ + if_predicates_stack[-1]["stmt_predicates"]: if_predicates_stack.pop() continue - insn_match = SPECIAL_INSN_RE.match(insn) - if insn_match is not None: - insn, insn_inames_to_dup = parse_special_insn( - insn_match.groupdict(), insn_options_stack[-1]) - new_instructions.append(insn) - inames_to_dup.append(insn_inames_to_dup) + stmt_match = SPECIAL_INSN_RE.match(stmt) + if stmt_match is not None: + stmt, stmt_inames_to_dup = parse_special_stmt( + stmt_match.groupdict(), stmt_options_stack[-1]) + new_statements.append(stmt) + inames_to_dup.append(stmt_inames_to_dup) continue - subst_match = SUBST_RE.match(insn) + subst_match = SUBST_RE.match(stmt) if subst_match is not None: subst = parse_subst_rule(subst_match.groupdict()) substitutions[subst.name] = subst continue - insn_match = INSN_RE.match(insn) - if insn_match is not None: - insn, insn_inames_to_dup = parse_insn( - insn_match.groupdict(), insn_options_stack[-1]) - new_instructions.append(insn) - 
inames_to_dup.append(insn_inames_to_dup) + stmt_match = INSN_RE.match(stmt) + if stmt_match is not None: + stmt, stmt_inames_to_dup = parse_stmt( + stmt_match.groupdict(), stmt_options_stack[-1]) + new_statements.append(stmt) + inames_to_dup.append(stmt_inames_to_dup) continue - insn_match = EMPTY_LHS_INSN_RE.match(insn) - if insn_match is not None: - insn, insn_inames_to_dup = parse_insn( - insn_match.groupdict(), insn_options_stack[-1]) - new_instructions.append(insn) - inames_to_dup.append(insn_inames_to_dup) + stmt_match = EMPTY_LHS_INSN_RE.match(stmt) + if stmt_match is not None: + stmt, stmt_inames_to_dup = parse_stmt( + stmt_match.groupdict(), stmt_options_stack[-1]) + new_statements.append(stmt) + inames_to_dup.append(stmt_inames_to_dup) continue - raise LoopyError("instruction parse error: %s" % insn) + raise LoopyError("statement parse error: %s" % stmt) - if len(insn_options_stack) != 1: + if len(stmt_options_stack) != 1: raise LoopyError("unbalanced number of 'for'/'with' and 'end' " "declarations") # }}} - return new_instructions, inames_to_dup, substitutions + return new_statements, inames_to_dup, substitutions # }}} @@ -1050,10 +1050,10 @@ class IndexRankFinder(CSECachingMapperMixin, WalkMapper): class ArgumentGuesser: - def __init__(self, domains, instructions, temporary_variables, + def __init__(self, domains, statements, temporary_variables, subst_rules, default_offset): self.domains = domains - self.instructions = instructions + self.statements = statements self.temporary_variables = temporary_variables self.subst_rules = subst_rules self.default_offset = default_offset @@ -1073,15 +1073,15 @@ class ArgumentGuesser: self.all_names = set() self.all_written_names = set() from loopy.symbolic import get_dependencies - for insn in instructions: - if isinstance(insn, MultiAssignmentBase): - for assignee_var_name in insn.assignee_var_names(): + for stmt in statements: + if isinstance(stmt, MultiAssignmentBase): + for assignee_var_name in 
stmt.assignee_var_names(): self.all_written_names.add(assignee_var_name) self.all_names.update(get_dependencies( - self.submap(insn.assignees))) + self.submap(stmt.assignees))) self.all_names.update(get_dependencies( - self.submap(insn.expression))) + self.submap(stmt.expression))) def find_index_rank(self, name): irf = IndexRankFinder(name) @@ -1090,8 +1090,8 @@ class ArgumentGuesser: irf(self.submap(expr)) return expr - for insn in self.instructions: - insn.with_transformed_expressions(run_irf) + for stmt in self.statements: + stmt.with_transformed_expressions(run_irf) if not irf.index_ranks: return 0 @@ -1145,11 +1145,11 @@ class ArgumentGuesser: temp_var_names = set(six.iterkeys(self.temporary_variables)) - for insn in self.instructions: - if isinstance(insn, MultiAssignmentBase): + for stmt in self.statements: + if isinstance(stmt, MultiAssignmentBase): for assignee_var_name, temp_var_type in zip( - insn.assignee_var_names(), - insn.temp_var_types): + stmt.assignee_var_names(), + stmt.temp_var_types): if temp_var_type is not None: temp_var_names.add(assignee_var_name) @@ -1206,14 +1206,14 @@ def check_for_duplicate_names(knl): def check_for_nonexistent_iname_deps(knl): - for insn in knl.instructions: - if not set(insn.within_inames) <= knl.all_inames(): - raise ValueError("In instruction '%s': " + for stmt in knl.statements: + if not set(stmt.within_inames) <= knl.all_inames(): + raise ValueError("In statement '%s': " "cannot force dependency on inames '%s'--" "they don't exist" % ( - insn.id, + stmt.id, ",".join( - set(insn.within_inames)-knl.all_inames()))) + set(stmt.within_inames)-knl.all_inames()))) def check_for_multiple_writes_to_loop_bounds(knl): @@ -1240,8 +1240,8 @@ def check_written_variable_names(knl): set(arg.name for arg in knl.args) | set(six.iterkeys(knl.temporary_variables))) - for insn in knl.instructions: - for var_name in insn.assignee_var_names(): + for stmt in knl.statements: + for var_name in stmt.assignee_var_names(): if var_name not in 
admissible_vars: raise RuntimeError("variable '%s' not declared or not " "allowed for writing" % var_name) @@ -1284,7 +1284,7 @@ class CSEToAssignmentMapper(IdentityMapper): return var -def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"): +def expand_cses(statements, inames_to_dup, cse_prefix="cse_expr"): def add_assignment(base_name, expr, dtype, additional_inames): if base_name is None: base_name = "var" @@ -1305,47 +1305,47 @@ def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"): shape=())) from pymbolic.primitives import Variable - new_insn = Assignment( + new_stmt = Assignment( id=None, assignee=Variable(new_var_name), expression=expr, - predicates=insn.predicates, - within_inames=insn.within_inames | additional_inames, - within_inames_is_final=insn.within_inames_is_final, + predicates=stmt.predicates, + within_inames=stmt.within_inames | additional_inames, + within_inames_is_final=stmt.within_inames_is_final, ) - newly_created_insn_ids.add(new_insn.id) - new_insns.append(new_insn) - if insn_inames_to_dup: + newly_created_stmt_ids.add(new_stmt.id) + new_stmts.append(new_stmt) + if stmt_inames_to_dup: raise LoopyError("in-line iname duplication not allowed in " - "an instruction containing a tagged common " - "subexpression (found in instruction '%s')" - % insn) + "an statement containing a tagged common " + "subexpression (found in statement '%s')" + % stmt) - new_inames_to_dup.append(insn_inames_to_dup) + new_inames_to_dup.append(stmt_inames_to_dup) return new_var_name cseam = CSEToAssignmentMapper(add_assignment=add_assignment) - new_insns = [] + new_stmts = [] new_inames_to_dup = [] from pytools import UniqueNameGenerator var_name_gen = UniqueNameGenerator(forced_prefix=cse_prefix) - newly_created_insn_ids = set() + newly_created_stmt_ids = set() new_temp_vars = [] - for insn, insn_inames_to_dup in zip(instructions, inames_to_dup): - if isinstance(insn, MultiAssignmentBase): - new_insns.append(insn.copy( - 
expression=cseam(insn.expression, frozenset()))) - new_inames_to_dup.append(insn_inames_to_dup) + for stmt, stmt_inames_to_dup in zip(statements, inames_to_dup): + if isinstance(stmt, MultiAssignmentBase): + new_stmts.append(stmt.copy( + expression=cseam(stmt.expression, frozenset()))) + new_inames_to_dup.append(stmt_inames_to_dup) else: - new_insns.append(insn) - new_inames_to_dup.append(insn_inames_to_dup) + new_stmts.append(stmt) + new_inames_to_dup.append(stmt_inames_to_dup) - return new_insns, new_inames_to_dup, new_temp_vars + return new_stmts, new_inames_to_dup, new_temp_vars # }}} @@ -1353,25 +1353,25 @@ def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"): # {{{ add_sequential_dependencies def add_sequential_dependencies(knl): - new_insns = [] - prev_insn = None - for insn in knl.instructions: - depon = insn.depends_on + new_stmts = [] + prev_stmt = None + for stmt in knl.statements: + depon = stmt.depends_on if depon is None: depon = frozenset() - if prev_insn is not None: - depon = depon | frozenset((prev_insn.id,)) + if prev_stmt is not None: + depon = depon | frozenset((prev_stmt.id,)) - insn = insn.copy( + stmt = stmt.copy( depends_on=depon, depends_on_is_final=True) - new_insns.append(insn) + new_stmts.append(stmt) - prev_insn = insn + prev_stmt = stmt - return knl.copy(instructions=new_insns) + return knl.copy(statements=new_stmts) # }}} @@ -1379,16 +1379,16 @@ def add_sequential_dependencies(knl): # {{{ temporary variable creation def create_temporaries(knl, default_order): - new_insns = [] + new_stmts = [] new_temp_vars = knl.temporary_variables.copy() import loopy as lp - for insn in knl.instructions: - if isinstance(insn, MultiAssignmentBase): + for stmt in knl.statements: + if isinstance(stmt, MultiAssignmentBase): for assignee_name, temp_var_type in zip( - insn.assignee_var_names(), - insn.temp_var_types): + stmt.assignee_var_names(), + stmt.temp_var_types): if temp_var_type is None: continue @@ -1412,15 +1412,15 @@ def 
create_temporaries(knl, default_order): order=default_order, target=knl.target) - if isinstance(insn, Assignment): - insn = insn.copy(temp_var_type=None) + if isinstance(stmt, Assignment): + stmt = stmt.copy(temp_var_type=None) else: - insn = insn.copy(temp_var_types=None) + stmt = stmt.copy(temp_var_types=None) - new_insns.append(insn) + new_stmts.append(stmt) return knl.copy( - instructions=new_insns, + statements=new_stmts, temporary_variables=new_temp_vars) # }}} @@ -1489,9 +1489,9 @@ def determine_shapes_of_temporaries(knl): vars_needing_shape_inference.add(tv.name) def feed_all_expressions(receiver): - for insn in knl.instructions: - insn.with_transformed_expressions( - lambda expr: receiver(expr, knl.insn_inames(insn))) + for stmt in knl.statements: + stmt.with_transformed_expressions( + lambda expr: receiver(expr, knl.stmt_inames(stmt))) var_to_base_indices, var_to_shape, var_to_error = ( find_shapes_of_vars( @@ -1509,14 +1509,14 @@ def determine_shapes_of_temporaries(knl): "shape of temporary '%s' because: %s" % (varname, err)) - def feed_assignee_of_instruction(receiver): - for insn in knl.instructions: - for assignee in insn.assignees: - receiver(assignee, knl.insn_inames(insn)) + def feed_assignee_of_statement(receiver): + for stmt in knl.statements: + for assignee in stmt.assignees: + receiver(assignee, knl.stmt_inames(stmt)) var_to_base_indices_fallback, var_to_shape_fallback, var_to_error = ( find_shapes_of_vars( - knl, vars_needing_shape_inference, feed_assignee_of_instruction)) + knl, vars_needing_shape_inference, feed_assignee_of_statement)) if len(var_to_error) > 0: # No way around errors: propagate an exception upward. 
@@ -1622,10 +1622,10 @@ def apply_default_order_to_args(kernel, default_order): # }}} -# {{{ resolve instruction dependencies +# {{{ resolve statement dependencies -def _resolve_dependencies(knl, insn, deps): - from loopy import find_instructions +def _resolve_dependencies(knl, stmt, deps): + from loopy import find_statements from loopy.match import MatchExpressionBase new_deps = [] @@ -1634,45 +1634,45 @@ def _resolve_dependencies(knl, insn, deps): found_any = False if isinstance(dep, MatchExpressionBase): - for new_dep in find_instructions(knl, dep): - if new_dep.id != insn.id: + for new_dep in find_statements(knl, dep): + if new_dep.id != stmt.id: new_deps.append(new_dep.id) found_any = True else: from fnmatch import fnmatchcase - for other_insn in knl.instructions: - if fnmatchcase(other_insn.id, dep): - new_deps.append(other_insn.id) + for other_stmt in knl.statements: + if fnmatchcase(other_stmt.id, dep): + new_deps.append(other_stmt.id) found_any = True if not found_any and knl.options.check_dep_resolution: - raise LoopyError("instruction '%s' declared a depency on '%s', " - "which did not resolve to any instruction present in the " + raise LoopyError("statement '%s' declared a depency on '%s', " + "which did not resolve to any statement present in the " "kernel '%s'. Set the kernel option 'check_dep_resolution'" - "to False to disable this check." % (insn.id, dep, knl.name)) + "to False to disable this check." 
% (stmt.id, dep, knl.name)) for dep_id in new_deps: - if dep_id not in knl.id_to_insn: - raise LoopyError("instruction '%s' depends on instruction id '%s', " - "which was not found" % (insn.id, dep_id)) + if dep_id not in knl.id_to_stmt: + raise LoopyError("statement '%s' depends on statement id '%s', " + "which was not found" % (stmt.id, dep_id)) return frozenset(new_deps) def resolve_dependencies(knl): - new_insns = [] + new_stmts = [] - for insn in knl.instructions: - new_insns.append(insn.copy( - depends_on=_resolve_dependencies(knl, insn, insn.depends_on), + for stmt in knl.statements: + new_stmts.append(stmt.copy( + depends_on=_resolve_dependencies(knl, stmt, stmt.depends_on), no_sync_with=frozenset( - (resolved_insn_id, nosync_scope) - for nosync_dep, nosync_scope in insn.no_sync_with - for resolved_insn_id in - _resolve_dependencies(knl, insn, (nosync_dep,))), + (resolved_stmt_id, nosync_scope) + for nosync_dep, nosync_scope in stmt.no_sync_with + for resolved_stmt_id in + _resolve_dependencies(knl, stmt, (nosync_dep,))), )) - return knl.copy(instructions=new_insns) + return knl.copy(statements=new_stmts) # }}} @@ -1680,20 +1680,20 @@ def resolve_dependencies(knl): # {{{ add used inames deps def add_used_inames(knl): - new_insns = [] + new_stmts = [] - for insn in knl.instructions: - deps = insn.read_dependency_names() | insn.write_dependency_names() + for stmt in knl.statements: + deps = stmt.read_dependency_names() | stmt.write_dependency_names() iname_deps = deps & knl.all_inames() - new_within_inames = insn.within_inames | iname_deps + new_within_inames = stmt.within_inames | iname_deps - if new_within_inames != insn.within_inames: - insn = insn.copy(within_inames=new_within_inames) + if new_within_inames != stmt.within_inames: + stmt = stmt.copy(within_inames=new_within_inames) - new_insns.append(insn) + new_stmts.append(stmt) - return knl.copy(instructions=new_insns) + return knl.copy(statements=new_stmts) # }}} @@ -1701,12 +1701,12 @@ def 
add_used_inames(knl): # {{{ add inferred iname deps def add_inferred_inames(knl): - from loopy.kernel.tools import find_all_insn_inames - insn_inames = find_all_insn_inames(knl) + from loopy.kernel.tools import find_all_stmt_inames + stmt_inames = find_all_stmt_inames(knl) - return knl.copy(instructions=[ - insn.copy(within_inames=insn_inames[insn.id]) - for insn in knl.instructions]) + return knl.copy(statements=[ + stmt.copy(within_inames=stmt_inames[stmt.id]) + for stmt in knl.statements]) # }}} @@ -1726,18 +1726,18 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True): var_names = arg_names | set(six.iterkeys(kernel.temporary_variables)) dep_map = dict( - (insn.id, insn.read_dependency_names() & var_names) - for insn in expanded_kernel.instructions) + (stmt.id, stmt.read_dependency_names() & var_names) + for stmt in expanded_kernel.statements) - new_insns = [] - for insn in kernel.instructions: - if not insn.depends_on_is_final: + new_stmts = [] + for stmt in kernel.statements: + if not stmt.depends_on_is_final: auto_deps = set() # {{{ add automatic dependencies all_my_var_writers = set() - for var in dep_map[insn.id]: + for var in dep_map[stmt.id]: var_writers = writer_map.get(var, set()) all_my_var_writers |= var_writers @@ -1751,11 +1751,11 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True): if len(var_writers) == 1: auto_deps.update( var_writers - - set([insn.id])) + - set([stmt.id])) # }}} - depends_on = insn.depends_on + depends_on = stmt.depends_on if depends_on is None: depends_on = frozenset() @@ -1764,26 +1764,26 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True): if warn_if_used and new_deps != depends_on: warn_with_kernel(kernel, "single_writer_after_creation", "The single-writer dependency heuristic added dependencies " - "on instruction ID(s) '%s' to instruction ID '%s' after " + "on statement ID(s) '%s' to statement ID '%s' after " "kernel creation is complete. 
This is deprecated and " "may stop working in the future. " - "To fix this, ensure that instruction dependencies " + "To fix this, ensure that statement dependencies " "are added/resolved as soon as possible, ideally at kernel " "creation time." - % (", ".join(new_deps - depends_on), insn.id)) + % (", ".join(new_deps - depends_on), stmt.id)) - insn = insn.copy(depends_on=new_deps) + stmt = stmt.copy(depends_on=new_deps) - new_insns.append(insn) + new_stmts.append(stmt) - return kernel.copy(instructions=new_insns) + return kernel.copy(statements=new_stmts) # }}} # {{{ kernel creation top-level -def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): +def make_kernel(domains, statements, kernel_data=["..."], **kwargs): """User-facing kernel creation entrypoint. :arg domains: @@ -1792,9 +1792,9 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): representing the :ref:`domain-tree`. May also be a list of strings which will be parsed into such instances according to :ref:`isl-syntax`. - :arg instructions: + :arg statements: - A list of :class:`Assignment` (or other :class:`InstructionBase` + A list of :class:`Assignment` (or other :class:`StatementBase` subclasses), possibly intermixed with instances of :class:`SubstitutionRule`. This same list may also contain strings which will be parsed into such objects using the @@ -1857,7 +1857,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): :arg target: an instance of :class:`loopy.TargetBase`, or *None*, to use the default target. :arg seq_dependencies: If *True*, dependencies that sequentially - connect the given *instructions* will be added. Defaults to + connect the given *statements* will be added. Defaults to *False*. :arg fixed_parameters: A dictionary of *name*/*value* pairs, where *name* will be fixed to *value*. 
*name* may refer to :ref:`domain-parameters` @@ -1948,8 +1948,8 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): # }}} - instructions, inames_to_dup, substitutions = \ - parse_instructions(instructions, defines) + statements, inames_to_dup, substitutions = \ + parse_statements(statements, defines) # {{{ find/create isl_context @@ -1959,15 +1959,15 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): # }}} - instructions, inames_to_dup, cse_temp_vars = expand_cses( - instructions, inames_to_dup) + statements, inames_to_dup, cse_temp_vars = expand_cses( + statements, inames_to_dup) for tv in cse_temp_vars: temporary_variables[tv.name] = tv del cse_temp_vars domains = parse_domains(domains, defines) - arg_guesser = ArgumentGuesser(domains, instructions, + arg_guesser = ArgumentGuesser(domains, statements, temporary_variables, substitutions, default_offset) @@ -1977,29 +1977,29 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): kwargs["substitutions"] = substitutions from loopy.kernel import LoopKernel - knl = LoopKernel(domains, instructions, kernel_args, + knl = LoopKernel(domains, statements, kernel_args, temporary_variables=temporary_variables, silenced_warnings=silenced_warnings, options=options, target=target, **kwargs) - from loopy.transform.instruction import uniquify_instruction_ids - knl = uniquify_instruction_ids(knl) - from loopy.check import check_for_duplicate_insn_ids - check_for_duplicate_insn_ids(knl) + from loopy.transform.statement import uniquify_statement_ids + knl = uniquify_statement_ids(knl) + from loopy.check import check_for_duplicate_stmt_ids + check_for_duplicate_stmt_ids(knl) if seq_dependencies: knl = add_sequential_dependencies(knl) - assert len(knl.instructions) == len(inames_to_dup) + assert len(knl.statements) == len(inames_to_dup) from loopy import duplicate_inames from loopy.match import Id - for insn, insn_inames_to_dup in zip(knl.instructions, inames_to_dup): - 
for old_iname, new_iname in insn_inames_to_dup: + for stmt, stmt_inames_to_dup in zip(knl.statements, inames_to_dup): + for old_iname, new_iname in stmt_inames_to_dup: knl = duplicate_inames(knl, old_iname, - within=Id(insn.id), new_inames=new_iname) + within=Id(stmt.id), new_inames=new_iname) check_for_nonexistent_iname_deps(knl) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 44cbdea49..42c0a6b75 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -30,8 +30,8 @@ import numpy as np # noqa from pytools import ImmutableRecord from loopy.kernel.array import ArrayBase from loopy.diagnostic import LoopyError -from loopy.kernel.instruction import ( # noqa - InstructionBase, +from loopy.kernel.statement import ( # noqa + StatementBase, memory_ordering, memory_scope, VarAtomicity, @@ -39,10 +39,10 @@ from loopy.kernel.instruction import ( # noqa AtomicUpdate, MultiAssignmentBase, Assignment, - ExpressionInstruction, - CallInstruction, + ExpressionStatement, + CallStatement, make_assignment, - CInstruction) + CStatement) class auto(object): # noqa @@ -70,7 +70,7 @@ class IndexTag(ImmutableRecord): @property def key(self): """Return a hashable, comparable value that is used to ensure - per-instruction uniqueness of this unique iname tag. + per-statement uniqueness of this unique iname tag. Also used for persistent hash construction. 
""" diff --git a/loopy/kernel/instruction.py b/loopy/kernel/statement.py similarity index 88% rename from loopy/kernel/instruction.py rename to loopy/kernel/statement.py index e6b093785..f23815ccb 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/statement.py @@ -28,10 +28,10 @@ from loopy.diagnostic import LoopyError from warnings import warn -# {{{ instructions: base class +# {{{ statements: base class -class InstructionBase(ImmutableRecord): - """A base class for all types of instruction that can occur in +class StatementBase(ImmutableRecord): + """A base class for all types of statement that can occur in a kernel. .. attribute:: id @@ -39,26 +39,26 @@ class InstructionBase(ImmutableRecord): An (otherwise meaningless) identifier that is unique within a :class:`loopy.kernel.LoopKernel`. - .. rubric:: Instruction ordering + .. rubric:: Statement ordering .. attribute:: depends_on - a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances + a :class:`frozenset` of :attr:`id` values of :class:`Statement` instances that *must* be executed before this one. Note that :func:`loopy.preprocess_kernel` (usually invoked automatically) augments this by adding dependencies on any writes to temporaries read - by this instruction. + by this statement. May be *None* to invoke the default. There are two extensions to this: - You may use `*` as a wildcard in the given IDs. This will be expanded - to all matching instruction IDs during :func:`loopy.make_kernel`. - - Instead of an instruction ID, you may pass an instance of + to all matching statement IDs during :func:`loopy.make_kernel`. + - Instead of an statement ID, you may pass an instance of :class:`loopy.match.MatchExpressionBase` into the :attr:`depends_on` :class:`frozenset`. The given expression will be used to add any - matching instructions in the kernel to :attr:`depends_on` during + matching statements in the kernel to :attr:`depends_on` during :func:`loopy.make_kernel`. 
Note, that this is not meant as a user-facing interface. @@ -71,16 +71,16 @@ class InstructionBase(ImmutableRecord): .. attribute:: groups - A :class:`frozenset` of strings indicating the names of 'instruction - groups' of which this instruction is a part. An instruction group is - considered 'active' as long as one (but not all) instructions of the + A :class:`frozenset` of strings indicating the names of 'statement + groups' of which this statement is a part. An statement group is + considered 'active' as long as one (but not all) statements of the group have been executed. .. attribute:: conflicts_with_groups - A :class:`frozenset` of strings indicating which instruction groups - (see :class:`InstructionBase.groups`) may not be active when this - instruction is scheduled. + A :class:`frozenset` of strings indicating which statement groups + (see :class:`StatementBase.groups`) may not be active when this + statement is scheduled. .. attribute:: priority @@ -91,8 +91,8 @@ class InstructionBase(ImmutableRecord): .. attribute:: no_sync_with - a :class:`frozenset` of tuples of the form `(insn_id, scope)`, where - `insn_id` refers to :attr:`id` of :class:`Instruction` instances + a :class:`frozenset` of tuples of the form `(stmt_id, scope)`, where + `stmt_id` refers to :attr:`id` of :class:`Statement` instances and `scope` is one of the following strings: - `"local"` @@ -100,10 +100,10 @@ class InstructionBase(ImmutableRecord): - `"any"`. This indicates no barrier synchronization is necessary with the given - instruction using barriers of type `scope`, even given the existence of + statement using barriers of type `scope`, even given the existence of a dependency chain and apparently conflicting access. - Note, that :attr:`no_sync_with` allows instruction matching through wildcards + Note, that :attr:`no_sync_with` allows statement matching through wildcards and match expression, just like :attr:`depends_on`. .. 
rubric:: Conditionals @@ -111,7 +111,7 @@ class InstructionBase(ImmutableRecord): .. attribute:: predicates a :class:`frozenset` of expressions. The conjunction (logical and) of - their truth values (as defined by C) determines whether this instruction + their truth values (as defined by C) determines whether this statement should be run. .. rubric:: Iname dependencies @@ -119,7 +119,7 @@ class InstructionBase(ImmutableRecord): .. attribute:: within_inames A :class:`frozenset` of inames identifying the loops within which this - instruction will be executed. + statement will be executed. .. rubric:: Iname dependencies @@ -128,7 +128,7 @@ class InstructionBase(ImmutableRecord): .. attribute:: tags A :class:`frozenset` of string identifiers that can be used to - identify groups of instructions. + identify groups of statements. Tags starting with exclamation marks (``!``) are reserved and may have specific meanings defined by :mod:`loopy` or its targets. @@ -164,19 +164,19 @@ class InstructionBase(ImmutableRecord): within_inames_is_final, within_inames, priority, boostable, boostable_into, predicates, tags, - insn_deps=None, insn_deps_is_final=None, + stmt_deps=None, stmt_deps_is_final=None, forced_iname_deps=None, forced_iname_deps_is_final=None): # {{{ backwards compatibility goop - if depends_on is not None and insn_deps is not None: - raise LoopyError("may not specify both insn_deps and depends_on") - elif insn_deps is not None: - warn("insn_deps is deprecated, use depends_on", + if depends_on is not None and stmt_deps is not None: + raise LoopyError("may not specify both stmt_deps and depends_on") + elif stmt_deps is not None: + warn("stmt_deps is deprecated, use depends_on", DeprecationWarning, stacklevel=2) - depends_on = insn_deps - depends_on_is_final = insn_deps_is_final + depends_on = stmt_deps + depends_on_is_final = stmt_deps_is_final if forced_iname_deps is not None and within_inames is not None: raise LoopyError("may not specify both forced_iname_deps " 
@@ -282,16 +282,16 @@ class InstructionBase(ImmutableRecord): # {{{ backwards compatibility goop @property - def insn_deps(self): - warn("insn_deps is deprecated, use depends_on", + def stmt_deps(self): + warn("stmt_deps is deprecated, use depends_on", DeprecationWarning, stacklevel=2) return self.depends_on # legacy @property - def insn_deps_is_final(self): - warn("insn_deps_is_final is deprecated, use depends_on_is_final", + def stmt_deps_is_final(self): + warn("stmt_deps_is_final is deprecated, use depends_on_is_final", DeprecationWarning, stacklevel=2) return self.depends_on_is_final @@ -349,14 +349,14 @@ class InstructionBase(ImmutableRecord): def assignee_name(self): """A convenience wrapper around :meth:`assignee_var_names` that returns the the name of the variable being assigned. - If more than one variable is being modified in the instruction, + If more than one variable is being modified in the statement, :raise:`ValueError` is raised. """ names = self.assignee_var_names() if len(names) != 1: - raise ValueError("expected exactly one assignment in instruction " + raise ValueError("expected exactly one assignment in statement " "on which assignee_name is being called, found %d" % len(names)) @@ -366,7 +366,7 @@ class InstructionBase(ImmutableRecord): @memoize_method def write_dependency_names(self): """Return a set of dependencies of the left hand side of the - assignments performed by this instruction, including written variables + assignments performed by this statement, including written variables and indices. 
""" @@ -393,7 +393,7 @@ class InstructionBase(ImmutableRecord): elif self.boostable is None: pass else: - raise RuntimeError("unexpected value for Instruction.boostable") + raise RuntimeError("unexpected value for Statement.boostable") if self.depends_on: result.append("dep="+":".join(self.depends_on)) @@ -447,22 +447,22 @@ class InstructionBase(ImmutableRecord): # }}} def copy(self, **kwargs): - if "insn_deps" in kwargs: - warn("insn_deps is deprecated, use depends_on", + if "stmt_deps" in kwargs: + warn("stmt_deps is deprecated, use depends_on", DeprecationWarning, stacklevel=2) - kwargs["depends_on"] = kwargs.pop("insn_deps") + kwargs["depends_on"] = kwargs.pop("stmt_deps") - if "insn_deps_is_final" in kwargs: - warn("insn_deps_is_final is deprecated, use depends_on", + if "stmt_deps_is_final" in kwargs: + warn("stmt_deps_is_final is deprecated, use depends_on", DeprecationWarning, stacklevel=2) - kwargs["depends_on_is_final"] = kwargs.pop("insn_deps_is_final") + kwargs["depends_on_is_final"] = kwargs.pop("stmt_deps_is_final") - return super(InstructionBase, self).copy(**kwargs) + return super(StatementBase, self).copy(**kwargs) def __setstate__(self, val): - super(InstructionBase, self).__setstate__(val) + super(StatementBase, self).__setstate__(val) from loopy.tools import intern_frozenset_of_ids @@ -666,13 +666,13 @@ class AtomicUpdate(VarAtomicity): # }}} -# {{{ instruction base class: expression rhs +# {{{ statement base class: expression rhs -class MultiAssignmentBase(InstructionBase): - """An assignment instruction with an expression as a right-hand side.""" +class MultiAssignmentBase(StatementBase): + """An assignment statement with an expression as a right-hand side.""" - fields = InstructionBase.fields | set(["expression"]) - pymbolic_fields = InstructionBase.pymbolic_fields | set(["expression"]) + fields = StatementBase.fields | set(["expression"]) + pymbolic_fields = StatementBase.pymbolic_fields | set(["expression"]) @memoize_method def 
read_dependency_names(self): @@ -704,7 +704,7 @@ class MultiAssignmentBase(InstructionBase): # }}} -# {{{ instruction: assignment +# {{{ statement: assignment class Assignment(MultiAssignmentBase): """ @@ -774,7 +774,7 @@ class Assignment(MultiAssignmentBase): boostable=None, boostable_into=None, tags=None, temp_var_type=None, atomicity=(), priority=0, predicates=frozenset(), - insn_deps=None, insn_deps_is_final=None, + stmt_deps=None, stmt_deps_is_final=None, forced_iname_deps=None, forced_iname_deps_is_final=None): super(Assignment, self).__init__( @@ -791,8 +791,8 @@ class Assignment(MultiAssignmentBase): priority=priority, predicates=predicates, tags=tags, - insn_deps=insn_deps, - insn_deps_is_final=insn_deps_is_final, + stmt_deps=stmt_deps, + stmt_deps_is_final=stmt_deps_is_final, forced_iname_deps=forced_iname_deps, forced_iname_deps_is_final=forced_iname_deps_is_final) @@ -812,7 +812,7 @@ class Assignment(MultiAssignmentBase): self.temp_var_type = temp_var_type self.atomicity = atomicity - # {{{ implement InstructionBase interface + # {{{ implement StatementBase interface @memoize_method def assignee_var_names(self): @@ -844,7 +844,7 @@ class Assignment(MultiAssignmentBase): result += "\n" + 10*" " + "if (%s)" % " && ".join(self.predicates) return result - # {{{ for interface uniformity with CallInstruction + # {{{ for interface uniformity with CallStatement @property def temp_var_types(self): @@ -857,21 +857,21 @@ class Assignment(MultiAssignmentBase): # }}} -class ExpressionInstruction(Assignment): +class ExpressionStatement(Assignment): def __init__(self, *args, **kwargs): - warn("ExpressionInstruction is deprecated. Use Assignment instead", + warn("ExpressionStatement is deprecated. 
Use Assignment instead", DeprecationWarning, stacklevel=2) - super(ExpressionInstruction, self).__init__(*args, **kwargs) + super(ExpressionStatement, self).__init__(*args, **kwargs) # }}} -# {{{ instruction: function call +# {{{ statement: function call -class CallInstruction(MultiAssignmentBase): - """An instruction capturing a function call. Unlike :class:`Assignment`, - this instruction supports functions with multiple return values. +class CallStatement(MultiAssignmentBase): + """An statement capturing a function call. Unlike :class:`Assignment`, + this statement supports functions with multiple return values. .. attribute:: assignees @@ -907,11 +907,11 @@ class CallInstruction(MultiAssignmentBase): boostable=None, boostable_into=None, tags=None, temp_var_types=None, priority=0, predicates=frozenset(), - insn_deps=None, insn_deps_is_final=None, + stmt_deps=None, stmt_deps_is_final=None, forced_iname_deps=None, forced_iname_deps_is_final=None): - super(CallInstruction, self).__init__( + super(CallStatement, self).__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, @@ -925,22 +925,22 @@ class CallInstruction(MultiAssignmentBase): priority=priority, predicates=predicates, tags=tags, - insn_deps=insn_deps, - insn_deps_is_final=insn_deps_is_final, + stmt_deps=stmt_deps, + stmt_deps_is_final=stmt_deps_is_final, forced_iname_deps=forced_iname_deps, forced_iname_deps_is_final=forced_iname_deps_is_final) from pymbolic.primitives import Call from loopy.symbolic import Reduction if not isinstance(expression, (Call, Reduction)) and expression is not None: - raise LoopyError("'expression' argument to CallInstruction " + raise LoopyError("'expression' argument to CallStatement " "must be a function call") from loopy.symbolic import parse if isinstance(assignees, str): assignees = parse(assignees) if not isinstance(assignees, tuple): - raise LoopyError("'assignees' argument to CallInstruction " + raise LoopyError("'assignees' argument to 
CallStatement " "must be a tuple or a string parseable to a tuple" "--got '%s'" % type(assignees).__name__) @@ -961,7 +961,7 @@ class CallInstruction(MultiAssignmentBase): else: self.temp_var_types = temp_var_types - # {{{ implement InstructionBase interface + # {{{ implement StatementBase interface @memoize_method def assignee_var_names(self): @@ -1017,7 +1017,7 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): raise LoopyError("right-hand side in multiple assignment must be " "function call or reduction, got: '%s'" % expression) - return CallInstruction( + return CallStatement( assignees=assignees, expression=expression, temp_var_types=temp_var_types, @@ -1034,14 +1034,14 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): **kwargs) -# {{{ c instruction +# {{{ c statement -class CInstruction(InstructionBase): +class CStatement(StatementBase): """ .. attribute:: iname_exprs A list of tuples *(name, expr)* of inames or expressions based on them - that the instruction needs access to. + that the statement needs access to. .. attribute:: code @@ -1071,11 +1071,11 @@ class CInstruction(InstructionBase): figuring out dependencies. 
""" - fields = InstructionBase.fields | \ + fields = StatementBase.fields | \ set("iname_exprs code read_variables assignees".split()) - pymbolic_fields = InstructionBase.pymbolic_fields | \ + pymbolic_fields = StatementBase.pymbolic_fields | \ set("iname_exprs assignees".split()) - pymbolic_set_fields = InstructionBase.pymbolic_set_fields | \ + pymbolic_set_fields = StatementBase.pymbolic_set_fields | \ set(["read_variables"]) def __init__(self, @@ -1087,7 +1087,7 @@ class CInstruction(InstructionBase): within_inames_is_final=None, within_inames=None, priority=0, boostable=None, boostable_into=None, predicates=frozenset(), tags=None, - insn_deps=None, insn_deps_is_final=None): + stmt_deps=None, stmt_deps_is_final=None): """ :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples, simple strings pepresenting inames are also allowed. A single @@ -1098,7 +1098,7 @@ class CInstruction(InstructionBase): sequence of strings parseable into the desired format. """ - InstructionBase.__init__(self, + StatementBase.__init__(self, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, @@ -1109,8 +1109,8 @@ class CInstruction(InstructionBase): boostable=boostable, boostable_into=boostable_into, priority=priority, predicates=predicates, tags=tags, - insn_deps=insn_deps, - insn_deps_is_final=insn_deps_is_final) + stmt_deps=stmt_deps, + stmt_deps_is_final=stmt_deps_is_final) # {{{ normalize iname_exprs @@ -1153,7 +1153,7 @@ class CInstruction(InstructionBase): def read_dependency_names(self): result = ( - super(CInstruction, self).read_dependency_names() + super(CStatement, self).read_dependency_names() | frozenset(self.read_variables)) from loopy.symbolic import get_dependencies @@ -1204,7 +1204,7 @@ class CInstruction(InstructionBase): # }}} -class _DataObliviousInstruction(InstructionBase): +class _DataObliviousStatement(StatementBase): # {{{ abstract interface # read_dependency_names inherited @@ -1230,12 +1230,12 @@ class 
_DataObliviousInstruction(InstructionBase): return () -# {{{ barrier instruction +# {{{ barrier statement -class NoOpInstruction(_DataObliviousInstruction): - """An instruction that carries out no operation. It is mainly +class NoOpStatement(_DataObliviousStatement): + """An statement that carries out no operation. It is mainly useful as a way to structure dependencies between other - instructions. + statements. The textual syntax in a :mod:`loopy` kernel is:: @@ -1249,7 +1249,7 @@ class NoOpInstruction(_DataObliviousInstruction): priority=None, boostable=None, boostable_into=None, predicates=None, tags=None): - super(NoOpInstruction, self).__init__( + super(NoOpStatement, self).__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, @@ -1276,10 +1276,10 @@ class NoOpInstruction(_DataObliviousInstruction): # }}} -# {{{ barrier instruction +# {{{ barrier statement -class BarrierInstruction(_DataObliviousInstruction): - """An instruction that requires synchronization with all +class BarrierStatement(_DataObliviousStatement): + """An statement that requires synchronization with all concurrent work items of :attr:`kind`. .. attribute:: kind @@ -1292,7 +1292,7 @@ class BarrierInstruction(_DataObliviousInstruction): ... 
lbarrier """ - fields = _DataObliviousInstruction.fields | set(["kind"]) + fields = _DataObliviousStatement.fields | set(["kind"]) def __init__(self, id, depends_on=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, @@ -1305,7 +1305,7 @@ class BarrierInstruction(_DataObliviousInstruction): if predicates: raise LoopyError("conditional barriers are not supported") - super(BarrierInstruction, self).__init__( + super(BarrierStatement, self).__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index ad1e71e59..edc154b15 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -131,30 +131,30 @@ def _add_and_infer_dtypes_overdetermined(knl, dtype_dict): # }}} -# {{{ find_all_insn_inames fixed point iteration (deprecated) +# {{{ find_all_stmt_inames fixed point iteration (deprecated) -def guess_iname_deps_based_on_var_use(kernel, insn, insn_id_to_inames=None): - # For all variables that insn depends on, find the intersection - # of iname deps of all writers, and add those to insn's +def guess_iname_deps_based_on_var_use(kernel, stmt, stmt_id_to_inames=None): + # For all variables that stmt depends on, find the intersection + # of iname deps of all writers, and add those to stmt's # dependencies. 
result = frozenset() writer_map = kernel.writer_map() - for tv_name in (insn.read_dependency_names() & kernel.get_written_variables()): + for tv_name in (stmt.read_dependency_names() & kernel.get_written_variables()): tv_implicit_inames = None for writer_id in writer_map[tv_name]: - writer_insn = kernel.id_to_insn[writer_id] - if insn_id_to_inames is None: - writer_inames = writer_insn.within_inames + writer_stmt = kernel.id_to_stmt[writer_id] + if stmt_id_to_inames is None: + writer_inames = writer_stmt.within_inames else: - writer_inames = insn_id_to_inames[writer_id] + writer_inames = stmt_id_to_inames[writer_id] writer_implicit_inames = ( writer_inames - - (writer_insn.write_dependency_names() & kernel.all_inames())) + - (writer_stmt.write_dependency_names() & kernel.all_inames())) if tv_implicit_inames is None: tv_implicit_inames = writer_implicit_inames else: @@ -164,16 +164,16 @@ def guess_iname_deps_based_on_var_use(kernel, insn, insn_id_to_inames=None): if tv_implicit_inames is not None: result = result | tv_implicit_inames - return result - insn.reduction_inames() + return result - stmt.reduction_inames() -def find_all_insn_inames(kernel): - logger.debug("%s: find_all_insn_inames: start" % kernel.name) +def find_all_stmt_inames(kernel): + logger.debug("%s: find_all_stmt_inames: start" % kernel.name) writer_map = kernel.writer_map() - insn_id_to_inames = {} - insn_assignee_inames = {} + stmt_id_to_inames = {} + stmt_assignee_inames = {} all_read_deps = {} all_write_deps = {} @@ -181,30 +181,30 @@ def find_all_insn_inames(kernel): from loopy.transform.subst import expand_subst kernel = expand_subst(kernel) - for insn in kernel.instructions: - all_read_deps[insn.id] = read_deps = insn.read_dependency_names() - all_write_deps[insn.id] = write_deps = insn.write_dependency_names() + for stmt in kernel.statements: + all_read_deps[stmt.id] = read_deps = stmt.read_dependency_names() + all_write_deps[stmt.id] = write_deps = stmt.write_dependency_names() deps = 
read_deps | write_deps - if insn.within_inames_is_final: - iname_deps = insn.within_inames + if stmt.within_inames_is_final: + iname_deps = stmt.within_inames else: iname_deps = ( deps & kernel.all_inames() - | insn.within_inames) + | stmt.within_inames) - assert isinstance(read_deps, frozenset), type(insn) - assert isinstance(write_deps, frozenset), type(insn) - assert isinstance(iname_deps, frozenset), type(insn) + assert isinstance(read_deps, frozenset), type(stmt) + assert isinstance(write_deps, frozenset), type(stmt) + assert isinstance(iname_deps, frozenset), type(stmt) - logger.debug("%s: find_all_insn_inames: %s (init): %s - " + logger.debug("%s: find_all_stmt_inames: %s (init): %s - " "read deps: %s - write deps: %s" % ( - kernel.name, insn.id, ", ".join(sorted(iname_deps)), + kernel.name, stmt.id, ", ".join(sorted(iname_deps)), ", ".join(sorted(read_deps)), ", ".join(sorted(write_deps)), )) - insn_id_to_inames[insn.id] = iname_deps - insn_assignee_inames[insn.id] = write_deps & kernel.all_inames() + stmt_id_to_inames[stmt.id] = iname_deps + stmt_assignee_inames[stmt.id] = write_deps & kernel.all_inames() # fixed point iteration until all iname dep sets have converged @@ -221,36 +221,36 @@ def find_all_insn_inames(kernel): while True: did_something = False - for insn in kernel.instructions: + for stmt in kernel.statements: - if insn.within_inames_is_final: + if stmt.within_inames_is_final: continue # {{{ depdency-based propagation - inames_old = insn_id_to_inames[insn.id] + inames_old = stmt_id_to_inames[stmt.id] inames_new = inames_old | guess_iname_deps_based_on_var_use( - kernel, insn, insn_id_to_inames) + kernel, stmt, stmt_id_to_inames) - insn_id_to_inames[insn.id] = inames_new + stmt_id_to_inames[stmt.id] = inames_new if inames_new != inames_old: did_something = True warn_with_kernel(kernel, "inferred_iname", - "The iname(s) '%s' on instruction '%s' " + "The iname(s) '%s' on statement '%s' " "was/were automatically added. " "This is deprecated. 
Please add the iname " - "to the instruction " + "to the statement " "explicitly, e.g. by adding 'for' loops" - % (", ".join(inames_new-inames_old), insn.id)) + % (", ".join(inames_new-inames_old), stmt.id)) # }}} # {{{ domain-based propagation - inames_old = insn_id_to_inames[insn.id] - inames_new = set(insn_id_to_inames[insn.id]) + inames_old = stmt_id_to_inames[stmt.id] + inames_new = set(stmt_id_to_inames[stmt.id]) for iname in inames_old: home_domain = kernel.domains[kernel.get_home_domain_index(iname)] @@ -268,31 +268,31 @@ def find_all_insn_inames(kernel): if par in kernel.temporary_variables: for writer_id in writer_map.get(par, []): - inames_new.update(insn_id_to_inames[writer_id]) + inames_new.update(stmt_id_to_inames[writer_id]) if inames_new != inames_old: did_something = True - insn_id_to_inames[insn.id] = frozenset(inames_new) + stmt_id_to_inames[stmt.id] = frozenset(inames_new) warn_with_kernel(kernel, "inferred_iname", - "The iname(s) '%s' on instruction '%s' was " + "The iname(s) '%s' on statement '%s' was " "automatically added. " "This is deprecated. Please add the iname " - "to the instruction " + "to the statement " "explicitly, e.g. by adding 'for' loops" - % (", ".join(inames_new-inames_old), insn.id)) + % (", ".join(inames_new-inames_old), stmt.id)) # }}} if not did_something: break - logger.debug("%s: find_all_insn_inames: done" % kernel.name) + logger.debug("%s: find_all_stmt_inames: done" % kernel.name) - for v in six.itervalues(insn_id_to_inames): + for v in six.itervalues(stmt_id_to_inames): assert isinstance(v, frozenset) - return insn_id_to_inames + return stmt_id_to_inames # }}} @@ -447,17 +447,17 @@ class DomainChanger: # Changing the domain might look like it wants to change grid # sizes. Not true. 
# (Relevant for 'slab decomposition') - overridden_get_grid_sizes_for_insn_ids=( - self.kernel.get_grid_sizes_for_insn_ids)) + overridden_get_grid_sizes_for_stmt_ids=( + self.kernel.get_grid_sizes_for_stmt_ids)) # }}} # {{{ graphviz / dot export -def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): +def get_dot_dependency_graph(kernel, iname_cluster=True, use_stmt_id=False): """Return a string in the `dot `_ language depicting - dependencies among kernel instructions. + dependencies among kernel statements. """ # make sure all automatically added stuff shows up @@ -478,34 +478,34 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): dep_graph = {} lines = [] - from loopy.kernel.data import MultiAssignmentBase, CInstruction + from loopy.kernel.data import MultiAssignmentBase, CStatement - for insn in kernel.instructions: - if isinstance(insn, MultiAssignmentBase): - op = "%s <- %s" % (insn.assignees, insn.expression) + for stmt in kernel.statements: + if isinstance(stmt, MultiAssignmentBase): + op = "%s <- %s" % (stmt.assignees, stmt.expression) if len(op) > 200: op = op[:200] + "..." 
- elif isinstance(insn, CInstruction): - op = "" % insn.id + elif isinstance(stmt, CStatement): + op = "" % stmt.id else: - op = "" % insn.id + op = "" % stmt.id - if use_insn_id: - insn_label = insn.id + if use_stmt_id: + stmt_label = stmt.id tooltip = op else: - insn_label = op - tooltip = insn.id + stmt_label = op + tooltip = stmt.id lines.append("\"%s\" [label=\"%s\",shape=\"box\",tooltip=\"%s\"];" % ( - insn.id, - repr(insn_label)[1:-1], + stmt.id, + repr(stmt_label)[1:-1], repr(tooltip)[1:-1], )) - for dep in insn.depends_on: - dep_graph.setdefault(insn.id, set()).add(dep) + for dep in stmt.depends_on: + dep_graph.setdefault(stmt.id, set()).add(dep) # {{{ O(n^3) transitive reduction @@ -513,31 +513,31 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): while True: changed_something = False - for insn_1 in dep_graph: - for insn_2 in dep_graph.get(insn_1, set()).copy(): - for insn_3 in dep_graph.get(insn_2, set()).copy(): - if insn_3 not in dep_graph.get(insn_1, set()): + for stmt_1 in dep_graph: + for stmt_2 in dep_graph.get(stmt_1, set()).copy(): + for stmt_3 in dep_graph.get(stmt_2, set()).copy(): + if stmt_3 not in dep_graph.get(stmt_1, set()): changed_something = True - dep_graph[insn_1].add(insn_3) + dep_graph[stmt_1].add(stmt_3) if not changed_something: break - for insn_1 in dep_graph: - for insn_2 in dep_graph.get(insn_1, set()).copy(): - for insn_3 in dep_graph.get(insn_2, set()).copy(): - if insn_3 in dep_graph.get(insn_1, set()): - dep_graph[insn_1].remove(insn_3) + for stmt_1 in dep_graph: + for stmt_2 in dep_graph.get(stmt_1, set()).copy(): + for stmt_3 in dep_graph.get(stmt_2, set()).copy(): + if stmt_3 in dep_graph.get(stmt_1, set()): + dep_graph[stmt_1].remove(stmt_3) # }}} - for insn_1 in dep_graph: - for insn_2 in dep_graph.get(insn_1, set()): - lines.append("%s -> %s" % (insn_2, insn_1)) + for stmt_1 in dep_graph: + for stmt_2 in dep_graph.get(stmt_1, set()): + lines.append("%s -> %s" % (stmt_2, stmt_1)) if 
iname_cluster: from loopy.schedule import ( - EnterLoop, LeaveLoop, RunInstruction, Barrier, + EnterLoop, LeaveLoop, RunStatement, Barrier, CallKernel, ReturnFromKernel) for sched_item in kernel.schedule: @@ -546,8 +546,8 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): % (sched_item.iname, sched_item.iname)) elif isinstance(sched_item, LeaveLoop): lines.append("}") - elif isinstance(sched_item, RunInstruction): - lines.append(sched_item.insn_id) + elif isinstance(sched_item, RunStatement): + lines.append(sched_item.stmt_id) elif isinstance(sched_item, (CallKernel, ReturnFromKernel, Barrier)): pass else: @@ -672,14 +672,14 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): for par in dom_parameters: if par in kernel.temporary_variables: - writer_insns = kernel.writer_map()[par] + writer_stmts = kernel.writer_map()[par] - if len(writer_insns) > 1: + if len(writer_stmts) > 1: raise RuntimeError("loop bound '%s' " "may only be written to once" % par) - writer_insn, = writer_insns - writer_inames = kernel.insn_inames(writer_insn) + writer_stmt, = writer_stmts + writer_inames = kernel.stmt_inames(writer_stmt) if writer_inames & inames: return True @@ -695,7 +695,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): # {{{ rank inames by stride -def get_auto_axis_iname_ranking_by_stride(kernel, insn): +def get_auto_axis_iname_ranking_by_stride(kernel, stmt): from loopy.kernel.data import ImageArg, ValueArg approximate_arg_values = {} @@ -707,14 +707,14 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): raise LoopyError("No approximate arg value specified for '%s'" % arg.name) - # {{{ find all array accesses in insn + # {{{ find all array accesses in stmt from loopy.symbolic import ArrayAccessFinder - ary_acc_exprs = list(ArrayAccessFinder()(insn.expression)) + ary_acc_exprs = list(ArrayAccessFinder()(stmt.expression)) from pymbolic.primitives import Subscript - for assignee in insn.assignees: + for 
assignee in stmt.assignees: if isinstance(assignee, Subscript): ary_acc_exprs.append(assignee) @@ -742,7 +742,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( iname - for iname in kernel.insn_inames(insn) + for iname in kernel.stmt_inames(stmt) if isinstance(kernel.iname_to_tag.get(iname), AutoLocalIndexTagBase)) @@ -802,7 +802,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): if aggregate_strides: very_large_stride = int(np.iinfo(np.int32).max) - return sorted((iname for iname in kernel.insn_inames(insn)), + return sorted((iname for iname in kernel.stmt_inames(stmt)), key=lambda iname: ( aggregate_strides.get(iname, very_large_stride), iname)) @@ -912,13 +912,13 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): import loopy as lp - for insn in kernel.instructions: - if not isinstance(insn, lp.MultiAssignmentBase): + for stmt in kernel.statements: + if not isinstance(stmt, lp.MultiAssignmentBase): continue auto_axis_inames = [ iname - for iname in kernel.insn_inames(insn) + for iname in kernel.stmt_inames(stmt) if isinstance(kernel.iname_to_tag.get(iname), AutoLocalIndexTagBase)] @@ -927,7 +927,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() - for iname in kernel.insn_inames(insn): + for iname in kernel.stmt_inames(stmt): tag = kernel.iname_to_tag.get(iname) if isinstance(tag, LocalIndexTag): assigned_local_axes.add(tag.axis) @@ -936,7 +936,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # "valid" pass: try to assign a given axis if axis not in assigned_local_axes: - iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, insn) + iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, stmt) if iname_ranking is not None: for iname in iname_ranking: prev_tag = kernel.iname_to_tag.get(iname) @@ -963,7 +963,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # }}} - # 
We've seen all instructions and not punted to recursion/restart because + # We've seen all statements and not punted to recursion/restart because # of a new axis assignment. if axis >= len(local_size): @@ -1032,12 +1032,12 @@ def guess_var_shape(kernel, var_name): submap = SubstitutionRuleExpander(kernel.substitutions) def run_through_armap(expr): - armap(submap(expr), kernel.insn_inames(insn)) + armap(submap(expr), kernel.stmt_inames(stmt)) return expr try: - for insn in kernel.instructions: - insn.with_transformed_expressions(run_through_armap) + for stmt in kernel.statements: + stmt.with_transformed_expressions(run_through_armap) except TypeError as e: from traceback import print_exc print_exc() @@ -1200,9 +1200,9 @@ def get_visual_iname_order_embedding(kernel): iname_trie = SetTrie() - for insn in kernel.instructions: + for stmt in kernel.statements: within_inames = set( - iname for iname in insn.within_inames + iname for iname in stmt.within_inames if iname not in ilp_inames) iname_trie.add_or_update(within_inames) @@ -1225,17 +1225,17 @@ def get_visual_iname_order_embedding(kernel): # {{{ find_recursive_dependencies -def find_recursive_dependencies(kernel, insn_ids): - queue = list(insn_ids) +def find_recursive_dependencies(kernel, stmt_ids): + queue = list(stmt_ids) - result = set(insn_ids) + result = set(stmt_ids) while queue: new_queue = [] - for insn_id in queue: - insn = kernel.id_to_insn[insn_id] - additionals = insn.depends_on - result + for stmt_id in queue: + stmt = kernel.id_to_stmt[stmt_id] + additionals = stmt.depends_on - result result.update(additionals) new_queue.extend(additionals) @@ -1248,15 +1248,15 @@ def find_recursive_dependencies(kernel, insn_ids): # {{{ find_reverse_dependencies -def find_reverse_dependencies(kernel, insn_ids): - """Finds a set of IDs of instructions that depend on one of the insn_ids. +def find_reverse_dependencies(kernel, stmt_ids): + """Finds a set of IDs of statements that depend on one of the stmt_ids. 
- :arg insn_ids: a set of instruction IDs + :arg stmt_ids: a set of statement IDs """ return frozenset( - insn.id - for insn in kernel.instructions - if insn.depends_on & insn_ids) + stmt.id + for stmt in kernel.statements + if stmt.depends_on & stmt_ids) # }}} @@ -1264,28 +1264,28 @@ def find_reverse_dependencies(kernel, insn_ids): # {{{ draw_dependencies_as_unicode_arrows def draw_dependencies_as_unicode_arrows( - instructions, fore, style, flag_downward=True, max_columns=20): + statements, fore, style, flag_downward=True, max_columns=20): """ - :arg instructions: an ordered iterable of :class:`loopy.InstructionBase` + :arg statements: an ordered iterable of :class:`loopy.StatementBase` instances :arg fore: if given, will be used like a :mod:`colorama` ``Fore`` object to color-code dependencies. (E.g. red for downward edges) :returns: A list of tuples (arrows, extender) with Unicode-drawn dependency - arrows, one per entry of *instructions*. *extender* can be used to - extend arrows below the line of an instruction. + arrows, one per entry of *statements*. *extender* can be used to + extend arrows below the line of an statement. 
""" reverse_deps = {} - for insn in instructions: - for dep in insn.depends_on: - reverse_deps.setdefault(dep, []).append(insn.id) + for stmt in statements: + for dep in stmt.depends_on: + reverse_deps.setdefault(dep, []).append(stmt.id) # mapping of (from_id, to_id) tuples to column_index dep_to_column = {} # {{{ find column assignments - # mapping from column indices to (end_insn_id, updown) + # mapping from column indices to (end_stmt_id, updown) columns_in_use = {} n_columns = [0] @@ -1313,28 +1313,28 @@ def draw_dependencies_as_unicode_arrows( return result rows = [] - for insn in instructions: + for stmt in statements: row = make_extender() - for rdep in reverse_deps.get(insn.id, []): - assert rdep != insn.id + for rdep in reverse_deps.get(stmt.id, []): + assert rdep != stmt.id - dep_key = (rdep, insn.id) + dep_key = (rdep, stmt.id) if dep_key not in dep_to_column: col = dep_to_column[dep_key] = find_free_column() columns_in_use[col] = (rdep, "up") row[col] = u"↱" - for dep in insn.depends_on: - assert dep != insn.id - dep_key = (insn.id, dep) + for dep in stmt.depends_on: + assert dep != stmt.id + dep_key = (stmt.id, dep) if dep_key not in dep_to_column: col = dep_to_column[dep_key] = find_free_column() columns_in_use[col] = (dep, "down") row[col] = do_flag_downward(u"┌", "down") for col, (end, updown) in list(six.iteritems(columns_in_use)): - if insn.id == end: + if stmt.id == end: del columns_in_use[col] if updown == "up": row[col] = u"└" @@ -1376,26 +1376,26 @@ def draw_dependencies_as_unicode_arrows( # }}} -# {{{ stringify_instruction_list +# {{{ stringify_statement_list -def stringify_instruction_list(kernel): +def stringify_statement_list(kernel): # {{{ topological sort - printed_insn_ids = set() - printed_insn_order = [] + printed_stmt_ids = set() + printed_stmt_order = [] - def insert_insn_into_order(insn): - if insn.id in printed_insn_ids: + def insert_stmt_into_order(stmt): + if stmt.id in printed_stmt_ids: return - printed_insn_ids.add(insn.id) + 
printed_stmt_ids.add(stmt.id) - for dep_id in natsorted(insn.depends_on): - insert_insn_into_order(kernel.id_to_insn[dep_id]) + for dep_id in natsorted(stmt.depends_on): + insert_stmt_into_order(kernel.id_to_stmt[dep_id]) - printed_insn_order.append(insn) + printed_stmt_order.append(stmt) - for insn in kernel.instructions: - insert_insn_into_order(insn) + for stmt in kernel.statements: + insert_stmt_into_order(stmt) # }}} @@ -1406,7 +1406,7 @@ def stringify_instruction_list(kernel): uniform_arrow_length, arrows_and_extenders = \ draw_dependencies_as_unicode_arrows( - printed_insn_order, fore=Fore, style=Style) + printed_stmt_order, fore=Fore, style=Style) leader = " " * uniform_arrow_length lines = [] @@ -1457,51 +1457,51 @@ def stringify_instruction_list(kernel): current_inames[0] = new_inames - for insn, (arrows, extender) in zip(printed_insn_order, arrows_and_extenders): - if isinstance(insn, lp.MultiAssignmentBase): - lhs = ", ".join(str(a) for a in insn.assignees) - rhs = str(insn.expression) + for stmt, (arrows, extender) in zip(printed_stmt_order, arrows_and_extenders): + if isinstance(stmt, lp.MultiAssignmentBase): + lhs = ", ".join(str(a) for a in stmt.assignees) + rhs = str(stmt.expression) trailing = [] - elif isinstance(insn, lp.CInstruction): - lhs = ", ".join(str(a) for a in insn.assignees) + elif isinstance(stmt, lp.CStatement): + lhs = ", ".join(str(a) for a in stmt.assignees) rhs = "CODE(%s|%s)" % ( - ", ".join(str(x) for x in insn.read_variables), + ", ".join(str(x) for x in stmt.read_variables), ", ".join("%s=%s" % (name, expr) - for name, expr in insn.iname_exprs)) + for name, expr in stmt.iname_exprs)) - trailing = [l for l in insn.code.split("\n")] - elif isinstance(insn, lp.BarrierInstruction): + trailing = [l for l in stmt.code.split("\n")] + elif isinstance(stmt, lp.BarrierStatement): lhs = "" - rhs = "... %sbarrier" % insn.kind[0] + rhs = "... 
%sbarrier" % stmt.kind[0] trailing = [] - elif isinstance(insn, lp.NoOpInstruction): + elif isinstance(stmt, lp.NoOpStatement): lhs = "" rhs = "... nop" trailing = [] else: - raise LoopyError("unexpected instruction type: %s" - % type(insn).__name__) + raise LoopyError("unexpected statement type: %s" + % type(stmt).__name__) - adapt_to_new_inames_list(kernel.insn_inames(insn)) + adapt_to_new_inames_list(kernel.stmt_inames(stmt)) - options = ["id="+Fore.GREEN+insn.id+Style.RESET_ALL] - if insn.priority: - options.append("priority=%d" % insn.priority) - if insn.tags: - options.append("tags=%s" % ":".join(insn.tags)) - if isinstance(insn, lp.Assignment) and insn.atomicity: + options = ["id="+Fore.GREEN+stmt.id+Style.RESET_ALL] + if stmt.priority: + options.append("priority=%d" % stmt.priority) + if stmt.tags: + options.append("tags=%s" % ":".join(stmt.tags)) + if isinstance(stmt, lp.Assignment) and stmt.atomicity: options.append("atomic=%s" % ":".join( - str(a) for a in insn.atomicity)) - if insn.groups: - options.append("groups=%s" % ":".join(insn.groups)) - if insn.conflicts_with_groups: + str(a) for a in stmt.atomicity)) + if stmt.groups: + options.append("groups=%s" % ":".join(stmt.groups)) + if stmt.conflicts_with_groups: options.append( - "conflicts=%s" % ":".join(insn.conflicts_with_groups)) - if insn.no_sync_with: + "conflicts=%s" % ":".join(stmt.conflicts_with_groups)) + if stmt.no_sync_with: options.append("no_sync_with=%s" % ":".join( - "%s@%s" % entry for entry in sorted(insn.no_sync_with))) + "%s@%s" % entry for entry in sorted(stmt.no_sync_with))) if lhs: core = "%s = %s" % ( @@ -1513,9 +1513,9 @@ def stringify_instruction_list(kernel): options_str = " {%s}" % ", ".join(options) - if insn.predicates: + if stmt.predicates: # FIXME: precedence - add_pre_line("if %s" % " and ".join([str(x) for x in insn.predicates])) + add_pre_line("if %s" % " and ".join([str(x) for x in stmt.predicates])) indent_level[0] += indent_increment add_main_line(core + 
options_str) @@ -1523,7 +1523,7 @@ def stringify_instruction_list(kernel): for t in trailing: add_post_line(t) - if insn.predicates: + if stmt.predicates: indent_level[0] -= indent_increment add_post_line("end") @@ -1540,21 +1540,21 @@ def stringify_instruction_list(kernel): @memoize_on_first_arg def get_global_barrier_order(kernel): - """Return a :class:`tuple` of the listing the ids of global barrier instructions + """Return a :class:`tuple` of the listing the ids of global barrier statements as they appear in order in the kernel. - See also :class:`loopy.instruction.BarrierInstruction`. + See also :class:`loopy.statement.BarrierStatement`. """ barriers = [] visiting = set() visited = set() - unvisited = set(insn.id for insn in kernel.instructions) + unvisited = set(stmt.id for stmt in kernel.statements) - def is_barrier(my_insn_id): - insn = kernel.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" + def is_barrier(my_stmt_id): + stmt = kernel.id_to_stmt[my_stmt_id] + from loopy.kernel.statement import BarrierStatement + return isinstance(stmt, BarrierStatement) and stmt.kind == "global" while unvisited: stack = [unvisited.pop()] @@ -1574,7 +1574,7 @@ def get_global_barrier_order(kernel): visited.add(top) visiting.add(top) - for child in kernel.id_to_insn[top].depends_on: + for child in kernel.id_to_stmt[top].depends_on: # Check for no cycles. assert child not in visiting stack.append(child) @@ -1610,7 +1610,7 @@ def get_global_barrier_order(kernel): visiting.clear() break - for child in kernel.id_to_insn[top].depends_on: + for child in kernel.id_to_stmt[top].depends_on: stack.append(child) else: # Search exhausted and we did not find prev_barrier. 
@@ -1625,10 +1625,10 @@ def get_global_barrier_order(kernel): # {{{ find most recent global barrier @memoize_on_first_arg -def find_most_recent_global_barrier(kernel, insn_id): +def find_most_recent_global_barrier(kernel, stmt_id): """Return the id of the latest occuring global barrier which the - given instruction (indirectly or directly) depends on, or *None* if this - instruction does not depend on a global barrier. + given statement (indirectly or directly) depends on, or *None* if this + statement does not depend on a global barrier. The return value is guaranteed to be unique because global barriers are totally ordered within the kernel. @@ -1639,15 +1639,15 @@ def find_most_recent_global_barrier(kernel, insn_id): if len(global_barrier_order) == 0: return None - insn = kernel.id_to_insn[insn_id] + stmt = kernel.id_to_stmt[stmt_id] - if len(insn.depends_on) == 0: + if len(stmt.depends_on) == 0: return None - def is_barrier(my_insn_id): - insn = kernel.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" + def is_barrier(my_stmt_id): + stmt = kernel.id_to_stmt[my_stmt_id] + from loopy.kernel.statement import BarrierStatement + return isinstance(stmt, BarrierStatement) and stmt.kind == "global" global_barrier_to_ordinal = dict( (b, i) for i, b in enumerate(global_barrier_order)) @@ -1658,13 +1658,13 @@ def find_most_recent_global_barrier(kernel, insn_id): else -1) direct_barrier_dependencies = set( - dep for dep in insn.depends_on if is_barrier(dep)) + dep for dep in stmt.depends_on if is_barrier(dep)) if len(direct_barrier_dependencies) > 0: return max(direct_barrier_dependencies, key=get_barrier_ordinal) else: return max((find_most_recent_global_barrier(kernel, dep) - for dep in insn.depends_on), + for dep in stmt.depends_on), key=get_barrier_ordinal) # }}} @@ -1691,9 +1691,9 @@ def get_subkernels(kernel): @memoize_on_first_arg -def 
get_subkernel_to_insn_id_map(kernel): +def get_subkernel_to_stmt_id_map(kernel): """Return a :class:`dict` mapping subkernel names to a :class:`frozenset` - consisting of the instruction ids scheduled within the subkernel. The + consisting of the statement ids scheduled within the subkernel. The kernel must be scheduled. """ from loopy.kernel import kernel_state @@ -1701,7 +1701,7 @@ def get_subkernel_to_insn_id_map(kernel): raise LoopyError("Kernel must be scheduled") from loopy.schedule import ( - sched_item_to_insn_id, CallKernel, ReturnFromKernel) + sched_item_to_stmt_id, CallKernel, ReturnFromKernel) subkernel = None result = {} @@ -1715,8 +1715,8 @@ def get_subkernel_to_insn_id_map(kernel): subkernel = None if subkernel is not None: - for insn_id in sched_item_to_insn_id(sched_item): - result[subkernel].add(insn_id) + for stmt_id in sched_item_to_stmt_id(sched_item): + result[subkernel].add(stmt_id) for subkernel in result: result[subkernel] = frozenset(result[subkernel]) diff --git a/loopy/loop.py b/loopy/loop.py index 459246382..6daef3ac7 100644 --- a/loopy/loop.py +++ b/loopy/loop.py @@ -37,7 +37,7 @@ def potential_loop_nest_map(kernel): result = {} all_inames = kernel.all_inames() - iname_to_insns = kernel.iname_to_insns() + iname_to_stmts = kernel.iname_to_stmts() # examine pairs of all inames--O(n**2), I know. 
for inner_iname in all_inames: @@ -46,7 +46,7 @@ def potential_loop_nest_map(kernel): if inner_iname == outer_iname: continue - if iname_to_insns[inner_iname] <= iname_to_insns[outer_iname]: + if iname_to_stmts[inner_iname] <= iname_to_stmts[outer_iname]: inner_result.add(outer_iname) if inner_result: diff --git a/loopy/match.py b/loopy/match.py index ab0038af8..434eced40 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -1,4 +1,4 @@ -"""Matching functionality for instruction ids and subsitution +"""Matching functionality for statement ids and subsitution rule invocations stacks.""" from __future__ import division, absolute_import @@ -471,11 +471,11 @@ class StackMatch(object): def __ne__(self, other): return not self.__eq__(other) - def __call__(self, kernel, insn, rule_stack): + def __call__(self, kernel, stmt, rule_stack): """ :arg rule_stack: a tuple of (name, tags) rule invocation, outermost first """ - stack_of_matchables = [insn] + stack_of_matchables = [stmt] for id, tags in rule_stack: stack_of_matchables.append(RuleInvocationMatchable(id, tags)) @@ -490,8 +490,8 @@ def parse_stack_match(smatch): """Syntax example:: ... > outer > ... > next > innermost $ - insn > next - insn > ... > next > innermost $ + stmt > next + stmt > ... > next > innermost $ ``...`` matches an arbitrary number of intervening stack levels. 
diff --git a/loopy/maxima.py b/loopy/maxima.py index 22d0c085c..c9a9a42ed 100644 --- a/loopy/maxima.py +++ b/loopy/maxima.py @@ -42,7 +42,7 @@ class MaximaStringifyMapper(MaximaStringifyMapperBase): return res -def get_loopy_instructions_as_maxima(kernel, prefix): +def get_loopy_statements_as_maxima(kernel, prefix): """Sample use for code comparison:: load("knl-optFalse.mac"); @@ -60,8 +60,8 @@ def get_loopy_instructions_as_maxima(kernel, prefix): my_variable_names = ( avn - for insn in kernel.instructions - for avn in insn.assignee_var_names() + for stmt in kernel.statements + for avn in stmt.assignee_var_names() ) from pymbolic import var @@ -75,30 +75,30 @@ def get_loopy_instructions_as_maxima(kernel, prefix): result = ["ratprint:false;"] - written_insn_ids = set() + written_stmt_ids = set() - from loopy.kernel import InstructionBase, Assignment + from loopy.kernel import StatementBase, Assignment - def write_insn(insn): - if not isinstance(insn, InstructionBase): - insn = kernel.id_to_insn[insn] - if not isinstance(insn, Assignment): + def write_stmt(stmt): + if not isinstance(stmt, StatementBase): + stmt = kernel.id_to_stmt[stmt] + if not isinstance(stmt, Assignment): raise RuntimeError("non-single-output assignment not supported " "in maxima export") - for dep in insn.depends_on: - if dep not in written_insn_ids: - write_insn(dep) + for dep in stmt.depends_on: + if dep not in written_stmt_ids: + write_stmt(dep) - aname, = insn.assignee_var_names() + aname, = stmt.assignee_var_names() result.append("%s%s : %s;" % ( prefix, aname, - mstr(substitute(insn.expression)))) + mstr(substitute(stmt.expression)))) - written_insn_ids.add(insn.id) + written_stmt_ids.add(stmt.id) - for insn in kernel.instructions: - if insn.id not in written_insn_ids: - write_insn(insn) + for stmt in kernel.statements: + if stmt.id not in written_stmt_ids: + write_stmt(stmt) return "\n".join(result) diff --git a/loopy/options.py b/loopy/options.py index 25bb7014c..451da8b7a 100644 --- 
a/loopy/options.py +++ b/loopy/options.py @@ -82,7 +82,7 @@ class Options(ImmutableRecord): .. attribute:: trace_assignments Generate code that uses *printf* in kernels to trace the - execution of assignment instructions. + execution of assignment statements. .. attribute:: trace_assignment_values @@ -98,7 +98,7 @@ class Options(ImmutableRecord): .. attribute:: check_dep_resolution Whether loopy should issue an error if a dependency - expression does not match any instructions in the kernel. + expression does not match any statements in the kernel. .. rubric:: Invocation-related options diff --git a/loopy/preprocess.py b/loopy/preprocess.py index ae70a0d6c..35f405aa2 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -78,16 +78,16 @@ def prepare_for_caching(kernel): def check_for_writes_to_predicates(kernel): from loopy.symbolic import get_dependencies - for insn in kernel.instructions: + for stmt in kernel.statements: pred_vars = ( frozenset.union( - *(get_dependencies(pred) for pred in insn.predicates)) - if insn.predicates else frozenset()) - written_pred_vars = frozenset(insn.assignee_var_names()) & pred_vars + *(get_dependencies(pred) for pred in stmt.predicates)) + if stmt.predicates else frozenset()) + written_pred_vars = frozenset(stmt.assignee_var_names()) & pred_vars if written_pred_vars: - raise LoopyError("In instruction '%s': may not write to " - "variable(s) '%s' involved in the instruction's predicates" - % (insn.id, ", ".join(written_pred_vars))) + raise LoopyError("In statement '%s': may not write to " + "variable(s) '%s' involved in the statement's predicates" + % (stmt.id, ", ".join(written_pred_vars))) # }}} @@ -113,8 +113,8 @@ def check_reduction_iname_uniqueness(kernel): from loopy.symbolic import ReductionCallbackMapper cb_mapper = ReductionCallbackMapper(map_reduction) - for insn in kernel.instructions: - insn.with_transformed_expressions(cb_mapper) + for stmt in kernel.statements: + stmt.with_transformed_expressions(cb_mapper) 
for iname, count in six.iteritems(iname_to_reduction_count): nonsimul_count = iname_to_nonsimultaneous_reduction_count.get(iname, 0) @@ -134,17 +134,17 @@ def check_reduction_iname_uniqueness(kernel): # {{{ decide temporary scope -def _get_compute_inames_tagged(kernel, insn, tag_base): +def _get_compute_inames_tagged(kernel, stmt, tag_base): return set(iname - for iname in kernel.insn_inames(insn.id) + for iname in kernel.stmt_inames(stmt.id) if isinstance(kernel.iname_to_tag.get(iname), tag_base)) -def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): +def _get_assignee_inames_tagged(kernel, stmt, tag_base, tv_names): return set(iname for aname, adeps in zip( - insn.assignee_var_names(), - insn.assignee_subscript_deps()) + stmt.assignee_var_names(), + stmt.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names if isinstance(kernel.iname_to_tag.get(iname), tag_base)) @@ -190,28 +190,28 @@ def find_temporary_scope(kernel): for alias in base_storage_to_aliases.get(temp_var.base_storage, []): my_writers = my_writers | writers.get(alias, frozenset()) - desired_scope_per_insn = [] - for insn_id in my_writers: - insn = kernel.id_to_insn[insn_id] + desired_scope_per_stmt = [] + for stmt_id in my_writers: + stmt = kernel.id_to_stmt[stmt_id] # A write race will emerge if: # # - the variable is local # and - # - the instruction is run across more inames (locally) parallel + # - the statement is run across more inames (locally) parallel # than are reflected in the assignee indices. 
locparallel_compute_inames = _get_compute_inames_tagged( - kernel, insn, LocalIndexTagBase) + kernel, stmt, LocalIndexTagBase) locparallel_assignee_inames = _get_assignee_inames_tagged( - kernel, insn, LocalIndexTagBase, tv_names) + kernel, stmt, LocalIndexTagBase, tv_names) grpparallel_compute_inames = _get_compute_inames_tagged( - kernel, insn, GroupIndexTag) + kernel, stmt, GroupIndexTag) grpparallel_assignee_inames = _get_assignee_inames_tagged( - kernel, insn, GroupIndexTag, temp_var.name) + kernel, stmt, GroupIndexTag, temp_var.name) assert locparallel_assignee_inames <= locparallel_compute_inames assert grpparallel_assignee_inames <= grpparallel_compute_inames @@ -227,13 +227,13 @@ def find_temporary_scope(kernel): if (apin != cpin and bool(apin)): warn_with_kernel( kernel, - "write_race_%s(%s)" % (scope_descr, insn_id), - "instruction '%s' looks invalid: " + "write_race_%s(%s)" % (scope_descr, stmt_id), + "statement '%s' looks invalid: " "it assigns to indices based on %s IDs, but " "its temporary '%s' cannot be made %s because " "a write race across the iname(s) '%s' would emerge. 
" "(Do you need to add an extra iname to your prefetch?)" - % (insn_id, iname_descr, temp_var.name, scope_descr, + % (stmt_id, iname_descr, temp_var.name, scope_descr, ", ".join(cpin - apin)), WriteRaceConditionWarning) @@ -244,9 +244,9 @@ def find_temporary_scope(kernel): and bool(cpin)): desired_scope = max(desired_scope, scope) - desired_scope_per_insn.append(desired_scope) + desired_scope_per_stmt.append(desired_scope) - if not desired_scope_per_insn: + if not desired_scope_per_stmt: if temp_var.initializer is None: warn_with_kernel(kernel, "temp_to_write(%s)" % temp_var.name, "temporary variable '%s' never written, eliminating" @@ -258,11 +258,11 @@ def find_temporary_scope(kernel): continue - overall_scope = max(desired_scope_per_insn) + overall_scope = max(desired_scope_per_stmt) from pytools import all - if not all(iscope == overall_scope for iscope in desired_scope_per_insn): - raise LoopyError("not all instructions agree on the " + if not all(iscope == overall_scope for iscope in desired_scope_per_stmt): + raise LoopyError("not all statements agree on the " "the desired scope (private/local/global) of the " "temporary '%s'" % temp_var.name) @@ -712,59 +712,59 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): be the case by introducing temporary assignments into the kernel. 
""" - insn_id_gen = kernel.get_instruction_id_generator() + stmt_id_gen = kernel.get_statement_id_generator() var_name_gen = kernel.get_var_name_generator() - new_or_updated_instructions = {} + new_or_updated_statements = {} new_temporaries = {} dep_map = dict( - (insn.id, insn.depends_on) for insn in kernel.instructions) + (stmt.id, stmt.depends_on) for stmt in kernel.statements) - inverse_dep_map = dict((insn.id, set()) for insn in kernel.instructions) + inverse_dep_map = dict((stmt.id, set()) for stmt in kernel.statements) import six - for insn_id, deps in six.iteritems(dep_map): + for stmt_id, deps in six.iteritems(dep_map): for dep in deps: - inverse_dep_map[dep].add(insn_id) + inverse_dep_map[dep].add(stmt_id) del dep_map # {{{ utils - def _add_to_no_sync_with(insn_id, new_no_sync_with_params): - insn = kernel.id_to_insn.get(insn_id) - insn = new_or_updated_instructions.get(insn_id, insn) - new_or_updated_instructions[insn_id] = ( - insn.copy( + def _add_to_no_sync_with(stmt_id, new_no_sync_with_params): + stmt = kernel.id_to_stmt.get(stmt_id) + stmt = new_or_updated_statements.get(stmt_id, stmt) + new_or_updated_statements[stmt_id] = ( + stmt.copy( no_sync_with=( - insn.no_sync_with | frozenset(new_no_sync_with_params)))) + stmt.no_sync_with | frozenset(new_no_sync_with_params)))) - def _add_to_depends_on(insn_id, new_depends_on_params): - insn = kernel.id_to_insn.get(insn_id) - insn = new_or_updated_instructions.get(insn_id, insn) - new_or_updated_instructions[insn_id] = ( - insn.copy( - depends_on=insn.depends_on | frozenset(new_depends_on_params))) + def _add_to_depends_on(stmt_id, new_depends_on_params): + stmt = kernel.id_to_stmt.get(stmt_id) + stmt = new_or_updated_statements.get(stmt_id, stmt) + new_or_updated_statements[stmt_id] = ( + stmt.copy( + depends_on=stmt.depends_on | frozenset(new_depends_on_params))) # }}} - from loopy.kernel.instruction import CallInstruction - for insn in kernel.instructions: - if not isinstance(insn, CallInstruction): + 
from loopy.kernel.statement import CallStatement + for stmt in kernel.statements: + if not isinstance(stmt, CallStatement): continue - if len(insn.assignees) <= 1: + if len(stmt.assignees) <= 1: continue - assignees = insn.assignees - assignee_var_names = insn.assignee_var_names() + assignees = stmt.assignees + assignee_var_names = stmt.assignee_var_names() new_assignees = [assignees[0]] newly_added_assignments_ids = set() needs_replacement = False - last_added_insn_id = insn.id + last_added_stmt_id = stmt.id from loopy.kernel.data import temp_var_scope, TemporaryVariable @@ -785,15 +785,15 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): needs_replacement = True - # {{{ generate a new assignent instruction + # {{{ generate a new assignent statement new_assignee_name = var_name_gen( - "{insn_id}_retval_{assignee_nr}" - .format(insn_id=insn.id, assignee_nr=assignee_nr)) + "{stmt_id}_retval_{assignee_nr}" + .format(stmt_id=stmt.id, assignee_nr=assignee_nr)) - new_assignment_id = insn_id_gen( - "{insn_id}_assign_retval_{assignee_nr}" - .format(insn_id=insn.id, assignee_nr=assignee_nr)) + new_assignment_id = stmt_id_gen( + "{stmt_id}_assign_retval_{assignee_nr}" + .format(stmt_id=stmt.id, assignee_nr=assignee_nr)) newly_added_assignments_ids.add(new_assignment_id) @@ -808,55 +808,55 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): new_assignee = var(new_assignee_name) new_assignees.append(new_assignee) - new_or_updated_instructions[new_assignment_id] = ( + new_or_updated_statements[new_assignment_id] = ( make_assignment( assignees=(assignee,), expression=new_assignee, id=new_assignment_id, - depends_on=frozenset([last_added_insn_id]), + depends_on=frozenset([last_added_stmt_id]), depends_on_is_final=True, no_sync_with=( - insn.no_sync_with | frozenset([(insn.id, "any")])), - predicates=insn.predicates, - within_inames=insn.within_inames)) + stmt.no_sync_with | frozenset([(stmt.id, "any")])), + 
predicates=stmt.predicates, + within_inames=stmt.within_inames)) - last_added_insn_id = new_assignment_id + last_added_stmt_id = new_assignment_id # }}} if not needs_replacement: continue - # {{{ update originating instruction + # {{{ update originating statement - orig_insn = new_or_updated_instructions.get(insn.id, insn) + orig_stmt = new_or_updated_statements.get(stmt.id, stmt) - new_or_updated_instructions[insn.id] = ( - orig_insn.copy(assignees=tuple(new_assignees))) + new_or_updated_statements[stmt.id] = ( + orig_stmt.copy(assignees=tuple(new_assignees))) - _add_to_no_sync_with(insn.id, + _add_to_no_sync_with(stmt.id, [(id, "any") for id in newly_added_assignments_ids]) # }}} # {{{ squash spurious memory dependencies amongst new assignments - for new_insn_id in newly_added_assignments_ids: - _add_to_no_sync_with(new_insn_id, + for new_stmt_id in newly_added_assignments_ids: + _add_to_no_sync_with(new_stmt_id, [(id, "any") for id in newly_added_assignments_ids - if id != new_insn_id]) + if id != new_stmt_id]) # }}} - # {{{ update instructions that depend on the originating instruction + # {{{ update statements that depend on the originating statement - for inverse_dep in inverse_dep_map[insn.id]: + for inverse_dep in inverse_dep_map[stmt.id]: _add_to_depends_on(inverse_dep, newly_added_assignments_ids) - for insn_id, scope in ( - new_or_updated_instructions[inverse_dep].no_sync_with): - if insn_id == insn.id: + for stmt_id, scope in ( + new_or_updated_statements[inverse_dep].no_sync_with): + if stmt_id == stmt.id: _add_to_no_sync_with( inverse_dep, [(id, scope) for id in newly_added_assignments_ids]) @@ -866,14 +866,14 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): new_temporary_variables = kernel.temporary_variables.copy() new_temporary_variables.update(new_temporaries) - new_instructions = ( - list(new_or_updated_instructions.values()) - + list(insn - for insn in kernel.instructions - if insn.id not in 
new_or_updated_instructions)) + new_statements = ( + list(new_or_updated_statements.values()) + + list(stmt + for stmt in kernel.statements + if stmt.id not in new_or_updated_statements)) return kernel.copy(temporary_variables=new_temporary_variables, - instructions=new_instructions) + statements=new_statements) def _insert_subdomain_into_domain_tree(kernel, domains, subdomain): @@ -888,18 +888,18 @@ def _insert_subdomain_into_domain_tree(kernel, domains, subdomain): # }}} -def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, +def realize_reduction(kernel, stmt_id_filter=None, unknown_types_ok=True, automagic_scans_ok=False, force_scan=False, force_outer_iname_for_scan=None): - """Rewrites reductions into their imperative form. With *insn_id_filter* - specified, operate only on the instruction with an instruction id matching - *insn_id_filter*. + """Rewrites reductions into their imperative form. With *stmt_id_filter* + specified, operate only on the statement with an statement id matching + *stmt_id_filter*. - If *insn_id_filter* is given, only the outermost level of reductions will be + If *stmt_id_filter* is given, only the outermost level of reductions will be expanded, inner reductions will be left alone (because they end up in a new - instruction with a different ID, which doesn't match the filter). + statement with a different ID, which doesn't match the filter). - If *insn_id_filter* is not given, all reductions in all instructions will + If *stmt_id_filter* is not given, all reductions in all statements will be realized. If *automagic_scans_ok*, this function will attempt to rewrite triangular @@ -907,7 +907,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, If *force_scan* is *True*, this function will attempt to rewrite *all* candidate reductions as scans and raise an error if this is not possible - (this is most useful combined with *insn_id_filter*). 
+ (this is most useful combined with *stmt_id_filter*). If *force_outer_iname_for_scan* is not *None*, this function will attempt to realize candidate reductions as scans using the specified iname as the @@ -916,10 +916,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, logger.debug("%s: realize reduction" % kernel.name) - new_insns = [] + new_stmts = [] new_iname_tags = {} - insn_id_gen = kernel.get_instruction_id_generator() + stmt_id_gen = kernel.get_statement_id_generator() var_name_gen = kernel.get_var_name_generator() new_temporary_variables = kernel.temporary_variables.copy() @@ -935,8 +935,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, return val def preprocess_scan_arguments( - insn, expr, nresults, scan_iname, track_iname, - newly_generated_insn_id_set): + stmt, expr, nresults, scan_iname, track_iname, + newly_generated_stmt_id_set): """Does iname substitution within scan arguments and returns a set of values suitable to be passed to the binary op. Returns a tuple.""" @@ -947,20 +947,20 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # the arguments in order to pass them to the binary op - so we expand # items that are not "plain" tuples here. 
if not isinstance(inner_expr, tuple): - get_args_insn_id = insn_id_gen( - "%s_%s_get" % (insn.id, "_".join(expr.inames))) + get_args_stmt_id = stmt_id_gen( + "%s_%s_get" % (stmt.id, "_".join(expr.inames))) inner_expr = expand_inner_reduction( - id=get_args_insn_id, + id=get_args_stmt_id, expr=inner_expr, nresults=nresults, - depends_on=insn.depends_on, - within_inames=insn.within_inames | expr.inames, - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates, + depends_on=stmt.depends_on, + within_inames=stmt.within_inames | expr.inames, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates, ) - newly_generated_insn_id_set.add(get_args_insn_id) + newly_generated_stmt_id_set.add(get_args_stmt_id) updated_inner_exprs = tuple( replace_var_within_expr(sub_expr, scan_iname, track_iname) @@ -993,7 +993,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, from pymbolic import var temp_vars = tuple(var(n) for n in temp_var_names) - call_insn = make_assignment( + call_stmt = make_assignment( id=id, assignees=temp_vars, expression=expr, @@ -1002,7 +1002,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, within_inames_is_final=within_inames_is_final, predicates=predicates) - generated_insns.append(call_insn) + generated_stmts.append(call_stmt) return temp_vars @@ -1012,7 +1012,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_reduction_seq(expr, rec, nresults, arg_dtypes, reduction_dtypes): - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_stmt_inames = temp_kernel.stmt_inames(stmt) from loopy.kernel.data import temp_var_scope acc_var_names = make_temporaries( @@ -1022,75 +1022,75 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, dtypes=reduction_dtypes, scope=temp_var_scope.PRIVATE) - init_insn_depends_on = frozenset() + init_stmt_depends_on = frozenset() - global_barrier = 
lp.find_most_recent_global_barrier(temp_kernel, insn.id) + global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id) if global_barrier is not None: - init_insn_depends_on |= frozenset([global_barrier]) + init_stmt_depends_on |= frozenset([global_barrier]) from pymbolic import var acc_vars = tuple(var(n) for n in acc_var_names) - init_id = insn_id_gen( - "%s_%s_init" % (insn.id, "_".join(expr.inames))) + init_id = stmt_id_gen( + "%s_%s_init" % (stmt.id, "_".join(expr.inames))) - init_insn = make_assignment( + init_stmt = make_assignment( id=init_id, assignees=acc_vars, - within_inames=outer_insn_inames - frozenset(expr.inames), - within_inames_is_final=insn.within_inames_is_final, - depends_on=init_insn_depends_on, + within_inames=outer_stmt_inames - frozenset(expr.inames), + within_inames_is_final=stmt.within_inames_is_final, + depends_on=init_stmt_depends_on, expression=expr.operation.neutral_element(*arg_dtypes), - predicates=insn.predicates,) + predicates=stmt.predicates,) - generated_insns.append(init_insn) + generated_stmts.append(init_stmt) - update_id = insn_id_gen( - based_on="%s_%s_update" % (insn.id, "_".join(expr.inames))) + update_id = stmt_id_gen( + based_on="%s_%s_update" % (stmt.id, "_".join(expr.inames))) - update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames) - if insn.within_inames_is_final: - update_insn_iname_deps = insn.within_inames | set(expr.inames) + update_stmt_iname_deps = temp_kernel.stmt_inames(stmt) | set(expr.inames) + if stmt.within_inames_is_final: + update_stmt_iname_deps = stmt.within_inames | set(expr.inames) - reduction_insn_depends_on = set([init_id]) + reduction_stmt_depends_on = set([init_id]) # In the case of a multi-argument reduction, we need a name for each of # the arguments in order to pass them to the binary op - so we expand # items that are not "plain" tuples here. 
if nresults > 1 and not isinstance(expr.expr, tuple): - get_args_insn_id = insn_id_gen( - "%s_%s_get" % (insn.id, "_".join(expr.inames))) + get_args_stmt_id = stmt_id_gen( + "%s_%s_get" % (stmt.id, "_".join(expr.inames))) reduction_expr = expand_inner_reduction( - id=get_args_insn_id, + id=get_args_stmt_id, expr=expr.expr, nresults=nresults, - depends_on=insn.depends_on, - within_inames=update_insn_iname_deps, - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates, + depends_on=stmt.depends_on, + within_inames=update_stmt_iname_deps, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates, ) - reduction_insn_depends_on.add(get_args_insn_id) + reduction_stmt_depends_on.add(get_args_stmt_id) else: reduction_expr = expr.expr - reduction_insn = make_assignment( + reduction_stmt = make_assignment( id=update_id, assignees=acc_vars, expression=expr.operation( arg_dtypes, _strip_if_scalar(acc_vars, acc_vars), reduction_expr), - depends_on=frozenset(reduction_insn_depends_on) | insn.depends_on, - within_inames=update_insn_iname_deps, - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates,) + depends_on=frozenset(reduction_stmt_depends_on) | stmt.depends_on, + within_inames=update_stmt_iname_deps, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates,) - generated_insns.append(reduction_insn) + generated_stmts.append(reduction_stmt) - new_insn_add_depends_on.add(reduction_insn.id) + new_stmt_add_depends_on.add(reduction_stmt.id) if nresults == 1: assert len(acc_vars) == 1 @@ -1134,12 +1134,12 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, size = _get_int_iname_size(red_iname) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_stmt_inames = temp_kernel.stmt_inames(stmt) from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple( oiname - for oiname in outer_insn_inames + for oiname in outer_stmt_inames if 
isinstance( kernel.iname_to_tag.get(oiname), LocalIndexTagBase)) @@ -1181,34 +1181,34 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # }}} - base_iname_deps = outer_insn_inames - frozenset(expr.inames) + base_iname_deps = outer_stmt_inames - frozenset(expr.inames) neutral = expr.operation.neutral_element(*arg_dtypes) - init_id = insn_id_gen("%s_%s_init" % (insn.id, red_iname)) - init_insn = make_assignment( + init_id = stmt_id_gen("%s_%s_init" % (stmt.id, red_iname)) + init_stmt = make_assignment( id=init_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(base_exec_iname),)] for acc_var in acc_vars), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset(), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(init_insn) + generated_stmts.append(init_stmt) - init_neutral_id = insn_id_gen("%s_%s_init_neutral" % (insn.id, red_iname)) - init_neutral_insn = make_assignment( + init_neutral_id = stmt_id_gen("%s_%s_init_neutral" % (stmt.id, red_iname)) + init_neutral_stmt = make_assignment( id=init_neutral_id, assignees=tuple(var(nvn) for nvn in neutral_var_names), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset(), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(init_neutral_insn) + generated_stmts.append(init_neutral_stmt) transfer_depends_on = set([init_neutral_id, init_id]) @@ -1216,27 +1216,27 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # the arguments in order to pass them to the binary op - so we expand # items that are not "plain" tuples here. 
if nresults > 1 and not isinstance(expr.expr, tuple): - get_args_insn_id = insn_id_gen( - "%s_%s_get" % (insn.id, red_iname)) + get_args_stmt_id = stmt_id_gen( + "%s_%s_get" % (stmt.id, red_iname)) reduction_expr = expand_inner_reduction( - id=get_args_insn_id, + id=get_args_stmt_id, expr=expr.expr, nresults=nresults, - depends_on=insn.depends_on, + depends_on=stmt.depends_on, within_inames=( - (outer_insn_inames - frozenset(expr.inames)) + (outer_stmt_inames - frozenset(expr.inames)) | frozenset([red_iname])), - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates, ) - transfer_depends_on.add(get_args_insn_id) + transfer_depends_on.add(get_args_stmt_id) else: reduction_expr = expr.expr - transfer_id = insn_id_gen("%s_%s_transfer" % (insn.id, red_iname)) - transfer_insn = make_assignment( + transfer_id = stmt_id_gen("%s_%s_transfer" % (stmt.id, red_iname)) + transfer_stmt = make_assignment( id=transfer_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(red_iname),)] @@ -1248,14 +1248,14 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, tuple(var(nvn) for nvn in neutral_var_names)), reduction_expr), within_inames=( - (outer_insn_inames - frozenset(expr.inames)) + (outer_stmt_inames - frozenset(expr.inames)) | frozenset([red_iname])), - within_inames_is_final=insn.within_inames_is_final, - depends_on=frozenset([init_id, init_neutral_id]) | insn.depends_on, + within_inames_is_final=stmt.within_inames_is_final, + depends_on=frozenset([init_id, init_neutral_id]) | stmt.depends_on, no_sync_with=frozenset([(init_id, "any")]), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(transfer_insn) + generated_stmts.append(transfer_stmt) cur_size = 1 while cur_size < size: @@ -1274,8 +1274,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, 
domains.append(_make_slab_set(stage_exec_iname, bound-new_size)) new_iname_tags[stage_exec_iname] = kernel.iname_to_tag[red_iname] - stage_id = insn_id_gen("red_%s_stage_%d" % (red_iname, istage)) - stage_insn = make_assignment( + stage_id = stmt_id_gen("red_%s_stage_%d" % (red_iname, istage)) + stage_stmt = make_assignment( id=stage_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] @@ -1293,21 +1293,21 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, for acc_var in acc_vars))), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset([prev_id]), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(stage_insn) + generated_stmts.append(stage_stmt) prev_id = stage_id cur_size = new_size bound = cur_size istage += 1 - new_insn_add_depends_on.add(prev_id) - new_insn_add_no_sync_with.add((prev_id, "any")) - new_insn_add_within_inames.add(base_exec_iname or stage_exec_iname) + new_stmt_add_depends_on.add(prev_id) + new_stmt_add_no_sync_with.add((prev_id, "any")) + new_stmt_add_within_inames.add(base_exec_iname or stage_exec_iname) if nresults == 1: assert len(acc_vars) == 1 @@ -1375,7 +1375,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_scan_seq(expr, rec, nresults, arg_dtypes, reduction_dtypes, sweep_iname, scan_iname, sweep_min_value, scan_min_value, stride): - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_stmt_inames = temp_kernel.stmt_inames(stmt) inames_to_remove.add(scan_iname) track_iname = var_name_gen( @@ -1397,59 +1397,59 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, from pymbolic import var acc_vars = tuple(var(n) for n in acc_var_names) - init_id = insn_id_gen( - "%s_%s_init" % (insn.id, "_".join(expr.inames))) + init_id = stmt_id_gen( + "%s_%s_init" % 
(stmt.id, "_".join(expr.inames))) - init_insn_depends_on = frozenset() + init_stmt_depends_on = frozenset() - global_barrier = lp.find_most_recent_global_barrier(temp_kernel, insn.id) + global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id) if global_barrier is not None: - init_insn_depends_on |= frozenset([global_barrier]) + init_stmt_depends_on |= frozenset([global_barrier]) - init_insn = make_assignment( + init_stmt = make_assignment( id=init_id, assignees=acc_vars, - within_inames=outer_insn_inames - frozenset( + within_inames=outer_stmt_inames - frozenset( (sweep_iname,) + expr.inames), - within_inames_is_final=insn.within_inames_is_final, - depends_on=init_insn_depends_on, + within_inames_is_final=stmt.within_inames_is_final, + depends_on=init_stmt_depends_on, expression=expr.operation.neutral_element(*arg_dtypes), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(init_insn) + generated_stmts.append(init_stmt) - update_insn_depends_on = set([init_insn.id]) | insn.depends_on + update_stmt_depends_on = set([init_stmt.id]) | stmt.depends_on updated_inner_exprs = ( - preprocess_scan_arguments(insn, expr.expr, nresults, - scan_iname, track_iname, update_insn_depends_on)) + preprocess_scan_arguments(stmt, expr.expr, nresults, + scan_iname, track_iname, update_stmt_depends_on)) - update_id = insn_id_gen( - based_on="%s_%s_update" % (insn.id, "_".join(expr.inames))) + update_id = stmt_id_gen( + based_on="%s_%s_update" % (stmt.id, "_".join(expr.inames))) - update_insn_iname_deps = temp_kernel.insn_inames(insn) | set([track_iname]) - if insn.within_inames_is_final: - update_insn_iname_deps = insn.within_inames | set([track_iname]) + update_stmt_iname_deps = temp_kernel.stmt_inames(stmt) | set([track_iname]) + if stmt.within_inames_is_final: + update_stmt_iname_deps = stmt.within_inames | set([track_iname]) - scan_insn = make_assignment( + scan_stmt = make_assignment( id=update_id, assignees=acc_vars, 
expression=expr.operation( arg_dtypes, _strip_if_scalar(acc_vars, acc_vars), _strip_if_scalar(acc_vars, updated_inner_exprs)), - depends_on=frozenset(update_insn_depends_on), - within_inames=update_insn_iname_deps, - no_sync_with=insn.no_sync_with, - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates, + depends_on=frozenset(update_stmt_depends_on), + within_inames=update_stmt_iname_deps, + no_sync_with=stmt.no_sync_with, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates, ) - generated_insns.append(scan_insn) + generated_stmts.append(scan_stmt) - new_insn_add_depends_on.add(scan_insn.id) + new_stmt_add_depends_on.add(scan_stmt.id) if nresults == 1: assert len(acc_vars) == 1 @@ -1473,12 +1473,12 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, return map_reduction_seq( expr, rec, nresults, arg_dtypes, reduction_dtypes) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_stmt_inames = temp_kernel.stmt_inames(stmt) from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple( oiname - for oiname in outer_insn_inames + for oiname in outer_stmt_inames if isinstance( kernel.iname_to_tag.get(oiname), LocalIndexTagBase) @@ -1530,45 +1530,45 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, acc_vars = tuple(var(n) for n in acc_var_names) read_vars = tuple(var(n) for n in read_var_names) - base_iname_deps = (outer_insn_inames + base_iname_deps = (outer_stmt_inames - frozenset(expr.inames) - frozenset([sweep_iname])) neutral = expr.operation.neutral_element(*arg_dtypes) - init_insn_depends_on = insn.depends_on + init_stmt_depends_on = stmt.depends_on - global_barrier = lp.find_most_recent_global_barrier(temp_kernel, insn.id) + global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id) if global_barrier is not None: - init_insn_depends_on |= frozenset([global_barrier]) + init_stmt_depends_on |= frozenset([global_barrier]) - 
init_id = insn_id_gen("%s_%s_init" % (insn.id, scan_iname)) - init_insn = make_assignment( + init_id = stmt_id_gen("%s_%s_init" % (stmt.id, scan_iname)) + init_stmt = make_assignment( id=init_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(base_exec_iname),)] for acc_var in acc_vars), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), - within_inames_is_final=insn.within_inames_is_final, - depends_on=init_insn_depends_on, - predicates=insn.predicates, + within_inames_is_final=stmt.within_inames_is_final, + depends_on=init_stmt_depends_on, + predicates=stmt.predicates, ) - generated_insns.append(init_insn) + generated_stmts.append(init_stmt) - transfer_insn_depends_on = set([init_insn.id]) | insn.depends_on + transfer_stmt_depends_on = set([init_stmt.id]) | stmt.depends_on updated_inner_exprs = ( - preprocess_scan_arguments(insn, expr.expr, nresults, - scan_iname, track_iname, transfer_insn_depends_on)) + preprocess_scan_arguments(stmt, expr.expr, nresults, + scan_iname, track_iname, transfer_stmt_depends_on)) from loopy.symbolic import Reduction from loopy.symbolic import pw_aff_to_expr sweep_min_value_expr = pw_aff_to_expr(sweep_min_value) - transfer_id = insn_id_gen("%s_%s_transfer" % (insn.id, scan_iname)) - transfer_insn = make_assignment( + transfer_id = stmt_id_gen("%s_%s_transfer" % (stmt.id, scan_iname)) + transfer_stmt = make_assignment( id=transfer_id, assignees=tuple( acc_var[outer_local_iname_vars @@ -1580,14 +1580,14 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, expr=_strip_if_scalar(acc_vars, updated_inner_exprs), allow_simultaneous=False, ), - within_inames=outer_insn_inames - frozenset(expr.inames), - within_inames_is_final=insn.within_inames_is_final, - depends_on=frozenset(transfer_insn_depends_on), - no_sync_with=frozenset([(init_id, "any")]) | insn.no_sync_with, - predicates=insn.predicates, + within_inames=outer_stmt_inames - frozenset(expr.inames), + 
within_inames_is_final=stmt.within_inames_is_final, + depends_on=frozenset(transfer_stmt_depends_on), + no_sync_with=frozenset([(init_id, "any")]) | stmt.no_sync_with, + predicates=stmt.predicates, ) - generated_insns.append(transfer_insn) + generated_stmts.append(transfer_stmt) prev_id = transfer_id @@ -1601,10 +1601,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, new_iname_tags[stage_exec_iname] = kernel.iname_to_tag[sweep_iname] for read_var, acc_var in zip(read_vars, acc_vars): - read_stage_id = insn_id_gen( + read_stage_id = stmt_id_gen( "scan_%s_read_stage_%d" % (scan_iname, istage)) - read_stage_insn = make_assignment( + read_stage_stmt = make_assignment( id=read_stage_id, assignees=(read_var,), expression=( @@ -1613,26 +1613,26 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, + (var(stage_exec_iname) - cur_size,)]), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset([prev_id]), - predicates=insn.predicates, + predicates=stmt.predicates, ) if cur_size == 1: - # Performance hack: don't add a barrier here with transfer_insn. + # Performance hack: don't add a barrier here with transfer_stmt. # NOTE: This won't work if the way that local inames # are lowered changes. 
- read_stage_insn = read_stage_insn.copy( + read_stage_stmt = read_stage_stmt.copy( no_sync_with=( - read_stage_insn.no_sync_with + read_stage_stmt.no_sync_with | frozenset([(transfer_id, "any")]))) - generated_insns.append(read_stage_insn) + generated_stmts.append(read_stage_stmt) prev_id = read_stage_id - write_stage_id = insn_id_gen( + write_stage_id = stmt_id_gen( "scan_%s_write_stage_%d" % (scan_iname, istage)) - write_stage_insn = make_assignment( + write_stage_stmt = make_assignment( id=write_stage_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] @@ -1647,19 +1647,19 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, ), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset([prev_id]), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(write_stage_insn) + generated_stmts.append(write_stage_stmt) prev_id = write_stage_id cur_size *= 2 istage += 1 - new_insn_add_depends_on.add(prev_id) - new_insn_add_within_inames.add(sweep_iname) + new_stmt_add_depends_on.add(prev_id) + new_stmt_add_within_inames.add(sweep_iname) output_idx = var(sweep_iname) - sweep_min_value_expr @@ -1676,7 +1676,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_reduction(expr, rec, nresults=1): # Only expand one level of reduction at a time, going from outermost to - # innermost. Otherwise we get the (iname + insn) dependencies wrong. + # innermost. Otherwise we get the (iname + stmt) dependencies wrong. 
from loopy.type_inference import ( infer_arg_and_reduction_dtypes_for_reduction_expression) @@ -1684,8 +1684,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, infer_arg_and_reduction_dtypes_for_reduction_expression( temp_kernel, expr, unknown_types_ok)) - outer_insn_inames = temp_kernel.insn_inames(insn) - bad_inames = frozenset(expr.inames) & outer_insn_inames + outer_stmt_inames = temp_kernel.stmt_inames(stmt) + bad_inames = frozenset(expr.inames) & outer_stmt_inames if bad_inames: raise LoopyError("reduction used within loop(s) that it was " "supposed to reduce over: " + ", ".join(bad_inames)) @@ -1711,7 +1711,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # Try to determine scan candidate information (sweep iname, scan # iname, etc). scan_param = _try_infer_scan_candidate_from_expr( - temp_kernel, expr, outer_insn_inames, + temp_kernel, expr, outer_stmt_inames, sweep_iname=force_outer_iname_for_scan) except ValueError as v: @@ -1778,10 +1778,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, parallel = sweep_iname in sweep_class.local_parallel bad_parallel = sweep_iname in sweep_class.nonlocal_parallel - if sweep_iname not in outer_insn_inames: + if sweep_iname not in outer_stmt_inames: _error_if_force_scan_on(LoopyError, "Sweep iname '%s' was detected, but is not an iname " - "for the instruction." % sweep_iname) + "for the statement." 
% sweep_iname) elif bad_parallel: _error_if_force_scan_on(LoopyError, "Sweep iname '%s' has an unsupported parallel tag '%s' " @@ -1828,48 +1828,48 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, from loopy.symbolic import ReductionCallbackMapper cb_mapper = ReductionCallbackMapper(map_reduction) - insn_queue = kernel.instructions[:] - insn_id_replacements = {} + stmt_queue = kernel.statements[:] + stmt_id_replacements = {} domains = kernel.domains[:] temp_kernel = kernel import loopy as lp - while insn_queue: - new_insn_add_depends_on = set() - new_insn_add_no_sync_with = set() - new_insn_add_within_inames = set() + while stmt_queue: + new_stmt_add_depends_on = set() + new_stmt_add_no_sync_with = set() + new_stmt_add_within_inames = set() - generated_insns = [] + generated_stmts = [] - insn = insn_queue.pop(0) + stmt = stmt_queue.pop(0) - if insn_id_filter is not None and insn.id != insn_id_filter \ - or not isinstance(insn, lp.MultiAssignmentBase): - new_insns.append(insn) + if stmt_id_filter is not None and stmt.id != stmt_id_filter \ + or not isinstance(stmt, lp.MultiAssignmentBase): + new_stmts.append(stmt) continue - nresults = len(insn.assignees) + nresults = len(stmt.assignees) # Run reduction expansion. from loopy.symbolic import Reduction - if isinstance(insn.expression, Reduction) and nresults > 1: - new_expressions = cb_mapper(insn.expression, nresults=nresults) + if isinstance(stmt.expression, Reduction) and nresults > 1: + new_expressions = cb_mapper(stmt.expression, nresults=nresults) else: - new_expressions = (cb_mapper(insn.expression),) + new_expressions = (cb_mapper(stmt.expression),) - if generated_insns: + if generated_stmts: # An expansion happened, so insert the generated stuff plus # ourselves back into the queue. 
- kwargs = insn.get_copy_kwargs( - depends_on=insn.depends_on - | frozenset(new_insn_add_depends_on), - no_sync_with=insn.no_sync_with - | frozenset(new_insn_add_no_sync_with), + kwargs = stmt.get_copy_kwargs( + depends_on=stmt.depends_on + | frozenset(new_stmt_add_depends_on), + no_sync_with=stmt.no_sync_with + | frozenset(new_stmt_add_no_sync_with), within_inames=( - temp_kernel.insn_inames(insn) - | new_insn_add_within_inames)) + temp_kernel.stmt_inames(stmt) + | new_stmt_add_within_inames)) kwargs.pop("id") kwargs.pop("expression") @@ -1878,53 +1878,53 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, kwargs.pop("temp_var_type", None) kwargs.pop("temp_var_types", None) - if isinstance(insn.expression, Reduction) and nresults > 1: - replacement_insns = [ + if isinstance(stmt.expression, Reduction) and nresults > 1: + replacement_stmts = [ lp.Assignment( - id=insn_id_gen(insn.id), + id=stmt_id_gen(stmt.id), assignee=assignee, expression=new_expr, **kwargs) for assignee, new_expr in zip( - insn.assignees, new_expressions)] + stmt.assignees, new_expressions)] else: new_expr, = new_expressions - replacement_insns = [ + replacement_stmts = [ make_assignment( - id=insn_id_gen(insn.id), - assignees=insn.assignees, + id=stmt_id_gen(stmt.id), + assignees=stmt.assignees, expression=new_expr, **kwargs) ] - insn_id_replacements[insn.id] = [ - rinsn.id for rinsn in replacement_insns] + stmt_id_replacements[stmt.id] = [ + rstmt.id for rstmt in replacement_stmts] - insn_queue = generated_insns + replacement_insns + insn_queue + stmt_queue = generated_stmts + replacement_stmts + stmt_queue # The reduction expander needs an up-to-date kernel # object to find dependencies. Keep temp_kernel up-to-date. 
temp_kernel = kernel.copy( - instructions=new_insns + insn_queue, + statements=new_stmts + stmt_queue, temporary_variables=new_temporary_variables, domains=domains) - temp_kernel = lp.replace_instruction_ids( - temp_kernel, insn_id_replacements) + temp_kernel = lp.replace_statement_ids( + temp_kernel, stmt_id_replacements) else: - # nothing happened, we're done with insn - assert not new_insn_add_depends_on + # nothing happened, we're done with stmt + assert not new_stmt_add_depends_on - new_insns.append(insn) + new_stmts.append(stmt) kernel = kernel.copy( - instructions=new_insns, + statements=new_stmts, temporary_variables=new_temporary_variables, domains=domains) - kernel = lp.replace_instruction_ids(kernel, insn_id_replacements) + kernel = lp.replace_statement_ids(kernel, stmt_id_replacements) kernel = lp.tag_inames(kernel, new_iname_tags) @@ -1939,7 +1939,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # }}} -# {{{ find idempotence ("boostability") of instructions +# {{{ find idempotence ("boostability") of statements def find_idempotence(kernel): logger.debug("%s: idempotence" % kernel.name) @@ -1951,18 +1951,18 @@ def find_idempotence(kernel): var_names = arg_names | set(six.iterkeys(kernel.temporary_variables)) reads_map = dict( - (insn.id, insn.read_dependency_names() & var_names) - for insn in kernel.instructions) + (stmt.id, stmt.read_dependency_names() & var_names) + for stmt in kernel.statements) from collections import defaultdict dep_graph = defaultdict(set) - for insn in kernel.instructions: - dep_graph[insn.id] = set(writer_id - for var in reads_map[insn.id] + for stmt in kernel.statements: + dep_graph[stmt.id] = set(writer_id + for var in reads_map[stmt.id] for writer_id in writer_map.get(var, set())) - # Find SCCs of dep_graph. These are used for checking if the instruction is + # Find SCCs of dep_graph. These are used for checking if the statement is # in a dependency cycle. 
from loopy.tools import compute_sccs @@ -1972,29 +1972,29 @@ def find_idempotence(kernel): non_idempotently_updated_vars = set() - new_insns = [] - for insn in kernel.instructions: - boostable = len(sccs[insn.id]) == 1 and insn.id not in dep_graph[insn.id] + new_stmts = [] + for stmt in kernel.statements: + boostable = len(sccs[stmt.id]) == 1 and stmt.id not in dep_graph[stmt.id] if not boostable: non_idempotently_updated_vars.update( - insn.assignee_var_names()) + stmt.assignee_var_names()) - new_insns.append(insn.copy(boostable=boostable)) + new_stmts.append(stmt.copy(boostable=boostable)) # {{{ remove boostability from isns that access non-idempotently updated vars - new2_insns = [] - for insn in new_insns: - if insn.boostable and bool( - non_idempotently_updated_vars & insn.dependency_names()): - new2_insns.append(insn.copy(boostable=False)) + new2_stmts = [] + for stmt in new_stmts: + if stmt.boostable and bool( + non_idempotently_updated_vars & stmt.dependency_names()): + new2_stmts.append(stmt.copy(boostable=False)) else: - new2_insns.append(insn) + new2_stmts.append(stmt) # }}} - return kernel.copy(instructions=new2_insns) + return kernel.copy(statements=new2_stmts) # }}} @@ -2002,47 +2002,47 @@ def find_idempotence(kernel): # {{{ limit boostability def limit_boostability(kernel): - """Finds out which other inames an instruction's inames occur with + """Finds out which other inames an statement's inames occur with and then limits boostability to just those inames. 
""" logger.debug("%s: limit boostability" % kernel.name) iname_occurs_with = {} - for insn in kernel.instructions: - insn_inames = kernel.insn_inames(insn) - for iname in insn_inames: - iname_occurs_with.setdefault(iname, set()).update(insn_inames) + for stmt in kernel.statements: + stmt_inames = kernel.stmt_inames(stmt) + for iname in stmt_inames: + iname_occurs_with.setdefault(iname, set()).update(stmt_inames) iname_use_counts = {} - for insn in kernel.instructions: - for iname in kernel.insn_inames(insn): + for stmt in kernel.statements: + for iname in kernel.stmt_inames(stmt): iname_use_counts[iname] = iname_use_counts.get(iname, 0) + 1 single_use_inames = set(iname for iname, uc in six.iteritems(iname_use_counts) if uc == 1) - new_insns = [] - for insn in kernel.instructions: - if insn.boostable is None: - raise LoopyError("insn '%s' has undetermined boostability" % insn.id) - elif insn.boostable: + new_stmts = [] + for stmt in kernel.statements: + if stmt.boostable is None: + raise LoopyError("stmt '%s' has undetermined boostability" % stmt.id) + elif stmt.boostable: boostable_into = set() - for iname in kernel.insn_inames(insn): + for iname in kernel.stmt_inames(stmt): boostable_into.update(iname_occurs_with[iname]) - boostable_into -= kernel.insn_inames(insn) | single_use_inames + boostable_into -= kernel.stmt_inames(stmt) | single_use_inames # Even if boostable_into is empty, leave boostable flag on--it is used # for boosting into unused hw axes. 
- insn = insn.copy(boostable_into=boostable_into) + stmt = stmt.copy(boostable_into=boostable_into) else: - insn = insn.copy(boostable_into=set()) + stmt = stmt.copy(boostable_into=set()) - new_insns.append(insn) + new_stmts.append(stmt) - return kernel.copy(instructions=new_insns) + return kernel.copy(statements=new_stmts) # }}} diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index abf4d799f..e585a8a39 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -66,8 +66,8 @@ class LeaveLoop(EndBlockItem): hash_fields = __slots__ = ["iname"] -class RunInstruction(ScheduleItem): - hash_fields = __slots__ = ["insn_id"] +class RunStatement(ScheduleItem): + hash_fields = __slots__ = ["stmt_id"] class CallKernel(BeginBlockItem): @@ -88,11 +88,11 @@ class Barrier(ScheduleItem): ``"local"`` or ``"global"`` - .. attribute:: originating_insn_id + .. attribute:: originating_stmt_id """ hash_fields = ["comment", "kind"] - __slots__ = hash_fields + ["originating_insn_id"] + __slots__ = hash_fields + ["originating_stmt_id"] # }}} @@ -143,12 +143,12 @@ def generate_sub_sched_items(schedule, start_idx): assert False -def get_insn_ids_for_block_at(schedule, start_idx): +def get_stmt_ids_for_block_at(schedule, start_idx): return frozenset( - sub_sched_item.insn_id + sub_sched_item.stmt_id for i, sub_sched_item in generate_sub_sched_items( schedule, start_idx) - if isinstance(sub_sched_item, RunInstruction)) + if isinstance(sub_sched_item, RunStatement)) def find_active_inames_at(kernel, sched_index): @@ -185,17 +185,17 @@ def find_used_inames_within(kernel, sched_index): if isinstance(sched_item, BeginBlockItem): loop_contents, _ = gather_schedule_block( kernel.schedule, sched_index) - run_insns = [subsched_item + run_stmts = [subsched_item for subsched_item in loop_contents - if isinstance(subsched_item, RunInstruction)] - elif isinstance(sched_item, RunInstruction): - run_insns = [sched_item] + if isinstance(subsched_item, RunStatement)] + 
elif isinstance(sched_item, RunStatement): + run_stmts = [sched_item] else: return set() result = set() - for sched_item in run_insns: - result.update(kernel.insn_inames(sched_item.insn_id)) + for sched_item in run_stmts: + result.update(kernel.stmt_inames(sched_item.stmt_id)) return result @@ -214,13 +214,13 @@ def find_loop_nest_with_map(kernel): if not isinstance(kernel.iname_to_tag.get(iname), (ConcurrentTag, IlpBaseTag, VectorizeTag))]) - iname_to_insns = kernel.iname_to_insns() + iname_to_stmts = kernel.iname_to_stmts() for iname in all_nonpar_inames: result[iname] = set([ other_iname - for insn in iname_to_insns[iname] - for other_iname in kernel.insn_inames(insn) & all_nonpar_inames + for stmt in iname_to_stmts[iname] + for other_iname in kernel.stmt_inames(stmt) & all_nonpar_inames ]) return result @@ -234,7 +234,7 @@ def find_loop_nest_around_map(kernel): all_inames = kernel.all_inames() - iname_to_insns = kernel.iname_to_insns() + iname_to_stmts = kernel.iname_to_stmts() # examine pairs of all inames--O(n**2), I know. from loopy.kernel.data import IlpBaseTag @@ -253,7 +253,7 @@ def find_loop_nest_around_map(kernel): # slack here. continue - if iname_to_insns[inner_iname] < iname_to_insns[outer_iname]: + if iname_to_stmts[inner_iname] < iname_to_stmts[outer_iname]: result[inner_iname].add(outer_iname) for dom_idx, dom in enumerate(kernel.domains): @@ -267,54 +267,54 @@ def find_loop_nest_around_map(kernel): return result -def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): - """Returns a dictionary mapping inames to other instruction ids that need to +def find_loop_stmt_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): + """Returns a dictionary mapping inames to other statement ids that need to be scheduled before the iname should be eligible for scheduling. 
""" result = {} from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag - for insn in kernel.instructions: - for iname in kernel.insn_inames(insn): + for stmt in kernel.statements: + for iname in kernel.stmt_inames(stmt): if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) - for dep_insn_id in insn.depends_on: - if dep_insn_id in iname_dep: + for dep_stmt_id in stmt.depends_on: + if dep_stmt_id in iname_dep: # already depending, nothing to check continue - dep_insn = kernel.id_to_insn[dep_insn_id] - dep_insn_inames = kernel.insn_inames(dep_insn) + dep_stmt = kernel.id_to_stmt[dep_stmt_id] + dep_stmt_inames = kernel.stmt_inames(dep_stmt) - if iname in dep_insn_inames: + if iname in dep_stmt_inames: # Nothing to be learned, dependency is in loop over iname # already. continue - # To make sure dep_insn belongs outside of iname, we must prove - # that all inames that dep_insn will be executed in nest + # To make sure dep_stmt belongs outside of iname, we must prove + # that all inames that dep_stmt will be executed in nest # outside of the loop over *iname*. (i.e. nested around, or # before). may_add_to_loop_dep_map = True - for dep_insn_iname in dep_insn_inames: - if dep_insn_iname in loop_nest_around_map[iname]: - # dep_insn_iname is guaranteed to nest outside of iname + for dep_stmt_iname in dep_stmt_inames: + if dep_stmt_iname in loop_nest_around_map[iname]: + # dep_stmt_iname is guaranteed to nest outside of iname # -> safe. continue - tag = kernel.iname_to_tag.get(dep_insn_iname) + tag = kernel.iname_to_tag.get(dep_stmt_iname) if isinstance(tag, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. 
continue - if dep_insn_iname not in loop_nest_with_map.get(iname, []): - # dep_insn_iname does not nest with iname, so its nest + if dep_stmt_iname not in loop_nest_with_map.get(iname, []): + # dep_stmt_iname does not nest with iname, so its nest # must occur outside. continue @@ -325,38 +325,38 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue logger.debug("{knl}: loop dependency map: iname '{iname}' " - "depends on '{dep_insn}' via '{insn}'" + "depends on '{dep_stmt}' via '{stmt}'" .format( knl=kernel.name, iname=iname, - dep_insn=dep_insn_id, - insn=insn.id)) + dep_stmt=dep_stmt_id, + stmt=stmt.id)) - iname_dep.add(dep_insn_id) + iname_dep.add(dep_stmt_id) return result -def group_insn_counts(kernel): +def group_stmt_counts(kernel): result = {} - for insn in kernel.instructions: - for grp in insn.groups: + for stmt in kernel.statements: + for grp in stmt.groups: result[grp] = result.get(grp, 0) + 1 return result -def gen_dependencies_except(kernel, insn_id, except_insn_ids): - insn = kernel.id_to_insn[insn_id] - for dep_id in insn.depends_on: +def gen_dependencies_except(kernel, stmt_id, except_stmt_ids): + stmt = kernel.id_to_stmt[stmt_id] + for dep_id in stmt.depends_on: - if dep_id in except_insn_ids: + if dep_id in except_stmt_ids: continue yield dep_id - for sub_dep_id in gen_dependencies_except(kernel, dep_id, except_insn_ids): + for sub_dep_id in gen_dependencies_except(kernel, dep_id, except_stmt_ids): yield sub_dep_id @@ -403,50 +403,50 @@ def get_priority_tiers(wanted, priorities): yield tier -def sched_item_to_insn_id(sched_item): +def sched_item_to_stmt_id(sched_item): # Helper for use in generator expressions, i.e. - # (... for insn_id in sched_item_to_insn_id(item) ...) - if isinstance(sched_item, RunInstruction): - yield sched_item.insn_id + # (... for stmt_id in sched_item_to_stmt_id(item) ...) 
+ if isinstance(sched_item, RunStatement): + yield sched_item.stmt_id elif isinstance(sched_item, Barrier): - if (hasattr(sched_item, "originating_insn_id") - and sched_item.originating_insn_id is not None): - yield sched_item.originating_insn_id + if (hasattr(sched_item, "originating_stmt_id") + and sched_item.originating_stmt_id is not None): + yield sched_item.originating_stmt_id # }}} # {{{ debug help -def format_insn_id(kernel, insn_id): +def format_stmt_id(kernel, stmt_id): Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa - return Fore.GREEN + insn_id + Style.RESET_ALL + return Fore.GREEN + stmt_id + Style.RESET_ALL -def format_insn(kernel, insn_id): - insn = kernel.id_to_insn[insn_id] +def format_stmt(kernel, stmt_id): + stmt = kernel.id_to_stmt[stmt_id] Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa - from loopy.kernel.instruction import ( - MultiAssignmentBase, NoOpInstruction, BarrierInstruction) - if isinstance(insn, MultiAssignmentBase): + from loopy.kernel.statement import ( + MultiAssignmentBase, NoOpStatement, BarrierStatement) + if isinstance(stmt, MultiAssignmentBase): return "%s%s%s = %s%s%s {id=%s}" % ( - Fore.CYAN, ", ".join(str(a) for a in insn.assignees), Style.RESET_ALL, - Fore.MAGENTA, str(insn.expression), Style.RESET_ALL, - format_insn_id(kernel, insn_id)) - elif isinstance(insn, BarrierInstruction): + Fore.CYAN, ", ".join(str(a) for a in stmt.assignees), Style.RESET_ALL, + Fore.MAGENTA, str(stmt.expression), Style.RESET_ALL, + format_stmt_id(kernel, stmt_id)) + elif isinstance(stmt, BarrierStatement): return "[%s] %s... %sbarrier%s" % ( - format_insn_id(kernel, insn_id), - Fore.MAGENTA, insn.kind[0], Style.RESET_ALL) - elif isinstance(insn, NoOpInstruction): + format_stmt_id(kernel, stmt_id), + Fore.MAGENTA, stmt.kind[0], Style.RESET_ALL) + elif isinstance(stmt, NoOpStatement): return "[%s] %s... 
nop%s" % ( - format_insn_id(kernel, insn_id), + format_stmt_id(kernel, stmt_id), Fore.MAGENTA, Style.RESET_ALL) else: return "[%s] %s%s%s" % ( - format_insn_id(kernel, insn_id), - Fore.CYAN, str(insn), Style.RESET_ALL) + format_stmt_id(kernel, stmt_id), + Fore.CYAN, str(stmt), Style.RESET_ALL) def dump_schedule(kernel, schedule): @@ -471,13 +471,13 @@ def dump_schedule(kernel, schedule): elif isinstance(sched_item, ReturnFromKernel): indent = indent[:-4] lines.append(indent + "RETURN FROM KERNEL %s" % sched_item.kernel_name) - elif isinstance(sched_item, RunInstruction): - insn = kernel.id_to_insn[sched_item.insn_id] - if isinstance(insn, MultiAssignmentBase): - insn_str = format_insn(kernel, sched_item.insn_id) + elif isinstance(sched_item, RunStatement): + stmt = kernel.id_to_stmt[sched_item.stmt_id] + if isinstance(stmt, MultiAssignmentBase): + stmt_str = format_stmt(kernel, sched_item.stmt_id) else: - insn_str = sched_item.insn_id - lines.append(indent + insn_str) + stmt_str = sched_item.stmt_id + lines.append(indent + stmt_str) elif isinstance(sched_item, Barrier): lines.append(indent + "... %sbarrier" % sched_item.kind[0]) else: @@ -592,9 +592,9 @@ class SchedulerState(ImmutableRecord): .. attribute:: schedule - .. attribute:: scheduled_insn_ids + .. attribute:: scheduled_stmt_ids - .. attribute:: unscheduled_insn_ids + .. attribute:: unscheduled_stmt_ids .. attribute:: preschedule @@ -602,9 +602,9 @@ class SchedulerState(ImmutableRecord): schedule, maintaining the same relative ordering. Newly scheduled items may interleave this sequence. - .. attribute:: prescheduled_insn_ids + .. attribute:: prescheduled_stmt_ids - A :class:`frozenset` of any instruction that started prescheduled + A :class:`frozenset` of any statement that started prescheduled .. attribute:: prescheduled_inames @@ -618,14 +618,14 @@ class SchedulerState(ImmutableRecord): Whether the scheduler is inside a subkernel - .. attribute:: group_insn_counts + .. 
attribute:: group_stmt_counts - A mapping from instruction group names to the number of instructions + A mapping from statement group names to the number of statements contained in them. .. attribute:: active_group_counts - A mapping from instruction group names to the number of instructions + A mapping from statement group names to the number of statements in them that are left to schedule. If a group name occurs in this mapping, that group is considered active. @@ -645,8 +645,8 @@ class SchedulerState(ImmutableRecord): def generate_loop_schedules_internal( sched_state, allow_boost=False, debug=None): - # allow_insn is set to False initially and after entering each loop - # to give loops containing high-priority instructions a chance. + # allow_stmt is set to False initially and after entering each loop + # to give loops containing high-priority statements a chance. kernel = sched_state.kernel Fore = kernel.options._fore # noqa @@ -734,13 +734,13 @@ def generate_loop_schedules_internal( # {{{ see if there are pending barriers in the preschedule - # Barriers that do not have an originating instruction are handled here. + # Barriers that do not have an originating statement are handled here. # (These are automatically inserted by insert_barriers().) Barriers with - # originating instructions are handled as part of normal instruction + # originating statements are handled as part of normal statement # scheduling below. 
if ( isinstance(next_preschedule_item, Barrier) - and next_preschedule_item.originating_insn_id is None): + and next_preschedule_item.originating_stmt_id is None): for result in generate_loop_schedules_internal( sched_state.copy( schedule=sched_state.schedule + (next_preschedule_item,), @@ -751,134 +751,134 @@ def generate_loop_schedules_internal( # }}} - # {{{ see if any insns are ready to be scheduled now + # {{{ see if any stmts are ready to be scheduled now - # Also take note of insns that have a chance of being schedulable inside + # Also take note of stmts that have a chance of being schedulable inside # the current loop nest, in this set: - reachable_insn_ids = set() + reachable_stmt_ids = set() active_groups = frozenset(sched_state.active_group_counts) - def insn_sort_key(insn_id): - insn = kernel.id_to_insn[insn_id] + def stmt_sort_key(stmt_id): + stmt = kernel.id_to_stmt[stmt_id] - # Sort by insn.id as a last criterion to achieve deterministic + # Sort by stmt.id as a last criterion to achieve deterministic # schedule generation order. - return (insn.priority, len(active_groups & insn.groups), insn.id) + return (stmt.priority, len(active_groups & stmt.groups), stmt.id) - insn_ids_to_try = sorted( - # Non-prescheduled instructions go first. - sched_state.unscheduled_insn_ids - sched_state.prescheduled_insn_ids, - key=insn_sort_key, reverse=True) + stmt_ids_to_try = sorted( + # Non-prescheduled statements go first. 
+ sched_state.unscheduled_stmt_ids - sched_state.prescheduled_stmt_ids, + key=stmt_sort_key, reverse=True) - insn_ids_to_try.extend( - insn_id + stmt_ids_to_try.extend( + stmt_id for item in sched_state.preschedule - for insn_id in sched_item_to_insn_id(item)) + for stmt_id in sched_item_to_stmt_id(item)) - for insn_id in insn_ids_to_try: - insn = kernel.id_to_insn[insn_id] + for stmt_id in stmt_ids_to_try: + stmt = kernel.id_to_stmt[stmt_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids + is_ready = stmt.depends_on <= sched_state.scheduled_stmt_ids if not is_ready: if debug_mode: - print("instruction '%s' is missing insn depedencies '%s'" % ( - format_insn(kernel, insn.id), ",".join( - insn.depends_on - sched_state.scheduled_insn_ids))) + print("statement '%s' is missing stmt depedencies '%s'" % ( + format_stmt(kernel, stmt.id), ",".join( + stmt.depends_on - sched_state.scheduled_stmt_ids))) continue - want = kernel.insn_inames(insn) - sched_state.parallel_inames + want = kernel.stmt_inames(stmt) - sched_state.parallel_inames have = active_inames_set - sched_state.parallel_inames - # If insn is boostable, it may be placed inside a more deeply + # If stmt is boostable, it may be placed inside a more deeply # nested loop without harm. orig_have = have if allow_boost: - # Note that the inames in 'insn.boostable_into' necessarily won't + # Note that the inames in 'stmt.boostable_into' necessarily won't # be contained in 'want'. 
- have = have - insn.boostable_into + have = have - stmt.boostable_into if want != have: is_ready = False if debug_mode: if want-have: - print("instruction '%s' is missing inames '%s'" - % (format_insn(kernel, insn.id), ",".join(want-have))) + print("statement '%s' is missing inames '%s'" + % (format_stmt(kernel, stmt.id), ",".join(want-have))) if have-want: - print("instruction '%s' won't work under inames '%s'" - % (format_insn(kernel, insn.id), ",".join(have-want))) + print("statement '%s' won't work under inames '%s'" + % (format_stmt(kernel, stmt.id), ",".join(have-want))) - # {{{ check if scheduling this insn is compatible with preschedule + # {{{ check if scheduling this stmt is compatible with preschedule - if insn_id in sched_state.prescheduled_insn_ids: - if isinstance(next_preschedule_item, RunInstruction): - next_preschedule_insn_id = next_preschedule_item.insn_id + if stmt_id in sched_state.prescheduled_stmt_ids: + if isinstance(next_preschedule_item, RunStatement): + next_preschedule_stmt_id = next_preschedule_item.stmt_id elif isinstance(next_preschedule_item, Barrier): - assert next_preschedule_item.originating_insn_id is not None - next_preschedule_insn_id = next_preschedule_item.originating_insn_id + assert next_preschedule_item.originating_stmt_id is not None + next_preschedule_stmt_id = next_preschedule_item.originating_stmt_id else: - next_preschedule_insn_id = None + next_preschedule_stmt_id = None - if next_preschedule_insn_id != insn_id: + if next_preschedule_stmt_id != stmt_id: if debug_mode: print("can't schedule '%s' because another preschedule " - "instruction precedes it" % format_insn(kernel, insn.id)) + "statement precedes it" % format_stmt(kernel, stmt.id)) is_ready = False # }}} - # {{{ check if scheduler state allows insn scheduling + # {{{ check if scheduler state allows stmt scheduling - from loopy.kernel.instruction import BarrierInstruction - if isinstance(insn, BarrierInstruction) and insn.kind == "global": + from 
loopy.kernel.statement import BarrierStatement + if isinstance(stmt, BarrierStatement) and stmt.kind == "global": if not sched_state.may_schedule_global_barriers: if debug_mode: print("can't schedule '%s' because global barriers are " - "not currently allowed" % format_insn(kernel, insn.id)) + "not currently allowed" % format_stmt(kernel, stmt.id)) is_ready = False else: if not sched_state.within_subkernel: if debug_mode: print("can't schedule '%s' because not within subkernel" - % format_insn(kernel, insn.id)) + % format_stmt(kernel, stmt.id)) is_ready = False # }}} # {{{ determine group-based readiness - if insn.conflicts_with_groups & active_groups: + if stmt.conflicts_with_groups & active_groups: is_ready = False if debug_mode: - print("instruction '%s' conflicts with active group(s) '%s'" - % (insn.id, ",".join( - active_groups & insn.conflicts_with_groups))) + print("statement '%s' conflicts with active group(s) '%s'" + % (stmt.id, ",".join( + active_groups & stmt.conflicts_with_groups))) # }}} # {{{ determine reachability if (not is_ready and have <= want): - reachable_insn_ids.add(insn_id) + reachable_stmt_ids.add(stmt_id) # }}} if is_ready and debug_mode: - print("ready to schedule '%s'" % format_insn(kernel, insn.id)) + print("ready to schedule '%s'" % format_stmt(kernel, stmt.id)) if is_ready and not debug_mode: - iid_set = frozenset([insn.id]) + iid_set = frozenset([stmt.id]) - # {{{ update active group counts for added instruction + # {{{ update active group counts for added statement - if insn.groups: + if stmt.groups: new_active_group_counts = sched_state.active_group_counts.copy() - for grp in insn.groups: + for grp in stmt.groups: if grp in new_active_group_counts: new_active_group_counts[grp] -= 1 if new_active_group_counts[grp] == 0: @@ -886,7 +886,7 @@ def generate_loop_schedules_internal( else: new_active_group_counts[grp] = ( - sched_state.group_insn_counts[grp] - 1) + sched_state.group_stmt_counts[grp] - 1) else: new_active_group_counts = 
sched_state.active_group_counts @@ -895,18 +895,18 @@ def generate_loop_schedules_internal( new_uses_of_boostability = [] if allow_boost: - if orig_have & insn.boostable_into: + if orig_have & stmt.boostable_into: new_uses_of_boostability.append( - (insn.id, orig_have & insn.boostable_into)) + (stmt.id, orig_have & stmt.boostable_into)) new_sched_state = sched_state.copy( - scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, - unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, + scheduled_stmt_ids=sched_state.scheduled_stmt_ids | iid_set, + unscheduled_stmt_ids=sched_state.unscheduled_stmt_ids - iid_set, schedule=( - sched_state.schedule + (RunInstruction(insn_id=insn.id),)), + sched_state.schedule + (RunStatement(stmt_id=stmt.id),)), preschedule=( sched_state.preschedule - if insn_id not in sched_state.prescheduled_insn_ids + if stmt_id not in sched_state.prescheduled_stmt_ids else sched_state.preschedule[1:]), active_group_counts=new_active_group_counts, uses_of_boostability=( @@ -923,9 +923,9 @@ def generate_loop_schedules_internal( allow_boost=rec_allow_boost, debug=debug): yield sub_sched - if not sched_state.group_insn_counts: + if not sched_state.group_stmt_counts: # No groups: We won't need to backtrack on scheduling - # instructions. + # statements. return # }}} @@ -949,21 +949,21 @@ def generate_loop_schedules_internal( can_leave = False elif last_entered_loop not in sched_state.breakable_inames: # If the iname is not breakable, then check that we've - # scheduled all the instructions that require it. + # scheduled all the statements that require it. 
- for insn_id in sched_state.unscheduled_insn_ids: - insn = kernel.id_to_insn[insn_id] - if last_entered_loop in kernel.insn_inames(insn): + for stmt_id in sched_state.unscheduled_stmt_ids: + stmt = kernel.id_to_stmt[stmt_id] + if last_entered_loop in kernel.stmt_inames(stmt): if debug_mode: print("cannot leave '%s' because '%s' still depends on it" - % (last_entered_loop, format_insn(kernel, insn.id))) + % (last_entered_loop, format_stmt(kernel, stmt.id))) - # check if there's a dependency of insn that needs to be + # check if there's a dependency of stmt that needs to be # outside of last_entered_loop. - for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): - subdep = kernel.id_to_insn[insn_id] - want = (kernel.insn_inames(subdep_id) + for subdep_id in gen_dependencies_except(kernel, stmt_id, + sched_state.scheduled_stmt_ids): + subdep = kernel.id_to_stmt[stmt_id] + want = (kernel.stmt_inames(subdep_id) - sched_state.parallel_inames) if ( last_entered_loop not in want and @@ -982,10 +982,10 @@ def generate_loop_schedules_internal( "warn": Fore.RED + Style.BRIGHT, "reset_all": Style.RESET_ALL, "iname": last_entered_loop, - "subdep": format_insn_id(kernel, subdep_id), - "dep": format_insn_id(kernel, insn_id), - "subdep_i": format_insn(kernel, subdep_id), - "dep_i": format_insn(kernel, insn_id), + "subdep": format_stmt_id(kernel, subdep_id), + "dep": format_stmt_id(kernel, stmt_id), + "subdep_i": format_stmt(kernel, subdep_id), + "dep_i": format_stmt(kernel, stmt_id), }) can_leave = False @@ -994,14 +994,14 @@ def generate_loop_schedules_internal( if can_leave: can_leave = False - # We may only leave this loop if we've scheduled an instruction + # We may only leave this loop if we've scheduled an statement # since entering it. 
- seen_an_insn = False + seen_an_stmt = False ignore_count = 0 for sched_item in sched_state.schedule[::-1]: - if isinstance(sched_item, RunInstruction): - seen_an_insn = True + if isinstance(sched_item, RunStatement): + seen_an_stmt = True elif isinstance(sched_item, LeaveLoop): ignore_count += 1 elif isinstance(sched_item, EnterLoop): @@ -1009,7 +1009,7 @@ def generate_loop_schedules_internal( ignore_count -= 1 else: assert sched_item.iname == last_entered_loop - if seen_an_insn: + if seen_an_stmt: can_leave = True break @@ -1036,10 +1036,10 @@ def generate_loop_schedules_internal( # {{{ see if any loop can be entered now - # Find inames that are being referenced by as yet unscheduled instructions. + # Find inames that are being referenced by as yet unscheduled statements. needed_inames = set() - for insn_id in sched_state.unscheduled_insn_ids: - needed_inames.update(kernel.insn_inames(insn_id)) + for stmt_id in sched_state.unscheduled_stmt_ids: + needed_inames.update(kernel.stmt_inames(stmt_id)) needed_inames = (needed_inames # There's no notion of 'entering' a parallel loop @@ -1053,8 +1053,8 @@ def generate_loop_schedules_internal( print("inames still needed :", ",".join(needed_inames)) print("active inames :", ",".join(sched_state.active_inames)) print("inames entered so far :", ",".join(sched_state.entered_inames)) - print("reachable insns:", ",".join(reachable_insn_ids)) - print("active groups (with insn counts):", ",".join( + print("reachable stmts:", ",".join(reachable_stmt_ids)) + print("active groups (with stmt counts):", ",".join( "%s: %d" % (grp, c) for grp, c in six.iteritems(sched_state.active_group_counts))) print(75*"-") @@ -1086,18 +1086,18 @@ def generate_loop_schedules_internal( continue if ( - not sched_state.loop_insn_dep_map.get(iname, set()) - <= sched_state.scheduled_insn_ids): + not sched_state.loop_stmt_dep_map.get(iname, set()) + <= sched_state.scheduled_stmt_ids): if debug_mode: print( "scheduling {iname} prohibited by loop dependency 
map " - "(needs '{needed_insns})'" + "(needs '{needed_stmts})'" .format( iname=iname, - needed_insns=", ".join( - sched_state.loop_insn_dep_map.get(iname, set()) + needed_stmts=", ".join( + sched_state.loop_stmt_dep_map.get(iname, set()) - - sched_state.scheduled_insn_ids))) + sched_state.scheduled_stmt_ids))) continue @@ -1120,8 +1120,8 @@ def generate_loop_schedules_internal( iname_home_domain_params & set(kernel.temporary_variables)): - writer_insn, = kernel.writer_map()[domain_par] - if writer_insn not in sched_state.scheduled_insn_ids: + writer_stmt, = kernel.writer_map()[domain_par] + if writer_stmt not in sched_state.scheduled_stmt_ids: data_dep_written = False if debug_mode: print("iname '%s' not scheduled because domain " @@ -1134,21 +1134,21 @@ def generate_loop_schedules_internal( # }}} - # {{{ determine if that gets us closer to being able to schedule an insn + # {{{ determine if that gets us closer to being able to schedule an stmt - usefulness = None # highest insn priority enabled by iname + usefulness = None # highest stmt priority enabled by iname hypothetically_active_loops = active_inames_set | set([iname]) - for insn_id in reachable_insn_ids: - insn = kernel.id_to_insn[insn_id] + for stmt_id in reachable_stmt_ids: + stmt = kernel.id_to_stmt[stmt_id] - want = kernel.insn_inames(insn) | insn.boostable_into + want = kernel.stmt_inames(stmt) | stmt.boostable_into if hypothetically_active_loops <= want: if usefulness is None: - usefulness = insn.priority + usefulness = stmt.priority else: - usefulness = max(usefulness, insn.priority) + usefulness = max(usefulness, stmt.priority) if usefulness is None: if debug_mode: @@ -1262,18 +1262,18 @@ def generate_loop_schedules_internal( if ( not sched_state.active_inames - and not sched_state.unscheduled_insn_ids + and not sched_state.unscheduled_stmt_ids and not sched_state.preschedule): # if done, yield result debug.log_success(sched_state.schedule) - for boost_insn_id, boost_inames in 
sched_state.uses_of_boostability: + for boost_stmt_id, boost_inames in sched_state.uses_of_boostability: warn_with_kernel( kernel, "used_boostability", - "instruction '%s' was implicitly nested inside " + "statement '%s' was implicitly nested inside " "inames '%s' based on an idempotence heuristic. " "This is deprecated and will stop working in loopy 2017.x." - % (boost_insn_id, ", ".join(boost_inames)), + % (boost_stmt_id, ", ".join(boost_inames)), DeprecationWarning) yield sched_state.schedule @@ -1296,31 +1296,31 @@ def generate_loop_schedules_internal( # {{{ filter nops from schedule def filter_nops_from_schedule(kernel, schedule): - from loopy.kernel.instruction import NoOpInstruction + from loopy.kernel.statement import NoOpStatement return [ sched_item for sched_item in schedule - if (not isinstance(sched_item, RunInstruction) - or not isinstance(kernel.id_to_insn[sched_item.insn_id], - NoOpInstruction))] + if (not isinstance(sched_item, RunStatement) + or not isinstance(kernel.id_to_stmt[sched_item.stmt_id], + NoOpStatement))] # }}} -# {{{ convert barrier instructions to proper barriers +# {{{ convert barrier statements to proper barriers -def convert_barrier_instructions_to_barriers(kernel, schedule): - from loopy.kernel.instruction import BarrierInstruction +def convert_barrier_statements_to_barriers(kernel, schedule): + from loopy.kernel.statement import BarrierStatement result = [] for sched_item in schedule: - if isinstance(sched_item, RunInstruction): - insn = kernel.id_to_insn[sched_item.insn_id] - if isinstance(insn, BarrierInstruction): + if isinstance(sched_item, RunStatement): + stmt = kernel.id_to_stmt[sched_item.stmt_id] + if isinstance(stmt, BarrierStatement): result.append(Barrier( - kind=insn.kind, - originating_insn_id=insn.id, - comment="Barrier inserted due to %s" % insn.id)) + kind=stmt.kind, + originating_stmt_id=stmt.id, + comment="Barrier inserted due to %s" % stmt.id)) continue result.append(sched_item) @@ -1336,16 +1336,16 @@ class 
DependencyRecord(ImmutableRecord): """ .. attribute:: source - A :class:`loopy.InstructionBase` instance. + A :class:`loopy.StatementBase` instance. .. attribute:: target - A :class:`loopy.InstructionBase` instance. + A :class:`loopy.StatementBase` instance. .. attribute:: dep_descr A string containing a phrase describing the dependency. The variables - '{src}' and '{tgt}' will be replaced by their respective instruction IDs. + '{src}' and '{tgt}' will be replaced by their respective statement IDs. .. attribute:: variable @@ -1435,10 +1435,10 @@ class DependencyTracker(object): def add_source(self, source): """ - Specify that an instruction may be used as the source of a dependency edge. + Specify that a statement may be used as the source of a dependency edge. """ - # If source is an insn ID, look up the actual instruction. - source = self.kernel.id_to_insn.get(source, source) + # If source is a stmt ID, look up the actual statement. + source = self.kernel.id_to_stmt.get(source, source) for written in self.map_to_base_storage( set(source.assignee_var_names()) & self.relevant_vars): @@ -1451,13 +1451,13 @@ class DependencyTracker(object): def gen_dependencies_with_target_at(self, target): """ Generate :class:`DependencyRecord` instances for dependencies edges - whose target is the given instruction. + whose target is the given statement. - :arg target: The ID of the instruction for which dependencies + :arg target: The ID of the statement for which dependencies with conflicting var access should be found. """ - # If target is an insn ID, look up the actual instruction. - target = self.kernel.id_to_insn.get(target, target) + # If target is a stmt ID, look up the actual statement. 
+ target = self.kernel.id_to_stmt.get(target, target) tgt_write = self.map_to_base_storage( set(target.assignee_var_names()) & self.relevant_vars) @@ -1493,8 +1493,8 @@ class DependencyTracker(object): continue yield DependencyRecord( - source=self.kernel.id_to_insn[source], - target=self.kernel.id_to_insn[target], + source=self.kernel.id_to_stmt[source], + target=self.kernel.id_to_stmt[target], dep_descr=dep_descr, variable=var, var_kind=self.var_kind) @@ -1502,13 +1502,13 @@ class DependencyTracker(object): def describe_dependency(self, source, target): dep_descr = None - source = self.kernel.id_to_insn[source] - target = self.kernel.id_to_insn[target] + source = self.kernel.id_to_stmt[source] + target = self.kernel.id_to_stmt[target] if self.reverse: source, target = target, source - target_deps = self.kernel.recursive_insn_dep_map()[target.id] + target_deps = self.kernel.recursive_stmt_dep_map()[target.id] if source.id in target_deps: if self.reverse: dep_descr = "{tgt} rev-depends on {src}" @@ -1529,15 +1529,15 @@ def barrier_kind_more_or_equally_global(kind1, kind2): return (kind1 == kind2) or (kind1 == "global" and kind2 == "local") -def insn_ids_reaching_end_without_intervening_barrier(schedule, kind): - return _insn_ids_reaching_end(schedule, kind, reverse=False) +def stmt_ids_reaching_end_without_intervening_barrier(schedule, kind): + return _stmt_ids_reaching_end(schedule, kind, reverse=False) -def insn_ids_reachable_from_start_without_intervening_barrier(schedule, kind): - return _insn_ids_reaching_end(schedule, kind, reverse=True) +def stmt_ids_reachable_from_start_without_intervening_barrier(schedule, kind): + return _stmt_ids_reaching_end(schedule, kind, reverse=True) -def _insn_ids_reaching_end(schedule, kind, reverse): +def _stmt_ids_reaching_end(schedule, kind, reverse): if reverse: schedule = reversed(schedule) enter_scope_item_kind = LeaveLoop @@ -1546,45 +1546,45 @@ def _insn_ids_reaching_end(schedule, kind, reverse): enter_scope_item_kind = 
EnterLoop leave_scope_item_kind = LeaveLoop - insn_ids_alive_at_scope = [set()] + stmt_ids_alive_at_scope = [set()] for sched_item in schedule: if isinstance(sched_item, enter_scope_item_kind): - insn_ids_alive_at_scope.append(set()) + stmt_ids_alive_at_scope.append(set()) elif isinstance(sched_item, leave_scope_item_kind): - innermost_scope = insn_ids_alive_at_scope.pop() - # Instructions in deeper scopes are alive but could be killed by + innermost_scope = stmt_ids_alive_at_scope.pop() + # Statements in deeper scopes are alive but could be killed by # barriers at a shallower level, e.g.: # # for i - # insn0 + # stmt0 # end - # barrier() <= kills insn0 + # barrier() <= kills stmt0 # # Hence we merge this scope into the parent scope. - insn_ids_alive_at_scope[-1].update(innermost_scope) + stmt_ids_alive_at_scope[-1].update(innermost_scope) elif isinstance(sched_item, Barrier): - # This barrier kills only the instruction ids that are alive at + # This barrier kills only the statement ids that are alive at # the current scope (or deeper). 
Without further analysis, we - # can't assume that instructions at shallower scope can be + # can't assume that statements at shallower scope can be # killed by deeper barriers, since loops might be empty, e.g.: # - # insn0 <= isn't killed by barrier (i loop could be empty) + # stmt0 <= isn't killed by barrier (i loop could be empty) # for i - # insn1 <= is killed by barrier + # stmt1 <= is killed by barrier # for j - # insn2 <= is killed by barrier + # stmt2 <= is killed by barrier # end # barrier() # end if barrier_kind_more_or_equally_global(sched_item.kind, kind): - insn_ids_alive_at_scope[-1].clear() + stmt_ids_alive_at_scope[-1].clear() else: - insn_ids_alive_at_scope[-1] |= set( - insn_id for insn_id in sched_item_to_insn_id(sched_item)) + stmt_ids_alive_at_scope[-1] |= set( + stmt_id for stmt_id in sched_item_to_stmt_id(sched_item)) - assert len(insn_ids_alive_at_scope) == 1 - return insn_ids_alive_at_scope[-1] + assert len(stmt_ids_alive_at_scope) == 1 + return stmt_ids_alive_at_scope[-1] def append_barrier_or_raise_error(schedule, dep, verify_only): @@ -1594,7 +1594,7 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): "Dependency '%s' (for variable '%s') " "requires synchronization " "by a %s barrier (add a 'no_sync_with' " - "instruction option to state that no " + "statement option to state that no " "synchronization is needed)" % ( dep.dep_descr.format( @@ -1608,7 +1608,7 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): schedule.append(Barrier( comment=comment, kind=dep.var_kind, - originating_insn_id=None)) + originating_stmt_id=None)) def insert_barriers(kernel, schedule, kind, verify_only, level=0): @@ -1629,10 +1629,10 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): if reverse: # Populate the dependency tracker with sources from the tail end of # the schedule block. 
- for insn_id in ( - insn_ids_reaching_end_without_intervening_barrier( + for stmt_id in ( + stmt_ids_reaching_end_without_intervening_barrier( schedule, kind)): - dep_tracker.add_source(insn_id) + dep_tracker.add_source(stmt_id) result = [] @@ -1644,11 +1644,11 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): subloop, new_i = gather_schedule_block(schedule, i) loop_head = ( - insn_ids_reachable_from_start_without_intervening_barrier( + stmt_ids_reachable_from_start_without_intervening_barrier( subloop, kind)) loop_tail = ( - insn_ids_reaching_end_without_intervening_barrier( + stmt_ids_reaching_end_without_intervening_barrier( subloop, kind)) # Checks if a barrier is needed before the loop. This handles @@ -1662,8 +1662,8 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): # ... from itertools import chain for dep in chain.from_iterable( - dep_tracker.gen_dependencies_with_target_at(insn) - for insn in loop_head): + dep_tracker.gen_dependencies_with_target_at(stmt) + for stmt in loop_head): append_barrier_or_raise_error(result, dep, verify_only) # This barrier gets inserted outside the loop, hence it is # executed unconditionally and so kills all sources before @@ -1692,14 +1692,14 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): dep_tracker.discard_all_sources() i += 1 - elif isinstance(sched_item, RunInstruction): + elif isinstance(sched_item, RunStatement): for dep in dep_tracker.gen_dependencies_with_target_at( - sched_item.insn_id): + sched_item.stmt_id): append_barrier_or_raise_error(result, dep, verify_only) dep_tracker.discard_all_sources() break result.append(sched_item) - dep_tracker.add_source(sched_item.insn_id) + dep_tracker.add_source(sched_item.stmt_id) i += 1 elif isinstance(sched_item, (CallKernel, ReturnFromKernel)): @@ -1731,7 +1731,7 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): i = new_i elif isinstance(sched_item, - (Barrier, RunInstruction, CallKernel, 
ReturnFromKernel)): + (Barrier, RunStatement, CallKernel, ReturnFromKernel)): result.append(sched_item) i += 1 @@ -1756,7 +1756,7 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): def generate_loop_schedules(kernel, debug_args={}): from pytools import MinRecursionLimit - with MinRecursionLimit(max(len(kernel.instructions) * 2, + with MinRecursionLimit(max(len(kernel.statements) * 2, len(kernel.all_inames()) * 4)): for sched in generate_loop_schedules_inner(kernel, debug_args=debug_args): yield sched @@ -1778,14 +1778,14 @@ def generate_loop_schedules_inner(kernel, debug_args={}): preschedule = kernel.schedule if kernel.state == kernel_state.SCHEDULED else () prescheduled_inames = set( - insn.iname - for insn in preschedule - if isinstance(insn, EnterLoop)) + stmt.iname + for stmt in preschedule + if isinstance(stmt, EnterLoop)) - prescheduled_insn_ids = set( - insn_id + prescheduled_stmt_ids = set( + stmt_id for item in preschedule - for insn_id in sched_item_to_insn_id(item)) + for stmt_id in sched_item_to_stmt_id(item)) from loopy.kernel.data import IlpBaseTag, ConcurrentTag, VectorizeTag ilp_inames = set( @@ -1805,7 +1805,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): sched_state = SchedulerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, - loop_insn_dep_map=find_loop_insn_dep_map( + loop_stmt_dep_map=find_loop_stmt_dep_map( kernel, loop_nest_with_map=loop_nest_with_map, loop_nest_around_map=loop_nest_around_map), @@ -1814,7 +1814,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): vec_inames=vec_inames, prescheduled_inames=prescheduled_inames, - prescheduled_insn_ids=prescheduled_insn_ids, + prescheduled_stmt_ids=prescheduled_stmt_ids, # time-varying part active_inames=(), @@ -1823,8 +1823,8 @@ def generate_loop_schedules_inner(kernel, debug_args={}): schedule=(), - unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), - scheduled_insn_ids=frozenset(), + unscheduled_stmt_ids=set(stmt.id 
for stmt in kernel.statements), + scheduled_stmt_ids=frozenset(), within_subkernel=kernel.state != kernel_state.SCHEDULED, may_schedule_global_barriers=True, @@ -1833,7 +1833,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): # ilp and vec are not parallel for the purposes of the scheduler parallel_inames=parallel_inames - ilp_inames - vec_inames, - group_insn_counts=group_insn_counts(kernel), + group_stmt_counts=group_stmt_counts(kernel), active_group_counts={}, uses_of_boostability=[]) @@ -1881,7 +1881,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): debug.stop() gen_sched = filter_nops_from_schedule(kernel, gen_sched) - gen_sched = convert_barrier_instructions_to_barriers( + gen_sched = convert_barrier_statements_to_barriers( kernel, gen_sched) gsize, lsize = kernel.get_grid_size_upper_bounds() diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 1a0789c2f..9c73fdffe 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -24,7 +24,7 @@ THE SOFTWARE. 
from loopy.diagnostic import LoopyError from loopy.schedule import (Barrier, CallKernel, EnterLoop, LeaveLoop, - ReturnFromKernel, RunInstruction) + ReturnFromKernel, RunStatement) from loopy.schedule.tools import get_block_boundaries @@ -72,7 +72,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): while i <= end_idx: sched_item = schedule[i] - if isinstance(sched_item, RunInstruction): + if isinstance(sched_item, RunStatement): current_chunk.append(sched_item) i += 1 @@ -145,7 +145,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): new_schedule + [dummy_return.copy()]) - # Assign names, extra_inames to CallKernel / ReturnFromKernel instructions + # Assign names, extra_inames to CallKernel / ReturnFromKernel statements inames = [] for idx, sched_item in enumerate(new_schedule): diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index f9b08d343..1801ff8e0 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -51,20 +51,20 @@ def get_block_boundaries(schedule): # {{{ subkernel tools def temporaries_read_in_subkernel(kernel, subkernel): - from loopy.kernel.tools import get_subkernel_to_insn_id_map - insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel] + from loopy.kernel.tools import get_subkernel_to_stmt_id_map + stmt_ids = get_subkernel_to_stmt_id_map(kernel)[subkernel] return frozenset(tv - for insn_id in insn_ids - for tv in kernel.id_to_insn[insn_id].read_dependency_names() + for stmt_id in stmt_ids + for tv in kernel.id_to_stmt[stmt_id].read_dependency_names() if tv in kernel.temporary_variables) def temporaries_written_in_subkernel(kernel, subkernel): - from loopy.kernel.tools import get_subkernel_to_insn_id_map - insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel] + from loopy.kernel.tools import get_subkernel_to_stmt_id_map + stmt_ids = get_subkernel_to_stmt_id_map(kernel)[subkernel] return frozenset(tv - for insn_id in insn_ids - for tv in 
kernel.id_to_insn[insn_id].write_dependency_names() + for stmt_id in stmt_ids + for tv in kernel.id_to_stmt[stmt_id].write_dependency_names() if tv in kernel.temporary_variables) # }}} @@ -75,7 +75,7 @@ def temporaries_written_in_subkernel(kernel, subkernel): def add_extra_args_to_schedule(kernel): """ Fill the `extra_args` fields in all the :class:`loopy.schedule.CallKernel` - instructions in the schedule with global temporaries. + statements in the schedule with global temporaries. """ new_schedule = [] from loopy.schedule import CallKernel diff --git a/loopy/statistics.py b/loopy/statistics.py index 88d7ec328..69bf555bc 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1097,7 +1097,7 @@ def count(kernel, set, space=None): return add_assumptions_guard(kernel, count) -def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): +def get_unused_hw_axes_factor(knl, stmt, disregard_local_axes, space=None): # FIXME: Multi-kernel support gsize, lsize = knl.get_grid_size_upper_bounds() @@ -1105,7 +1105,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): l_used = set() from loopy.kernel.data import LocalIndexTag, GroupIndexTag - for iname in knl.insn_inames(insn): + for iname in knl.stmt_inames(stmt): tag = knl.iname_to_tag.get(iname) if isinstance(tag, LocalIndexTag): @@ -1135,17 +1135,17 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): return add_assumptions_guard(knl, result) -def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False): - insn_inames = knl.insn_inames(insn) +def count_stmt_runs(knl, stmt, count_redundant_work, disregard_local_axes=False): + stmt_inames = knl.stmt_inames(stmt) if disregard_local_axes: from loopy.kernel.data import LocalIndexTag - insn_inames = [iname for iname in insn_inames if not + stmt_inames = [iname for iname in stmt_inames if not isinstance(knl.iname_to_tag.get(iname), LocalIndexTag)] - inames_domain = 
knl.get_inames_domain(insn_inames) + inames_domain = knl.get_inames_domain(stmt_inames) domain = (inames_domain.project_out_except( - insn_inames, [dim_type.set])) + stmt_inames, [dim_type.set])) space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT, set=[], params=knl.outer_params()) @@ -1153,7 +1153,7 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False) c = count(knl, domain, space=space) if count_redundant_work: - unused_fac = get_unused_hw_axes_factor(knl, insn, + unused_fac = get_unused_hw_axes_factor(knl, stmt, disregard_local_axes=disregard_local_axes, space=space) return c * unused_fac @@ -1210,10 +1210,10 @@ def get_op_map(knl, numpy_types=True, count_redundant_work=False): op_map = ToCountMap() op_counter = ExpressionOpCounter(knl) - for insn in knl.instructions: - ops = op_counter(insn.assignee) + op_counter(insn.expression) - op_map = op_map + ops*count_insn_runs( - knl, insn, + for stmt in knl.statements: + ops = op_counter(stmt.assignee) + op_counter(stmt.expression) + op_map = op_map + ops*count_stmt_runs( + knl, stmt, count_redundant_work=count_redundant_work) if numpy_types: @@ -1296,11 +1296,11 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): cache_holder = CacheHolder() - @memoize_in(cache_holder, "insn_count") - def get_insn_count(knl, insn_id, uniform=False): - insn = knl.id_to_insn[insn_id] - return count_insn_runs( - knl, insn, disregard_local_axes=uniform, + @memoize_in(cache_holder, "stmt_count") + def get_stmt_count(knl, stmt_id, uniform=False): + stmt = knl.id_to_stmt[stmt_id] + return count_stmt_runs( + knl, stmt, disregard_local_axes=uniform, count_redundant_work=count_redundant_work) knl = infer_unknown_types(knl, expect_completion=True) @@ -1310,13 +1310,13 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): access_counter_g = GlobalMemAccessCounter(knl) access_counter_l = LocalMemAccessCounter(knl) - for insn in knl.instructions: + for stmt in 
knl.statements: access_expr = ( - access_counter_g(insn.expression) - + access_counter_l(insn.expression) + access_counter_g(stmt.expression) + + access_counter_l(stmt.expression) ).with_set_attributes(direction="load") - access_assignee_g = access_counter_g(insn.assignee).with_set_attributes( + access_assignee_g = access_counter_g(stmt.assignee).with_set_attributes( direction="store") # FIXME: (!!!!) for now, don't count writes to local mem @@ -1329,7 +1329,7 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): access_map = ( access_map + ToCountMap({key: val}) - * get_insn_count(knl, insn.id, is_uniform)) + * get_stmt_count(knl, stmt.id, is_uniform)) #currently not counting stride of local mem access for key, val in six.iteritems(access_assignee_g.count_map): @@ -1339,7 +1339,7 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): access_map = ( access_map + ToCountMap({key: val}) - * get_insn_count(knl, insn.id, is_uniform)) + * get_stmt_count(knl, stmt.id, is_uniform)) # for now, don't count writes to local mem if numpy_types: @@ -1387,7 +1387,7 @@ def get_synchronization_map(knl): from loopy.preprocess import preprocess_kernel, infer_unknown_types from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, - CallKernel, ReturnFromKernel, RunInstruction) + CallKernel, ReturnFromKernel, RunStatement) from operator import mul knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl) @@ -1424,7 +1424,7 @@ def get_synchronization_map(knl): result = result + ToCountMap( {"kernel_launch": get_count_poly(iname_list)}) - elif isinstance(sched_item, (ReturnFromKernel, RunInstruction)): + elif isinstance(sched_item, (ReturnFromKernel, RunStatement)): pass else: @@ -1459,24 +1459,24 @@ def gather_access_footprints(kernel, ignore_uncountable=False): write_footprints = [] read_footprints = [] - for insn in kernel.instructions: - if not isinstance(insn, MultiAssignmentBase): + for stmt in 
kernel.statements: + if not isinstance(stmt, MultiAssignmentBase): warn_with_kernel(kernel, "count_non_assignment", - "Non-assignment instruction encountered in " + "Non-assignment statement encountered in " "gather_access_footprints, not counted") continue - insn_inames = kernel.insn_inames(insn) - inames_domain = kernel.get_inames_domain(insn_inames) - domain = (inames_domain.project_out_except(insn_inames, + stmt_inames = kernel.stmt_inames(stmt) + inames_domain = kernel.get_inames_domain(stmt_inames) + domain = (inames_domain.project_out_except(stmt_inames, [dim_type.set])) afg = AccessFootprintGatherer(kernel, domain, ignore_uncountable=ignore_uncountable) - for assignee in insn.assignees: - write_footprints.append(afg(insn.assignees)) - read_footprints.append(afg(insn.expression)) + for assignee in stmt.assignees: + write_footprints.append(afg(stmt.assignees)) + read_footprints.append(afg(stmt.expression)) write_footprints = AccessFootprintGatherer.combine(write_footprints) read_footprints = AccessFootprintGatherer.combine(read_footprints) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 543c2743b..8ac520d70 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -595,7 +595,7 @@ def parse_tagged_name(expr): class ExpansionState(ImmutableRecord): """ .. attribute:: kernel - .. attribute:: instruction + .. attribute:: statement .. 
attribute:: stack @@ -608,8 +608,8 @@ class ExpansionState(ImmutableRecord): """ @property - def insn_id(self): - return self.instruction.id + def stmt_id(self): + return self.statement.id def apply_arg_context(self, expr): from pymbolic.mapper.substitutor import make_subst_func @@ -651,12 +651,12 @@ class SubstitutionRuleRenamer(IdentityMapper): return TaggedVariable(new_name, tag) -def rename_subst_rules_in_instructions(insns, renames): +def rename_subst_rules_in_statements(stmts, renames): subst_renamer = SubstitutionRuleRenamer(renames) return [ - insn.with_transformed_expressions(subst_renamer) - for insn in insns] + stmt.with_transformed_expressions(subst_renamer) + for stmt in stmts] class SubstitutionRuleMappingContext(object): @@ -766,11 +766,11 @@ class SubstitutionRuleMappingContext(object): def finish_kernel(self, kernel): new_substs, renames = self._get_new_substitutions_and_renames() - new_insns = rename_subst_rules_in_instructions(kernel.instructions, renames) + new_stmts = rename_subst_rules_in_statements(kernel.statements, renames) return kernel.copy( substitutions=new_substs, - instructions=new_insns) + statements=new_stmts) class RuleAwareIdentityMapper(IdentityMapper): @@ -844,30 +844,30 @@ class RuleAwareIdentityMapper(IdentityMapper): else: return sym - def __call__(self, expr, kernel, insn): - from loopy.kernel.data import InstructionBase - assert insn is None or isinstance(insn, InstructionBase) + def __call__(self, expr, kernel, stmt): + from loopy.kernel.data import StatementBase + assert stmt is None or isinstance(stmt, StatementBase) return IdentityMapper.__call__(self, expr, ExpansionState( kernel=kernel, - instruction=insn, + statement=stmt, stack=(), arg_context={})) - def map_instruction(self, kernel, insn): - return insn + def map_statement(self, kernel, stmt): + return stmt def map_kernel(self, kernel): - new_insns = [ + new_stmts = [ # While subst rules are not allowed in assignees, the mapper # may perform tasks entirely 
unrelated to subst rules, so # we must map assignees, too. - self.map_instruction(kernel, - insn.with_transformed_expressions(self, kernel, insn)) - for insn in kernel.instructions] + self.map_statement(kernel, + stmt.with_transformed_expressions(self, kernel, stmt)) + for stmt in kernel.statements] - return kernel.copy(instructions=new_insns) + return kernel.copy(statements=new_stmts) class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper): @@ -880,7 +880,7 @@ class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper): def map_variable(self, expr, expn_state): if (expr.name in expn_state.arg_context or not self.within( - expn_state.kernel, expn_state.instruction, expn_state.stack)): + expn_state.kernel, expn_state.statement, expn_state.stack)): return super(RuleAwareSubstitutionMapper, self).map_variable( expr, expn_state) @@ -907,7 +907,7 @@ class RuleAwareSubstitutionRuleExpander(RuleAwareIdentityMapper): new_stack = expn_state.stack + ((name, tags),) - if self.within(expn_state.kernel, expn_state.instruction, new_stack): + if self.within(expn_state.kernel, expn_state.statement, new_stack): # expand rule = self.rules[name] diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 5800a0236..f34de9150 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -201,10 +201,10 @@ class ASTBuilderBase(object): def get_image_arg_decl(self, name, shape, num_target_axes, dtype, is_written): raise NotImplementedError() - def emit_assignment(self, codegen_state, insn): + def emit_assignment(self, codegen_state, stmt): raise NotImplementedError() - def emit_multiple_assignment(self, codegen_state, insn): + def emit_multiple_assignment(self, codegen_state, stmt): raise NotImplementedError() def emit_sequential_loop(self, codegen_state, iname, iname_dtype, diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index e54ac0f69..09d138241 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -607,18 +607,18 @@ 
class CASTBuilder(ASTBuilderBase): return arg_decl - def emit_assignment(self, codegen_state, insn): + def emit_assignment(self, codegen_state, stmt): kernel = codegen_state.kernel ecm = codegen_state.expression_to_code_mapper - assignee_var_name, = insn.assignee_var_names() + assignee_var_name, = stmt.assignee_var_names() lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name) lhs_dtype = lhs_var.dtype - if insn.atomicity is not None: + if stmt.atomicity is not None: lhs_atomicity = [ - a for a in insn.atomicity if a.var_name == assignee_var_name] + a for a in stmt.atomicity if a.var_name == assignee_var_name] assert len(lhs_atomicity) <= 1 if lhs_atomicity: lhs_atomicity, = lhs_atomicity @@ -630,13 +630,13 @@ class CASTBuilder(ASTBuilderBase): from loopy.kernel.data import AtomicInit, AtomicUpdate from loopy.expression import dtype_to_type_context - lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None) + lhs_code = ecm(stmt.assignee, prec=PREC_NONE, type_context=None) rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype) if lhs_atomicity is None: from cgen import Assign return Assign( lhs_code, - ecm(insn.expression, prec=PREC_NONE, + ecm(stmt.expression, prec=PREC_NONE, type_context=rhs_type_context, needed_dtype=lhs_dtype)) @@ -647,7 +647,7 @@ class CASTBuilder(ASTBuilderBase): codegen_state.seen_atomic_dtypes.add(lhs_dtype) return codegen_state.ast_builder.emit_atomic_update( codegen_state, lhs_atomicity, lhs_var, - insn.assignee, insn.expression, + stmt.assignee, stmt.expression, lhs_dtype, rhs_type_context) else: @@ -658,16 +658,16 @@ class CASTBuilder(ASTBuilderBase): lhs_expr, rhs_expr, lhs_dtype): raise NotImplementedError("atomic updates in %s" % type(self).__name__) - def emit_tuple_assignment(self, codegen_state, insn): + def emit_tuple_assignment(self, codegen_state, stmt): ecm = codegen_state.expression_to_code_mapper from cgen import Assign, block_if_necessary assignments = [] for i, (assignee, parameter) in 
enumerate( - zip(insn.assignees, insn.expression.parameters)): + zip(stmt.assignees, stmt.expression.parameters)): lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None) - assignee_var_name = insn.assignee_var_names()[i] + assignee_var_name = stmt.assignee_var_names()[i] lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name) lhs_dtype = lhs_var.dtype @@ -681,21 +681,21 @@ class CASTBuilder(ASTBuilderBase): return block_if_necessary(assignments) - def emit_multiple_assignment(self, codegen_state, insn): + def emit_multiple_assignment(self, codegen_state, stmt): ecm = codegen_state.expression_to_code_mapper from pymbolic.primitives import Variable from pymbolic.mapper.stringifier import PREC_NONE - func_id = insn.expression.function - parameters = insn.expression.parameters + func_id = stmt.expression.function + parameters = stmt.expression.parameters if isinstance(func_id, Variable): func_id = func_id.name assignee_var_descriptors = [ codegen_state.kernel.get_var_descriptor(a) - for a in insn.assignee_var_names()] + for a in stmt.assignee_var_names()] par_dtypes = tuple(ecm.infer_type(par) for par in parameters) @@ -709,7 +709,7 @@ class CASTBuilder(ASTBuilderBase): if mangle_result.target_name == "loopy_make_tuple": # This shorcut avoids actually having to emit a 'make_tuple' function. 
- return self.emit_tuple_assignment(codegen_state, insn) + return self.emit_tuple_assignment(codegen_state, stmt) from loopy.expression import dtype_to_type_context c_parameters = [ @@ -727,10 +727,10 @@ class CASTBuilder(ASTBuilderBase): from pymbolic import var for i, (a, tgt_dtype) in enumerate( - zip(insn.assignees[1:], mangle_result.result_dtypes[1:])): + zip(stmt.assignees[1:], mangle_result.result_dtypes[1:])): if tgt_dtype != ecm.infer_type(a): raise LoopyError("type mismatch in %d'th (1-based) left-hand " - "side of instruction '%s'" % (i+1, insn.id)) + "side of statement '%s'" % (i+1, stmt.id)) c_parameters.append( # TODO Yuck: The "where-at function": &(...) var("&")( @@ -752,7 +752,7 @@ class CASTBuilder(ASTBuilderBase): assignee_var_descriptors[0].dtype, result) - lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None) + lhs_code = ecm(stmt.assignees[0], prec=PREC_NONE, type_context=None) from cgen import Assign return Assign( diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 2bdffb5aa..484973433 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -244,10 +244,10 @@ class CUDACASTBuilder(CASTBuilder): from cgen import Extern fdecl = Extern("C", fdecl) - from loopy.schedule import get_insn_ids_for_block_at + from loopy.schedule import get_stmt_ids_for_block_at _, local_grid_size = \ - codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at( + codegen_state.kernel.get_grid_sizes_for_stmt_ids_as_exprs( + get_stmt_ids_for_block_at( codegen_state.kernel.schedule, schedule_index)) from loopy.symbolic import get_dependencies @@ -294,7 +294,7 @@ class CUDACASTBuilder(CASTBuilder): def emit_barrier(self, kind, comment): """ :arg kind: ``"local"`` or ``"global"`` - :return: a :class:`loopy.codegen.GeneratedInstruction`. + :return: a :class:`loopy.codegen.GeneratedStatement`. 
""" if kind == "local": if comment: diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 35dade904..4d5a60595 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -362,31 +362,31 @@ class ISPCASTBuilder(CASTBuilder): from cgen.ispc import ISPCUniform return ISPCUniform(result) - def emit_assignment(self, codegen_state, insn): + def emit_assignment(self, codegen_state, stmt): kernel = codegen_state.kernel ecm = codegen_state.expression_to_code_mapper - assignee_var_name, = insn.assignee_var_names() + assignee_var_name, = stmt.assignee_var_names() lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name) lhs_dtype = lhs_var.dtype - if insn.atomicity: + if stmt.atomicity: raise NotImplementedError("atomic ops in ISPC") from loopy.expression import dtype_to_type_context from pymbolic.mapper.stringifier import PREC_NONE rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype) - rhs_code = ecm(insn.expression, prec=PREC_NONE, + rhs_code = ecm(stmt.expression, prec=PREC_NONE, type_context=rhs_type_context, needed_dtype=lhs_dtype) - lhs = insn.assignee + lhs = stmt.assignee # {{{ handle streaming stores - if "!streaming_store" in insn.tags: + if "!streaming_store" in stmt.tags: ary = ecm.find_array(lhs) from loopy.kernel.array import get_access_info @@ -455,7 +455,7 @@ class ISPCASTBuilder(CASTBuilder): isinstance( kernel.iname_to_tag.get(dep), LocalIndexTag) and kernel.iname_to_tag.get(dep).axis == 0 - for dep in get_dependencies(insn.expression)) + for dep in get_dependencies(stmt.expression)) if not rhs_has_programindex: rhs_code = "broadcast(%s, 0)" % rhs_code diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index a5f7562c4..e47a7466a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -413,9 +413,9 @@ class OpenCLCASTBuilder(CASTBuilder): from cgen.opencl import CLKernel, CLRequiredWorkGroupSize fdecl = CLKernel(fdecl) - from loopy.schedule import get_insn_ids_for_block_at - _, local_sizes = 
codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at( + from loopy.schedule import get_stmt_ids_for_block_at + _, local_sizes = codegen_state.kernel.get_grid_sizes_for_stmt_ids_as_exprs( + get_stmt_ids_for_block_at( codegen_state.kernel.schedule, schedule_index)) from loopy.symbolic import get_dependencies @@ -453,7 +453,7 @@ class OpenCLCASTBuilder(CASTBuilder): def emit_barrier(self, kind, comment): """ :arg kind: ``"local"`` or ``"global"`` - :return: a :class:`loopy.codegen.GeneratedInstruction`. + :return: a :class:`loopy.codegen.GeneratedStatement`. """ if kind == "local": if comment: diff --git a/loopy/target/python.py b/loopy/target/python.py index ce04986d3..4c3c33fb5 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -291,18 +291,18 @@ class PythonASTBuilderBase(ASTBuilderBase): from genpy import If return If(condition_str, ast) - def emit_assignment(self, codegen_state, insn): + def emit_assignment(self, codegen_state, stmt): ecm = codegen_state.expression_to_code_mapper - if insn.atomicity: + if stmt.atomicity: raise NotImplementedError("atomic ops in Python") from pymbolic.mapper.stringifier import PREC_NONE from genpy import Assign return Assign( - ecm(insn.assignee, prec=PREC_NONE, type_context=None), - ecm(insn.expression, prec=PREC_NONE, type_context=None)) + ecm(stmt.assignee, prec=PREC_NONE, type_context=None), + ecm(stmt.expression, prec=PREC_NONE, type_context=None)) # }}} diff --git a/loopy/tools.py b/loopy/tools.py index d6952d547..022bebabc 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -135,8 +135,8 @@ class LoopyEqKeyBuilder(object): Usage:: kb = LoopyEqKeyBuilder() - kb.update_for_class(insn.__class__) - kb.update_for_field("field", insn.field) + kb.update_for_class(stmt.__class__) + kb.update_for_field("field", stmt.field) ... 
key = kb.key() diff --git a/loopy/transform/arithmetic.py b/loopy/transform/arithmetic.py index b7f47c38a..6ec67bf5a 100644 --- a/loopy/transform/arithmetic.py +++ b/loopy/transform/arithmetic.py @@ -34,9 +34,9 @@ def fold_constants(kernel): from loopy.symbolic import ConstantFoldingMapper cfm = ConstantFoldingMapper() - new_insns = [ - insn.with_transformed_expressions(cfm) - for insn in kernel.instructions] + new_stmts = [ + stmt.with_transformed_expressions(cfm) + for stmt in kernel.statements] new_substs = dict( (sub.name, @@ -44,7 +44,7 @@ def fold_constants(kernel): for sub in six.itervalues(kernel.substitutions)) return kernel.copy( - instructions=new_insns, + statements=new_stmts, substitutions=new_substs) # }}} @@ -135,8 +135,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): else: raise ValueError("unexpected type of access_expr") - def is_assignee(insn): - return var_name in insn.assignee_var_names() + def is_assignee(stmt): + return var_name in stmt.assignee_var_names() def iterate_as(cls, expr): if isinstance(expr, cls): @@ -151,16 +151,16 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): from loopy.kernel.data import Assignment - for insn in kernel.instructions: - if not is_assignee(insn): + for stmt in kernel.statements: + if not is_assignee(stmt): continue - if not isinstance(insn, Assignment): + if not isinstance(stmt, Assignment): raise LoopyError("'%s' modified by non-single-assignment" % var_name) - lhs = insn.assignee - rhs = insn.expression + lhs = stmt.assignee + rhs = stmt.expression if is_zero(rhs): continue @@ -182,8 +182,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): for part in iterate_as(Product, term): if var_name in get_dependencies(part): raise LoopyError("unexpected dependency on '%s' " - "in RHS of instruction '%s'" - % (var_name, insn.id)) + "in RHS of statement '%s'" + % (var_name, stmt.id)) product_parts = set(iterate_as(Product, term)) 
@@ -211,8 +211,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): for part in iterate_as(Product, term): if var_name in get_dependencies(part): raise LoopyError("unexpected dependency on '%s' " - "in RHS of instruction '%s'" - % (var_name, insn.id)) + "in RHS of statement '%s'" + % (var_name, stmt.id)) product_parts = set(iterate_as(Product, term)) @@ -235,27 +235,27 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): # {{{ remove common factors - new_insns = [] + new_stmts = [] - for insn in kernel.instructions: - if not isinstance(insn, Assignment) or not is_assignee(insn): - new_insns.append(insn) + for stmt in kernel.statements: + if not isinstance(stmt, Assignment) or not is_assignee(stmt): + new_stmts.append(stmt) continue - index_key = extract_index_key(insn.assignee) + index_key = extract_index_key(stmt.assignee) - lhs = insn.assignee - rhs = insn.expression + lhs = stmt.assignee + rhs = stmt.expression if is_zero(rhs): - new_insns.append(insn) + new_stmts.append(stmt) continue index_key = extract_index_key(lhs) cf_index, unif_result = find_unifiable_cf_index(index_key) if cf_index is None: - new_insns.append(insn) + new_stmts.append(stmt) continue _, my_common_factors = common_factors[cf_index] @@ -281,8 +281,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): if part not in mapped_my_common_factors ])) - new_insns.append( - insn.copy(expression=flattened_sum(new_sum_terms))) + new_stmts.append( + stmt.copy(expression=flattened_sum(new_sum_terms))) # }}} @@ -314,21 +314,21 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): else: return expr - insns = new_insns - new_insns = [] + stmts = new_stmts + new_stmts = [] subm = SubstitutionMapper(find_substitution) - for insn in insns: - if not isinstance(insn, Assignment) or is_assignee(insn): - new_insns.append(insn) + for stmt in stmts: + if not isinstance(stmt, Assignment) or is_assignee(stmt): + 
new_stmts.append(stmt) continue - new_insns.append(insn.with_transformed_expressions(subm)) + new_stmts.append(stmt.with_transformed_expressions(subm)) # }}} - return kernel.copy(instructions=new_insns) + return kernel.copy(statements=new_stmts) # }}} diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index e7a86300f..e74259cc3 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -168,9 +168,9 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch", batch_iname_set = frozenset([batch_iname]) kernel = kernel.copy( - instructions=[ - insn.copy(within_inames=insn.within_inames | batch_iname_set) - for insn in kernel.instructions]) + statements=[ + stmt.copy(within_inames=stmt.within_inames | batch_iname_set) + for stmt in kernel.statements]) return kernel diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index 1b059b6a7..c0ef05d24 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -52,7 +52,7 @@ class ArrayAccessReplacer(RuleAwareIdentityMapper): self.array_base_map = array_base_map self.var_name = var_name - self.modified_insn_ids = set() + self.modified_stmt_ids = set() self.buf_var = buf_var @@ -60,28 +60,28 @@ class ArrayAccessReplacer(RuleAwareIdentityMapper): result = None if expr.name == self.var_name and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack): result = self.map_array_access((), expn_state) if result is None: return super(ArrayAccessReplacer, self).map_variable(expr, expn_state) else: - self.modified_insn_ids.add(expn_state.insn_id) + self.modified_stmt_ids.add(expn_state.stmt_id) return result def map_subscript(self, expr, expn_state): result = None if expr.aggregate.name == self.var_name and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack): result = self.map_array_access(expr.index_tuple, expn_state) if result is None: return super(ArrayAccessReplacer, 
self).map_subscript(expr, expn_state) else: - self.modified_insn_ids.add(expn_state.insn_id) + self.modified_stmt_ids.add(expn_state.stmt_id) return result def map_array_access(self, index, expn_state): @@ -153,7 +153,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, being buffered). :arg store_expression: Either *None*, *False*, or an expression involving variables 'base' and 'buffer' (without array indices). - (*None* indicates that a default storage instruction should be used, + (*None* indicates that a default storage statement should be used, *False* indicates that no storing of the temporary should occur at all.) :arg within: If not None, limit the action of the transformation to @@ -259,14 +259,14 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, within_inames = set() access_descriptors = [] - for insn in kernel.instructions: - if not within(kernel, insn.id, ()): + for stmt in kernel.statements: + if not within(kernel, stmt.id, ()): continue from pymbolic.primitives import Variable, Subscript from loopy.symbolic import LinearSubscript - for assignee in insn.assignees: + for assignee in stmt.assignees: if isinstance(assignee, Variable): assignee_name = assignee.name index = () @@ -289,7 +289,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, - buffer_inames_set) access_descriptors.append( AccessDescriptor( - identifier=insn.id, + identifier=stmt.id, storage_axis_exprs=index)) # {{{ find fetch/store inames @@ -384,11 +384,11 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, # }}} - new_insns = [] + new_stmts = [] buf_var = var(buf_var_name) - # {{{ generate init instruction + # {{{ generate init statement buf_var_init = buf_var if non1_init_inames: @@ -419,9 +419,9 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, "base": init_base, }))(init_expression) - init_insn_id = kernel.make_unique_instruction_id(based_on="init_"+var_name) + 
init_stmt_id = kernel.make_unique_statement_id(based_on="init_"+var_name) from loopy.kernel.data import Assignment - init_instruction = Assignment(id=init_insn_id, + init_statement = Assignment(id=init_stmt_id, assignee=buf_var_init, expression=init_expression, within_inames=( @@ -439,14 +439,14 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, kernel = rule_mapping_context.finish_kernel(aar.map_kernel(kernel)) did_write = False - for insn_id in aar.modified_insn_ids: - insn = kernel.id_to_insn[insn_id] - if buf_var_name in insn.assignee_var_names(): + for stmt_id in aar.modified_stmt_ids: + stmt = kernel.id_to_stmt[stmt_id] + if buf_var_name in stmt.assignee_var_names(): did_write = True - # {{{ add init_insn_id to depends_on + # {{{ add init_stmt_id to depends_on - new_insns = [] + new_stmts = [] def none_to_empty_set(s): if s is None: @@ -454,19 +454,19 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, else: return s - for insn in kernel.instructions: - if insn.id in aar.modified_insn_ids: - new_insns.append( - insn.copy( + for stmt in kernel.statements: + if stmt.id in aar.modified_stmt_ids: + new_stmts.append( + stmt.copy( depends_on=( - none_to_empty_set(insn.depends_on) - | frozenset([init_insn_id])))) + none_to_empty_set(stmt.depends_on) + | frozenset([init_stmt_id])))) else: - new_insns.append(insn) + new_stmts.append(stmt) # }}} - # {{{ generate store instruction + # {{{ generate store statement buf_var_store = buf_var if non1_store_inames: @@ -498,10 +498,10 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, if store_expression is not False: from loopy.kernel.data import Assignment - store_instruction = Assignment( - id=kernel.make_unique_instruction_id(based_on="store_"+var_name), - depends_on=frozenset(aar.modified_insn_ids), - no_sync_with=frozenset([(init_insn_id, "any")]), + store_statement = Assignment( + id=kernel.make_unique_statement_id(based_on="store_"+var_name), + 
depends_on=frozenset(aar.modified_stmt_ids), + no_sync_with=frozenset([(init_stmt_id, "any")]), assignee=store_target, expression=store_expression, within_inames=( @@ -512,16 +512,16 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, # }}} - new_insns.append(init_instruction) + new_stmts.append(init_statement) if did_write: - new_insns.append(store_instruction) + new_stmts.append(store_statement) else: for iname in store_inames: del new_iname_to_tag[iname] kernel = kernel.copy( domains=new_kernel_domains, - instructions=new_insns, + statements=new_stmts, temporary_variables=new_temporary_variables) from loopy import tag_inames diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 575311b11..e2c1a5080 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -227,7 +227,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, :arg fetch_outer_inames: The inames within which the fetch - instruction is nested. If *None*, make an educated guess. + statement is nested. If *None*, make an educated guess. This function internally uses :func:`extract_subst` and :func:`precompute`. 
""" @@ -446,8 +446,8 @@ def remove_unused_arguments(knl): exp_knl = lp.expand_subst(knl) refd_vars = set(knl.all_params()) - for insn in exp_knl.instructions: - refd_vars.update(insn.dependency_names()) + for stmt in exp_knl.statements: + refd_vars.update(stmt.dependency_names()) from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag from loopy.symbolic import get_dependencies @@ -512,19 +512,19 @@ def alias_temporaries(knl, names, base_name_prefix=None, names_set = set(names) if synchronize_for_exclusive_use: - new_insns = [] - for insn in knl.instructions: - temp_deps = insn.dependency_names() & names_set + new_stmts = [] + for stmt in knl.statements: + temp_deps = stmt.dependency_names() & names_set if not temp_deps: - new_insns.append(insn) + new_stmts.append(stmt) continue if len(temp_deps) > 1: - raise LoopyError("Instruction {insn} refers to multiple of the " + raise LoopyError("Statement {stmt} refers to multiple of the " "temporaries being aliased, namely '{temps}'. Cannot alias." 
.format( - insn=insn.id, + stmt=stmt.id, temps=", ".join(temp_deps))) temp_name, = temp_deps @@ -534,13 +534,13 @@ def alias_temporaries(knl, names, base_name_prefix=None, frozenset(group_names[:temp_idx]) | frozenset(group_names[temp_idx+1:])) - new_insns.append( - insn.copy( - groups=insn.groups | frozenset([group_name]), + new_stmts.append( + stmt.copy( + groups=stmt.groups | frozenset([group_name]), conflicts_with_groups=( - insn.conflicts_with_groups | other_group_names))) + stmt.conflicts_with_groups | other_group_names))) else: - new_insns = knl.instructions + new_stmts = knl.statements new_temporary_variables = {} for tv in six.itervalues(knl.temporary_variables): @@ -556,7 +556,7 @@ def alias_temporaries(knl, names, base_name_prefix=None, new_temporary_variables[tv.name] = tv return knl.copy( - instructions=new_insns, + statements=new_stmts, temporary_variables=new_temporary_variables) # }}} @@ -624,7 +624,7 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): kernel.substitutions, var_name_gen) smap = RuleAwareSubstitutionMapper(rule_mapping_context, make_subst_func(subst_dict), - within=lambda knl, insn, stack: True) + within=lambda knl, stmt, stack: True) kernel = smap.map_kernel(kernel) @@ -683,7 +683,7 @@ def set_temporary_scope(kernel, temp_var_names, scope): # {{{ reduction_arg_to_subst_rule -def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=None): +def reduction_arg_to_subst_rule(knl, inames, stmt_match=None, subst_rule_name=None): if isinstance(inames, str): inames = [s.strip() for s in inames.split(",")] @@ -731,15 +731,15 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No from loopy.kernel.data import MultiAssignmentBase - new_insns = [] - for insn in knl.instructions: - if not isinstance(insn, MultiAssignmentBase): - new_insns.append(insn) + new_stmts = [] + for stmt in knl.statements: + if not isinstance(stmt, MultiAssignmentBase): + new_stmts.append(stmt) else: - 
new_insns.append(insn.copy(expression=cb_mapper(insn.expression))) + new_stmts.append(stmt.copy(expression=cb_mapper(stmt.expression))) return knl.copy( - instructions=new_insns, + statements=new_stmts, substitutions=substs) # }}} diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index d4dcb3701..e762082b6 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -168,12 +168,12 @@ class DifferentiationContext(object): self.imported_outputs = set() self.output_to_diff_output = {} - self.generate_instruction_id = self.kernel.get_instruction_id_generator() + self.generate_statement_id = self.kernel.get_statement_id_generator() self.new_args = [] self.new_temporary_variables = {} - self.new_instructions = [] - self.imported_instructions = set() + self.new_statements = [] + self.imported_statements = set() self.new_domains = [] self.rule_mapping_context = SubstitutionRuleMappingContext( @@ -189,7 +189,7 @@ class DifferentiationContext(object): knl = knl.copy( args=new_args, temporary_variables=new_temp_vars, - instructions=self.new_instructions, + statements=self.new_statements, domains=knl.domains + self.new_domains) del new_args @@ -226,24 +226,24 @@ class DifferentiationContext(object): # }}} - def import_instruction_and_deps(self, insn_id): - if insn_id in self.imported_instructions: + def import_statement_and_deps(self, stmt_id): + if stmt_id in self.imported_statements: return - insn = self.kernel.id_to_insn[insn_id] - self.new_instructions.append(insn) - self.imported_instructions.add(insn_id) + stmt = self.kernel.id_to_stmt[stmt_id] + self.new_statements.append(stmt) + self.imported_statements.add(stmt_id) id_map = RuleAwareIdentityMapper(self.rule_mapping_context) - if isinstance(insn, lp.Assignment): - id_map(insn.expression, self.kernel, insn) + if isinstance(stmt, lp.Assignment): + id_map(stmt.expression, self.kernel, stmt) else: raise RuntimeError("do not know how to deal with " - "instruction of type %s" % type(insn)) + "statement 
of type %s" % type(stmt)) - for dep in insn.depends_on: - self.import_instruction_and_deps(dep) + for dep in stmt.depends_on: + self.import_statement_and_deps(dep) def import_output_var(self, var_name): writers = self.kernel.writer_map().get(var_name, []) @@ -255,8 +255,8 @@ class DifferentiationContext(object): if not writers: return - insn_id, = writers - self.import_instruction_and_deps(insn_id) + stmt_id, = writers + self.import_statement_and_deps(stmt_id) def get_diff_var(self, var_name): """ @@ -279,7 +279,7 @@ class DifferentiationContext(object): % var_name) orig_writer_id, = writers - orig_writer_insn = self.kernel.id_to_insn[orig_writer_id] + orig_writer_stmt = self.kernel.id_to_stmt[orig_writer_id] diff_inames = self.add_diff_inames() diff_iname_exprs = tuple(var(diname) for diname in diff_inames) @@ -289,32 +289,32 @@ class DifferentiationContext(object): diff_mapper = LoopyDiffMapper(self.rule_mapping_context, self, diff_inames) - diff_expr = diff_mapper(orig_writer_insn.expression, - self.kernel, orig_writer_insn) + diff_expr = diff_mapper(orig_writer_stmt.expression, + self.kernel, orig_writer_stmt) if not diff_expr: return None - assert isinstance(orig_writer_insn, lp.Assignment) - if isinstance(orig_writer_insn.assignee, p.Subscript): - lhs_ind = orig_writer_insn.assignee.index_tuple - elif isinstance(orig_writer_insn.assignee, p.Variable): + assert isinstance(orig_writer_stmt, lp.Assignment) + if isinstance(orig_writer_stmt.assignee, p.Subscript): + lhs_ind = orig_writer_stmt.assignee.index_tuple + elif isinstance(orig_writer_stmt.assignee, p.Variable): lhs_ind = () else: raise LoopyError( "Unrecognized LHS type in differentiation: %s" - % type(orig_writer_insn.assignee).__name__) + % type(orig_writer_stmt.assignee).__name__) - new_insn_id = self.generate_instruction_id() - insn = lp.Assignment( - id=new_insn_id, + new_stmt_id = self.generate_statement_id() + stmt = lp.Assignment( + id=new_stmt_id, assignee=var(new_var_name)[ lhs_ind + 
diff_iname_exprs], expression=diff_expr, within_inames=( - orig_writer_insn.within_inames | frozenset(diff_inames))) + orig_writer_stmt.within_inames | frozenset(diff_inames))) - self.new_instructions.append(insn) + self.new_statements.append(stmt) # }}} @@ -383,7 +383,7 @@ def diff_kernel(knl, diff_outputs, by, diff_iname_prefix="diff_i", var_name_gen = knl.get_var_name_generator() - # {{{ differentiate instructions + # {{{ differentiate statements diff_context = DifferentiationContext( knl, var_name_gen, by, diff_iname_prefix=diff_iname_prefix, diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index 77c2d3ade..b9be8dd18 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -209,10 +209,10 @@ def _fuse_two_kernels(knla, knlb): knlb = _apply_renames_in_exprs(knlb, b_var_renames) from pymbolic.imperative.transform import \ - fuse_instruction_streams_with_unique_ids - new_instructions, old_b_id_to_new_b_id = \ - fuse_instruction_streams_with_unique_ids( - knla.instructions, knlb.instructions) + fuse_statement_streams_with_unique_ids + new_statements, old_b_id_to_new_b_id = \ + fuse_statement_streams_with_unique_ids( + knla.statements, knlb.statements) # {{{ fuse assumptions @@ -238,7 +238,7 @@ def _fuse_two_kernels(knla, knlb): from loopy.kernel import LoopKernel return LoopKernel( domains=new_domains, - instructions=new_instructions, + statements=new_statements, args=new_args, name="%s_and_%s" % (knla.name, knlb.name), preambles=_ordered_merge_lists(knla.preambles, knlb.preambles), @@ -321,10 +321,10 @@ def fuse_kernels(kernels, suffixes=None, data_flow=None): succeed. * Temporaries are automatically renamed to remain uniquely associated - with each instruction stream. + with each statement stream. - * The resulting kernel will contain all instructions from each entry - of *kernels*. Clashing instruction IDs will be renamed to ensure + * The resulting kernel will contain all statements from each entry + of *kernels*. 
Clashing statement IDs will be renamed to ensure uniqueness. .. versionchanged:: 2016.2 @@ -368,44 +368,44 @@ def fuse_kernels(kernels, suffixes=None, data_flow=None): # }}} - kernel_insn_ids = [] + kernel_stmt_ids = [] result = None for knlb in kernels: if result is None: result = knlb - kernel_insn_ids.append([ - insn.id for insn in knlb.instructions]) + kernel_stmt_ids.append([ + stmt.id for stmt in knlb.statements]) else: result, old_b_id_to_new_b_id = _fuse_two_kernels( knla=result, knlb=knlb) - kernel_insn_ids.append([ - old_b_id_to_new_b_id[insn.id] - for insn in knlb.instructions]) + kernel_stmt_ids.append([ + old_b_id_to_new_b_id[stmt.id] + for stmt in knlb.statements]) # {{{ realize data_flow dependencies - id_to_insn = result.id_to_insn.copy() + id_to_stmt = result.id_to_stmt.copy() for var_name, from_kernel, to_kernel in data_flow: from_writer_ids = frozenset( - insn_id - for insn_id in kernel_insn_ids[from_kernel] - if var_name in id_to_insn[insn_id].assignee_var_names()) + stmt_id + for stmt_id in kernel_stmt_ids[from_kernel] + if var_name in id_to_stmt[stmt_id].assignee_var_names()) - for insn_id in kernel_insn_ids[to_kernel]: - insn = id_to_insn[insn_id] - if var_name in insn.read_dependency_names(): - insn = insn.copy(depends_on=insn.depends_on | from_writer_ids) + for stmt_id in kernel_stmt_ids[to_kernel]: + stmt = id_to_stmt[stmt_id] + if var_name in stmt.read_dependency_names(): + stmt = stmt.copy(depends_on=stmt.depends_on | from_writer_ids) - id_to_insn[insn_id] = insn + id_to_stmt[stmt_id] = stmt - result = result.copy(instructions=[ - id_to_insn[insn_id] - for insn_ids in kernel_insn_ids - for insn_id in insn_ids]) + result = result.copy(statements=[ + id_to_stmt[stmt_id] + for stmt_ids in kernel_stmt_ids + for stmt_id in stmt_ids]) # }}} diff --git a/loopy/transform/ilp.py b/loopy/transform/ilp.py index 0ac71d603..0c86f6dc3 100644 --- a/loopy/transform/ilp.py +++ b/loopy/transform/ilp.py @@ -77,12 +77,12 @@ def 
add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): # {{{ find variables that need extra indices for tv in six.itervalues(kernel.temporary_variables): - for writer_insn_id in wmap.get(tv.name, []): - writer_insn = kernel.id_to_insn[writer_insn_id] + for writer_stmt_id in wmap.get(tv.name, []): + writer_stmt = kernel.id_to_stmt[writer_stmt_id] if iname is None: ilp_inames = frozenset(iname - for iname in kernel.insn_inames(writer_insn) + for iname in kernel.stmt_inames(writer_stmt) if isinstance( kernel.iname_to_tag.get(iname), (IlpBaseTag, VectorizeTag))) @@ -97,7 +97,7 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): ilp_inames = frozenset([iname]) referenced_ilp_inames = (ilp_inames - & writer_insn.write_dependency_names()) + & writer_stmt.write_dependency_names()) new_ilp_inames = ilp_inames - referenced_ilp_inames @@ -106,10 +106,10 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): if tv.name in var_to_new_ilp_inames: if new_ilp_inames != set(var_to_new_ilp_inames[tv.name]): - raise LoopyError("instruction '%s' requires adding " + raise LoopyError("statement '%s' requires adding " "indices for ILP inames '%s' on var '%s', but previous " - "instructions required inames '%s'" - % (writer_insn_id, ", ".join(new_ilp_inames), + "statements required inames '%s'" + % (writer_stmt_id, ", ".join(new_ilp_inames), ", ".join(var_to_new_ilp_inames[tv.name]))) continue @@ -167,30 +167,30 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): (var_name, tuple(var(iname) for iname in inames)) for var_name, inames in six.iteritems(var_to_new_ilp_inames)) - new_insns = [] + new_stmts = [] - for insn in kernel.instructions: + for stmt in kernel.statements: eiii = ExtraInameIndexInserter(var_to_extra_iname) - new_insn = insn.with_transformed_expressions(eiii) - if not eiii.seen_ilp_inames <= insn.within_inames: + new_stmt = stmt.with_transformed_expressions(eiii) + if not eiii.seen_ilp_inames <= stmt.within_inames: from 
loopy.diagnostic import warn_with_kernel warn_with_kernel( kernel, "implicit_ilp_iname", - "Instruction '%s': touched variable that (for ILP) " - "required iname(s) '%s', but that the instruction was not " + "Statement '%s': touched variable that (for ILP) " + "required iname(s) '%s', but that the statement was not " "previously within the iname(s). Previously, this would " - "implicitly promote the instruction, but that behavior is " + "implicitly promote the statement, but that behavior is " "deprecated and will stop working in 2018.1." - % (insn.id, ", ".join( - eiii.seen_ilp_inames - insn.within_inames))) + % (stmt.id, ", ".join( + eiii.seen_ilp_inames - stmt.within_inames))) - new_insns.append(new_insn) + new_stmts.append(new_stmt) return kernel.copy( temporary_variables=new_temp_vars, - instructions=new_insns) + statements=new_stmts) # }}} @@ -198,14 +198,14 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): # {{{ realize_ilp def realize_ilp(kernel, iname): - """Instruction-level parallelism (as realized by the loopy iname + """Statement-level parallelism (as realized by the loopy iname tag ``"ilp"``) provides the illusion that multiple concurrent - program instances execute in lockstep within a single instruction + program instances execute in lockstep within a single statement stream. - To do so, storage that is private to each instruction stream needs to be + To do so, storage that is private to each statement stream needs to be duplicated so that each program instance receives its own copy. Storage - that is written to in an instruction using an ILP iname but whose left-hand + that is written to in an statement using an ILP iname but whose left-hand side indices do not contain said ILP iname is marked for duplication. 
This storage duplication is carried out automatically at code generation diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 22fd7b3bb..105ba7bd8 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -70,7 +70,7 @@ __doc__ = """ .. autofunction:: make_reduction_inames_unique -.. autofunction:: add_inames_to_insn +.. autofunction:: add_inames_to_stmt """ @@ -137,7 +137,7 @@ class _InameSplitter(RuleAwareIdentityMapper): and self.split_iname not in expn_state.arg_context and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): new_inames = list(expr.inames) new_inames.remove(self.split_iname) @@ -155,7 +155,7 @@ class _InameSplitter(RuleAwareIdentityMapper): and self.split_iname not in expn_state.arg_context and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): return self.replacement_index else: @@ -246,20 +246,20 @@ def _split_iname_backend(kernel, split_iname, # {{{ update within_inames - new_insns = [] - for insn in kernel.instructions: - if split_iname in insn.within_inames: + new_stmts = [] + for stmt in kernel.statements: + if split_iname in stmt.within_inames: new_within_inames = ( - (insn.within_inames.copy() + (stmt.within_inames.copy() - frozenset([split_iname])) | frozenset([outer_iname, inner_iname])) else: - new_within_inames = insn.within_inames + new_within_inames = stmt.within_inames - insn = insn.copy( + stmt = stmt.copy( within_inames=new_within_inames) - new_insns.append(insn) + new_stmts.append(stmt) # }}} @@ -279,7 +279,7 @@ def _split_iname_backend(kernel, split_iname, kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, - instructions=new_insns, + statements=new_stmts, applied_iname_rewrites=applied_iname_rewrites, loop_priority=frozenset(new_priorities)) @@ -458,7 +458,7 @@ class _InameJoiner(RuleAwareSubstitutionMapper): - set(expn_state.arg_context)) if overlap and 
self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack): if overlap != expr_inames: raise LoopyError( @@ -563,14 +563,14 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): return frozenset(result) - new_insns = [ - insn.copy( - within_inames=subst_within_inames(insn.within_inames)) - for insn in kernel.instructions] + new_stmts = [ + stmt.copy( + within_inames=subst_within_inames(stmt.within_inames)) + for stmt in kernel.statements] kernel = (kernel .copy( - instructions=new_insns, + statements=new_stmts, domains=domch.get_domains_with(new_domain), applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] )) @@ -725,7 +725,7 @@ class _InameDuplicator(RuleAwareIdentityMapper): if (set(expr.inames) & self.old_inames_set and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): new_inames = tuple( self.old_to_new.get(iname, iname) @@ -747,21 +747,21 @@ class _InameDuplicator(RuleAwareIdentityMapper): or expr.name in expn_state.arg_context or not self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): return super(_InameDuplicator, self).map_variable(expr, expn_state) else: from pymbolic import var return var(new_name) - def map_instruction(self, kernel, insn): - if not self.within(kernel, insn, ()): - return insn + def map_statement(self, kernel, stmt): + if not self.within(kernel, stmt, ()): + return stmt new_fid = frozenset( self.old_to_new.get(iname, iname) - for iname in insn.within_inames) - return insn.copy(within_inames=new_fid) + for iname in stmt.within_inames) + return stmt.copy(within_inames=new_fid) def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, @@ -854,28 +854,28 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # {{{ iname duplication for schedulability -def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): - # 
Remove common inames of the current insn_deps, as they are not relevant +def _get_iname_duplication_options(stmt_deps, old_common_inames=frozenset([])): + # Remove common inames of the current stmt_deps, as they are not relevant # for splitting. - common = frozenset([]).union(*insn_deps).intersection(*insn_deps) + common = frozenset([]).union(*stmt_deps).intersection(*stmt_deps) # If common inames were found, we reduce the problem and go into recursion if common: - # Remove the common inames from the instruction dependencies - insn_deps = ( - frozenset(dep - common for dep in insn_deps) + # Remove the common inames from the statement dependencies + stmt_deps = ( + frozenset(dep - common for dep in stmt_deps) - frozenset([frozenset([])])) # Join the common inames with those previously found common = common.union(old_common_inames) # Go into recursion - for option in _get_iname_duplication_options(insn_deps, common): + for option in _get_iname_duplication_options(stmt_deps, common): yield option # Do not yield anything beyond here! return - # Try finding a partitioning of the remaining inames, such that all instructions + # Try finding a partitioning of the remaining inames, such that all statements # use only inames from one of the disjoint sets from the partitioning. 
def join_sets_if_not_disjoint(sets): for s1 in sets: @@ -888,7 +888,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): return sets, True - partitioning = insn_deps + partitioning = stmt_deps stop = False while not stop: partitioning, stop = join_sets_if_not_disjoint(partitioning) @@ -897,7 +897,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): # subproblems if len(partitioning) > 1: for part in partitioning: - working_set = frozenset(s for s in insn_deps if s.issubset(part)) + working_set = frozenset(s for s in stmt_deps if s.issubset(part)) for option in _get_iname_duplication_options(working_set, old_common_inames): yield option @@ -907,19 +907,19 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): # There are splitting options for all inames for iname in inames: - iname_insns = frozenset( - insn for insn in insn_deps if frozenset([iname]).issubset(insn)) + iname_stmts = frozenset( + stmt for stmt in stmt_deps if frozenset([iname]).issubset(stmt)) import itertools as it - # For a given iname, the set of instructions containing this iname + # For a given iname, the set of statements containing this iname # is inspected. For each element of the power set without the # empty and the full set, one duplication option is generated. 
- for insns_to_dup in it.chain.from_iterable( - it.combinations(iname_insns, l) - for l in range(1, len(iname_insns))): + for stmts_to_dup in it.chain.from_iterable( + it.combinations(iname_stmts, l) + for l in range(1, len(iname_stmts))): yield ( iname, - tuple(insn.union(old_common_inames) for insn in insns_to_dup)) + tuple(stmt.union(old_common_inames) for stmt in stmts_to_dup)) # If partitioning was empty, we have recursed successfully and yield nothing @@ -946,31 +946,31 @@ def get_iname_duplication_options(knl, use_boostable_into=False): \"\"\") In the example, there are four possibilities to resolve the problem: - * duplicating i in instruction i3 - * duplicating i in instruction i1 and i3 - * duplicating j in instruction i2 - * duplicating i in instruction i2 and i3 + * duplicating i in statement i3 + * duplicating i in statement i1 and i3 + * duplicating j in statement i2 + * duplicating i in statement i2 and i3 Use :func:`has_schedulable_iname_nesting` to decide, whether an iname needs to be duplicated in a given kernel. 
""" # First we extract the minimal necessary information from the kernel if use_boostable_into: - insn_deps = ( - frozenset(insn.within_inames.union( - insn.boostable_into if insn.boostable_into is not None + stmt_deps = ( + frozenset(stmt.within_inames.union( + stmt.boostable_into if stmt.boostable_into is not None else frozenset([])) - for insn in knl.instructions) + for stmt in knl.statements) - frozenset([frozenset([])])) else: - insn_deps = ( - frozenset(insn.within_inames for insn in knl.instructions) + stmt_deps = ( + frozenset(stmt.within_inames for stmt in knl.statements) - frozenset([frozenset([])])) # Get the duplication options as a tuple of iname and a set - for iname, insns in _get_iname_duplication_options(insn_deps): + for iname, stmts in _get_iname_duplication_options(stmt_deps): # Check whether this iname has a parallel tag and discard it if so from loopy.kernel.data import ConcurrentTag if (iname in knl.iname_to_tag @@ -987,7 +987,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False): from warnings import warn from loopy.diagnostic import LoopyWarning warn("Kernel '%s' required the deprecated 'boostable_into' " - "instruction attribute in order to be schedulable!" % knl.name, + "statement attribute in order to be schedulable!" % knl.name, LoopyWarning) # Return to avoid yielding the duplication @@ -998,10 +998,10 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # loopy.duplicate_inames from loopy.match import Id, Or within = Or(tuple( - Id(insn.id) for insn in knl.instructions - if insn.within_inames in insns)) + Id(stmt.id) for stmt in knl.statements + if stmt.within_inames in stmts)) - # Only yield the result if an instruction matched. With + # Only yield the result if an statement matched. With # use_boostable_into=True this is not always true. 
if within.children: @@ -1085,18 +1085,18 @@ def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): knl = rule_mapping_context.finish_kernel( smap.map_kernel(knl)) - new_instructions = [] - for insn in knl.instructions: - if (old_iname in insn.within_inames - and within(knl, insn, ())): - insn = insn.copy( + new_statements = [] + for stmt in knl.statements: + if (old_iname in stmt.within_inames + and within(knl, stmt, ())): + stmt = stmt.copy( within_inames=( - (insn.within_inames - frozenset([old_iname])) + (stmt.within_inames - frozenset([old_iname])) | frozenset([new_iname]))) - new_instructions.append(insn) + new_statements.append(stmt) - knl = knl.copy(instructions=new_instructions) + knl = knl.copy(statements=new_statements) else: knl = duplicate_inames( @@ -1136,10 +1136,10 @@ def remove_unused_inames(knl, inames=None): inames = set(inames) used_inames = set() - for insn in exp_knl.instructions: + for stmt in exp_knl.statements: used_inames.update( - exp_knl.insn_inames(insn.id) - | insn.reduction_inames()) + exp_knl.stmt_inames(stmt.id) + | stmt.reduction_inames()) unused_inames = inames - used_inames @@ -1184,7 +1184,7 @@ class _ReductionSplitter(RuleAwareIdentityMapper): if (self.inames <= set(expr.inames) and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): leftover_inames = set(expr.inames) - self.inames @@ -1451,43 +1451,43 @@ def affine_map_inames(kernel, old_inames, new_inames, equations): # }}} - # {{{ switch iname refs in instructions + # {{{ switch iname refs in statements - def fix_iname_set(insn_id, inames): + def fix_iname_set(stmt_id, inames): if old_inames_set <= inames: return (inames - old_inames_set) | new_inames_set elif old_inames_set & inames: - raise LoopyError("instruction '%s' uses only a part (%s), not all, " + raise LoopyError("statement '%s' uses only a part (%s), not all, " "of the old inames" - % (insn_id, ", ".join(old_inames_set & inames))) + % (stmt_id, 
", ".join(old_inames_set & inames))) else: return inames - new_instructions = [ - insn.copy(within_inames=fix_iname_set( - insn.id, insn.within_inames)) - for insn in kernel.instructions] + new_statements = [ + stmt.copy(within_inames=fix_iname_set( + stmt.id, stmt.within_inames)) + for stmt in kernel.statements] # }}} - return kernel.copy(domains=new_domains, instructions=new_instructions) + return kernel.copy(domains=new_domains, statements=new_statements) # }}} # {{{ find unused axes -def find_unused_axis_tag(kernel, kind, insn_match=None): +def find_unused_axis_tag(kernel, kind, stmt_match=None): """For one of the hardware-parallel execution tags, find an unused axis. - :arg insn_match: An instruction match as understood by + :arg stmt_match: An statement match as understood by :func:`loopy.match.parse_match`. :arg kind: may be "l" or "g", or the corresponding tag class name :returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag` - that is not being used within the instructions matched by - *insn_match*. + that is not being used within the statements matched by + *stmt_match*. 
""" used_axes = set() @@ -1505,11 +1505,11 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): raise LoopyError("invlaid tag kind: %s" % kind) from loopy.match import parse_match - match = parse_match(insn_match) - insns = [insn for insn in kernel.instructions if match(kernel, insn)] + match = parse_match(stmt_match) + stmts = [stmt for stmt in kernel.statements if match(kernel, stmt)] - for insn in insns: - for iname in kernel.insn_inames(insn): + for stmt in stmts: + for iname in kernel.stmt_inames(stmt): dim_tag = kernel.iname_to_tag.get(iname) if isinstance(dim_tag, kind): @@ -1557,7 +1557,7 @@ class _ReductionInameUniquifier(RuleAwareIdentityMapper): def map_reduction(self, expr, expn_state): within = self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack) for iname in expr.inames: @@ -1645,19 +1645,19 @@ def make_reduction_inames_unique(kernel, inames=None, within=None): # }}} -# {{{ add_inames_to_insn +# {{{ add_inames_to_stmt -def add_inames_to_insn(knl, inames, insn_match): +def add_inames_to_stmt(knl, inames, stmt_match): """ :arg inames: a frozenset of inames that will be added to the - instructions matched by *insn_match*, or a comma-separated + statements matched by *stmt_match*, or a comma-separated string that parses to such a tuple. - :arg insn_match: An instruction match as understood by + :arg stmt_match: An statement match as understood by :func:`loopy.match.parse_match`. :returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag` - that is not being used within the instructions matched by - *insn_match*. + that is not being used within the statements matched by + *stmt_match*. .. 
versionadded:: 2016.3 """ @@ -1669,18 +1669,18 @@ def add_inames_to_insn(knl, inames, insn_match): raise TypeError("'inames' must be a frozenset") from loopy.match import parse_match - match = parse_match(insn_match) + match = parse_match(stmt_match) - new_instructions = [] + new_statements = [] - for insn in knl.instructions: - if match(knl, insn): - new_instructions.append( - insn.copy(within_inames=insn.within_inames | inames)) + for stmt in knl.statements: + if match(knl, stmt): + new_statements.append( + stmt.copy(within_inames=stmt.within_inames | inames)) else: - new_instructions.append(insn) + new_statements.append(stmt) - return knl.copy(instructions=new_instructions) + return knl.copy(statements=new_statements) # }}} diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py deleted file mode 100644 index 37c5d85a1..000000000 --- a/loopy/transform/instruction.py +++ /dev/null @@ -1,339 +0,0 @@ -from __future__ import division, absolute_import - -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -import six # noqa - -from loopy.diagnostic import LoopyError - - -# {{{ find_instructions - -def find_instructions(kernel, insn_match): - from loopy.match import parse_match - match = parse_match(insn_match) - return [insn for insn in kernel.instructions if match(kernel, insn)] - -# }}} - - -# {{{ map_instructions - -def map_instructions(kernel, insn_match, f): - from loopy.match import parse_match - match = parse_match(insn_match) - - new_insns = [] - - for insn in kernel.instructions: - if match(kernel, insn): - new_insns.append(f(insn)) - else: - new_insns.append(insn) - - return kernel.copy(instructions=new_insns) - -# }}} - - -# {{{ set_instruction_priority - -def set_instruction_priority(kernel, insn_match, priority): - """Set the priority of instructions matching *insn_match* to *priority*. - - *insn_match* may be any instruction id match understood by - :func:`loopy.match.parse_match`. - """ - - def set_prio(insn): - return insn.copy(priority=priority) - - return map_instructions(kernel, insn_match, set_prio) - -# }}} - - -# {{{ add_dependency - -def add_dependency(kernel, insn_match, depends_on): - """Add the instruction dependency *dependency* to the instructions matched - by *insn_match*. - - *insn_match* and *depends_on* may be any instruction id match understood by - :func:`loopy.match.parse_match`. - - .. versionchanged:: 2016.3 - - Third argument renamed to *depends_on* for clarity, allowed to - be not just ID but also match expression. 
- """ - - if isinstance(depends_on, str) and depends_on in kernel.id_to_insn: - added_deps = frozenset([depends_on]) - else: - added_deps = frozenset( - dep.id for dep in find_instructions(kernel, depends_on)) - - if not added_deps: - raise LoopyError("no instructions found matching '%s' " - "(to add as dependencies)" % depends_on) - - matched = [False] - - def add_dep(insn): - new_deps = insn.depends_on - matched[0] = True - if new_deps is None: - new_deps = added_deps - else: - new_deps = new_deps | added_deps - - return insn.copy(depends_on=new_deps) - - result = map_instructions(kernel, insn_match, add_dep) - - if not matched[0]: - raise LoopyError("no instructions found matching '%s' " - "(to which dependencies would be added)" % insn_match) - - return result - -# }}} - - -# {{{ remove_instructions - -def remove_instructions(kernel, insn_ids): - """Return a new kernel with instructions in *insn_ids* removed. - - Dependencies across (one, for now) deleted isntructions are propagated. - Behavior is undefined for now for chains of dependencies within the - set of deleted instructions. - - This also updates *no_sync_with* for all instructions. 
- """ - - if not insn_ids: - return kernel - - assert isinstance(insn_ids, set) - id_to_insn = kernel.id_to_insn - - new_insns = [] - for insn in kernel.instructions: - if insn.id in insn_ids: - continue - - # transitively propagate dependencies - # (only one level for now) - if insn.depends_on is None: - depends_on = frozenset() - else: - depends_on = insn.depends_on - - new_deps = depends_on - insn_ids - - for dep_id in depends_on & insn_ids: - new_deps = new_deps | id_to_insn[dep_id].depends_on - - # update no_sync_with - - new_no_sync_with = frozenset((insn_id, scope) - for insn_id, scope in insn.no_sync_with - if insn_id not in insn_ids) - - new_insns.append( - insn.copy(depends_on=new_deps, no_sync_with=new_no_sync_with)) - - return kernel.copy( - instructions=new_insns) - -# }}} - - -# {{{ replace_instruction_ids - -def replace_instruction_ids(kernel, replacements): - new_insns = [] - - for insn in kernel.instructions: - changed = False - new_depends_on = [] - new_no_sync_with = [] - - for dep in insn.depends_on: - if dep in replacements: - new_depends_on.extend(replacements[dep]) - changed = True - else: - new_depends_on.append(dep) - - for insn_id, scope in insn.no_sync_with: - if insn_id in replacements: - new_no_sync_with.extend( - (repl, scope) for repl in replacements[insn_id]) - changed = True - else: - new_no_sync_with.append((insn_id, scope)) - - new_insns.append( - insn.copy( - depends_on=frozenset(new_depends_on), - no_sync_with=frozenset(new_no_sync_with)) - if changed else insn) - - return kernel.copy(instructions=new_insns) - -# }}} - - -# {{{ tag_instructions - -def tag_instructions(kernel, new_tag, within=None): - from loopy.match import parse_match - within = parse_match(within) - - new_insns = [] - for insn in kernel.instructions: - if within(kernel, insn): - new_insns.append( - insn.copy(tags=insn.tags | frozenset([new_tag]))) - else: - new_insns.append(insn) - - return kernel.copy(instructions=new_insns) - -# }}} - - -# {{{ add nosync - 
-def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): - """Add a *no_sync_with* directive between *source* and *sink*. - *no_sync_with* is only added if *sink* depends on *source* or - if the instruction pair is in a conflicting group. - - This function does not check for the presence of a memory dependency. - - :arg kernel: The kernel - :arg source: Either a single instruction id, or any instruction id - match understood by :func:`loopy.match.parse_match`. - :arg sink: Either a single instruction id, or any instruction id - match understood by :func:`loopy.match.parse_match`. - :arg scope: A valid *no_sync_with* scope. See - :attr:`loopy.InstructionBase.no_sync_with` for allowable scopes. - :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with* - to both the source and sink instructions, otherwise the directive - is only added to the sink instructions. - :arg force: A :class:`bool`. If *True*, add a *no_sync_with* directive - even without the presence of a dependency edge or conflicting - instruction group. 
- - :return: The updated kernel - """ - - if isinstance(source, str) and source in kernel.id_to_insn: - sources = frozenset([source]) - else: - sources = frozenset( - source.id for source in find_instructions(kernel, source)) - - if isinstance(sink, str) and sink in kernel.id_to_insn: - sinks = frozenset([sink]) - else: - sinks = frozenset( - sink.id for sink in find_instructions(kernel, sink)) - - def insns_in_conflicting_groups(insn1_id, insn2_id): - insn1 = kernel.id_to_insn[insn1_id] - insn2 = kernel.id_to_insn[insn2_id] - return ( - bool(insn1.groups & insn2.conflicts_with_groups) - or - bool(insn2.groups & insn1.conflicts_with_groups)) - - from collections import defaultdict - nosync_to_add = defaultdict(set) - - for sink in sinks: - for source in sources: - - needs_nosync = force or ( - source in kernel.recursive_insn_dep_map()[sink] - or insns_in_conflicting_groups(source, sink)) - - if not needs_nosync: - continue - - nosync_to_add[sink].add((source, scope)) - if bidirectional: - nosync_to_add[source].add((sink, scope)) - - new_instructions = list(kernel.instructions) - - for i, insn in enumerate(new_instructions): - if insn.id in nosync_to_add: - new_instructions[i] = insn.copy(no_sync_with=insn.no_sync_with - | frozenset(nosync_to_add[insn.id])) - - return kernel.copy(instructions=new_instructions) - -# }}} - - -# {{{ uniquify_instruction_ids - -def uniquify_instruction_ids(kernel): - """Converts any ids that are :class:`loopy.UniqueName` or *None* into unique - strings. - - This function does *not* deduplicate existing instruction ids. 
- """ - - from loopy.kernel.creation import UniqueName - - insn_ids = set( - insn.id for insn in kernel.instructions - if insn.id is not None and not isinstance(insn.id, UniqueName)) - - from pytools import UniqueNameGenerator - insn_id_gen = UniqueNameGenerator(insn_ids) - - new_instructions = [] - - for insn in kernel.instructions: - if insn.id is None: - new_instructions.append( - insn.copy(id=insn_id_gen("insn"))) - elif isinstance(insn.id, UniqueName): - new_instructions.append( - insn.copy(id=insn_id_gen(insn.id.name))) - else: - new_instructions.append(insn) - - return kernel.copy(instructions=new_instructions) - -# }}} - - -# vim: foldmethod=marker diff --git a/loopy/transform/padding.py b/loopy/transform/padding.py index d695e3595..a22b1db93 100644 --- a/loopy/transform/padding.py +++ b/loopy/transform/padding.py @@ -83,7 +83,7 @@ def split_array_dim(kernel, arrays_and_axes, count, auto_split_inames=True, elif len(rest) == 2: return rest else: - raise RuntimeError("split instruction '%s' not understood" % rest) + raise RuntimeError("split statement '%s' not understood" % rest) if isinstance(arrays_and_axes, tuple): arrays_and_axes = [arrays_and_axes] diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 6077332c4..85fc34840 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -84,7 +84,7 @@ class RuleInvocationGatherer(RuleAwareIdentityMapper): process_me = process_me and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack) if not process_me: @@ -136,7 +136,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): access_descriptors, array_base_map, storage_axis_names, storage_axis_sources, non1_storage_axis_names, - temporary_name, compute_insn_id, compute_dep_id, + temporary_name, compute_stmt_id, compute_dep_id, compute_read_variables): super(RuleInvocationReplacer, self).__init__(rule_mapping_context) @@ -152,18 +152,18 @@ class 
RuleInvocationReplacer(RuleAwareIdentityMapper): self.non1_storage_axis_names = non1_storage_axis_names self.temporary_name = temporary_name - self.compute_insn_id = compute_insn_id + self.compute_stmt_id = compute_stmt_id self.compute_dep_id = compute_dep_id self.compute_read_variables = compute_read_variables - self.compute_insn_depends_on = set() + self.compute_stmt_depends_on = set() def map_substitution(self, name, tag, arguments, expn_state): if not ( name == self.subst_name and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack) and (self.subst_tag is None or self.subst_tag == tag)): return super(RuleInvocationReplacer, self).map_substitution( @@ -222,34 +222,34 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): return new_outer_expr def map_kernel(self, kernel): - new_insns = [] + new_stmts = [] - excluded_insn_ids = set([self.compute_insn_id, self.compute_dep_id]) + excluded_stmt_ids = set([self.compute_stmt_id, self.compute_dep_id]) - for insn in kernel.instructions: + for stmt in kernel.statements: self.replaced_something = False - insn = insn.with_transformed_expressions(self, kernel, insn) + stmt = stmt.with_transformed_expressions(self, kernel, stmt) if self.replaced_something: - insn = insn.copy( + stmt = stmt.copy( depends_on=( - insn.depends_on + stmt.depends_on | frozenset([self.compute_dep_id]))) - for dep in insn.depends_on: - if dep in excluded_insn_ids: + for dep in stmt.depends_on: + if dep in excluded_stmt_ids: continue - dep_insn = kernel.id_to_insn[dep] - if (frozenset(dep_insn.assignee_var_names()) + dep_stmt = kernel.id_to_stmt[dep] + if (frozenset(dep_stmt.assignee_var_names()) & self.compute_read_variables): - self.compute_insn_depends_on.update( - insn.depends_on - excluded_insn_ids) + self.compute_stmt_depends_on.update( + stmt.depends_on - excluded_stmt_ids) - new_insns.append(insn) + new_stmts.append(stmt) - return kernel.copy(instructions=new_insns) + return 
kernel.copy(statements=new_stmts) # }}} @@ -260,7 +260,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, storage_axis_to_tag={}, default_tag="l.auto", dtype=None, fetch_bounding_box=False, temporary_scope=None, temporary_is_local=None, - compute_insn_id=None): + compute_stmt_id=None): """Precompute the expression described in the substitution rule determined by *subst_use* and store it in a temporary array. A precomputation needs two things to operate, a list of *sweep_inames* (order irrelevant) and an @@ -325,10 +325,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, May also equivalently be a comma-separated string. :arg precompute_outer_inames: A :class:`frozenset` of inames within which - the compute instruction is nested. If *None*, make an educated guess. + the compute statement is nested. If *None*, make an educated guess. May also be specified as a comma-separated string. - :arg compute_insn_id: The ID of the instruction generated to perform the + :arg compute_stmt_id: The ID of the statement generated to perform the precomputation. 
If `storage_axes` is not specified, it defaults to the arrangement @@ -473,11 +473,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, del rule_mapping_context import loopy as lp - for insn in kernel.instructions: - if isinstance(insn, lp.MultiAssignmentBase): - for assignee in insn.assignees: - invg(assignee, kernel, insn) - invg(insn.expression, kernel, insn) + for stmt in kernel.statements: + if isinstance(stmt, lp.MultiAssignmentBase): + for assignee in stmt.assignees: + invg(assignee, kernel, stmt) + invg(stmt.expression, kernel, stmt) access_descriptors = invg.access_descriptors if not access_descriptors: @@ -754,7 +754,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, kernel = kernel.copy(domains=new_kernel_domains) - # {{{ set up compute insn + # {{{ set up compute stmt if temporary_name is None: temporary_name = var_name_gen(based_on=c_subst_name) @@ -765,7 +765,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, assignee = assignee[ tuple(var(iname) for iname in non1_storage_axis_names)] - # {{{ process substitutions on compute instruction + # {{{ process substitutions on compute statement storage_axis_subst_dict = {} @@ -792,29 +792,29 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} from loopy.kernel.data import Assignment - if compute_insn_id is None: - compute_insn_id = kernel.make_unique_instruction_id(based_on=c_subst_name) + if compute_stmt_id is None: + compute_stmt_id = kernel.make_unique_statement_id(based_on=c_subst_name) - compute_insn = Assignment( - id=compute_insn_id, + compute_stmt = Assignment( + id=compute_stmt_id, assignee=assignee, expression=compute_expression, # within_inames determined below ) - compute_dep_id = compute_insn_id - added_compute_insns = [compute_insn] + compute_dep_id = compute_stmt_id + added_compute_stmts = [compute_stmt] if temporary_scope == temp_var_scope.GLOBAL: - barrier_insn_id = kernel.make_unique_instruction_id( + barrier_stmt_id = 
kernel.make_unique_statement_id( based_on=c_subst_name+"_barrier") - from loopy.kernel.instruction import BarrierInstruction - barrier_insn = BarrierInstruction( - id=barrier_insn_id, - depends_on=frozenset([compute_insn_id]), + from loopy.kernel.statement import BarrierStatement + barrier_stmt = BarrierStatement( + id=barrier_stmt_id, + depends_on=frozenset([compute_stmt_id]), kind="global") - compute_dep_id = barrier_insn_id + compute_dep_id = barrier_stmt_id - added_compute_insns.append(barrier_insn) + added_compute_stmts.append(barrier_stmt) # }}} @@ -828,58 +828,58 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, access_descriptors, abm, storage_axis_names, storage_axis_sources, non1_storage_axis_names, - temporary_name, compute_insn_id, compute_dep_id, + temporary_name, compute_stmt_id, compute_dep_id, compute_read_variables=get_dependencies(expander(compute_expression))) kernel = invr.map_kernel(kernel) kernel = kernel.copy( - instructions=added_compute_insns + kernel.instructions) + statements=added_compute_stmts + kernel.statements) kernel = rule_mapping_context.finish_kernel(kernel) # }}} - # {{{ add dependencies to compute insn + # {{{ add dependencies to compute stmt kernel = kernel.copy( - instructions=[ - insn.copy(depends_on=frozenset(invr.compute_insn_depends_on)) - if insn.id == compute_insn_id - else insn - for insn in kernel.instructions]) + statements=[ + stmt.copy(depends_on=frozenset(invr.compute_stmt_depends_on)) + if stmt.id == compute_stmt_id + else stmt + for stmt in kernel.statements]) # }}} - # {{{ propagate storage iname subst to dependencies of compute instructions + # {{{ propagate storage iname subst to dependencies of compute statements from loopy.kernel.tools import find_recursive_dependencies compute_deps = find_recursive_dependencies( - kernel, frozenset([compute_insn_id])) + kernel, frozenset([compute_stmt_id])) # FIXME: Need to verify that there are no outside dependencies # on compute_deps 
prior_storage_axis_names = frozenset(storage_axis_subst_dict) - new_insns = [] - for insn in kernel.instructions: - if (insn.id in compute_deps - and insn.within_inames & prior_storage_axis_names): - insn = (insn + new_stmts = [] + for stmt in kernel.statements: + if (stmt.id in compute_deps + and stmt.within_inames & prior_storage_axis_names): + stmt = (stmt .with_transformed_expressions( - lambda expr: expr_subst_map(expr, kernel, insn)) + lambda expr: expr_subst_map(expr, kernel, stmt)) .copy(within_inames=frozenset( storage_axis_subst_dict.get(iname, var(iname)).name - for iname in insn.within_inames))) + for iname in stmt.within_inames))) - new_insns.append(insn) + new_stmts.append(stmt) else: - new_insns.append(insn) + new_stmts.append(stmt) - kernel = kernel.copy(instructions=new_insns) + kernel = kernel.copy(statements=new_stmts) # }}} - # {{{ determine inames for compute insn + # {{{ determine inames for compute stmt if precompute_outer_inames is None: from loopy.kernel.tools import guess_iname_deps_based_on_var_use @@ -888,7 +888,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, | frozenset( (expanding_usage_arg_deps | value_inames) - sweep_inames_set) - | guess_iname_deps_based_on_var_use(kernel, compute_insn)) + | guess_iname_deps_based_on_var_use(kernel, compute_stmt)) else: if not isinstance(precompute_outer_inames, frozenset): raise TypeError("precompute_outer_inames must be a frozenset") @@ -897,11 +897,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, | frozenset(non1_storage_axis_names) kernel = kernel.copy( - instructions=[ - insn.copy(within_inames=precompute_outer_inames) - if insn.id == compute_insn_id - else insn - for insn in kernel.instructions]) + statements=[ + stmt.copy(within_inames=precompute_outer_inames) + if stmt.id == compute_stmt_id + else stmt + for stmt in kernel.statements]) # }}} diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 2ba2338b0..39824fbd4 100644 --- 
a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -30,7 +30,7 @@ import six from loopy.kernel.data import auto, temp_var_scope from pytools import memoize_method, Record from loopy.schedule import ( - EnterLoop, LeaveLoop, RunInstruction, + EnterLoop, LeaveLoop, RunStatement, CallKernel, ReturnFromKernel, Barrier) from loopy.schedule.tools import get_block_boundaries @@ -51,12 +51,12 @@ __doc__ = """ class LivenessResult(dict): - class InstructionResult(Record): + class StatementResult(Record): __slots__ = ["live_in", "live_out"] @classmethod def make_empty(cls, nscheditems): - return cls((idx, cls.InstructionResult(live_in=set(), live_out=set())) + return cls((idx, cls.StatementResult(live_in=set(), live_out=set())) for idx in range(nscheditems)) @@ -83,7 +83,7 @@ class LivenessAnalysis(object): # Account for empty loop loop_end = block_bounds[sched_idx + 1] after = successors[loop_end] | set([sched_idx + 1]) - elif isinstance(next_item, (LeaveLoop, RunInstruction, + elif isinstance(next_item, (LeaveLoop, RunStatement, CallKernel, ReturnFromKernel, Barrier)): after = set([sched_idx + 1]) else: @@ -95,7 +95,7 @@ class LivenessAnalysis(object): # Account for loop loop_begin = block_bounds[sched_idx] after |= set([loop_begin]) - elif not isinstance(item, (EnterLoop, RunInstruction, + elif not isinstance(item, (EnterLoop, RunStatement, CallKernel, ReturnFromKernel, Barrier)): raise LoopyError("unexpected type of schedule item: {ty}" .format(ty=type(item).__name__)) @@ -109,13 +109,13 @@ class LivenessAnalysis(object): kill = dict((idx, set()) for idx in range(len(self.schedule))) for sched_idx, sched_item in enumerate(self.schedule): - if not isinstance(sched_item, RunInstruction): + if not isinstance(sched_item, RunStatement): continue - insn = self.kernel.id_to_insn[sched_item.insn_id] - for var in insn.assignee_var_names(): + stmt = self.kernel.id_to_stmt[sched_item.stmt_id] + for var in stmt.assignee_var_names(): if var not in 
self.kernel.temporary_variables: continue - if not insn.predicates: + if not stmt.predicates: # Fully kills the liveness only when unconditional. kill[sched_idx].add(var) if len(self.kernel.temporary_variables[var].shape) > 0: @@ -127,7 +127,7 @@ class LivenessAnalysis(object): # or a full write. Instead, we analyze the access # footprint later on to determine how much to reload/save. gen[sched_idx].add(var) - for var in insn.read_dependency_names(): + for var in stmt.read_dependency_names(): if var not in self.kernel.temporary_variables: continue gen[sched_idx].add(var) @@ -174,10 +174,10 @@ class LivenessAnalysis(object): def __getitem__(self, sched_idx): """ - :arg insn: An instruction name or instance of - :class:`loopy.instruction.InstructionBase` + :arg stmt: An statement name or instance of + :class:`loopy.statement.StatementBase` - :returns: A :class:`LivenessResult` associated with `insn` + :returns: A :class:`LivenessResult` associated with `stmt` """ return self.liveness()[sched_idx] @@ -238,20 +238,20 @@ class TemporarySaver(object): def __init__(self, kernel): self.kernel = kernel self.var_name_gen = kernel.get_var_name_generator() - self.insn_name_gen = kernel.get_instruction_id_generator() + self.stmt_name_gen = kernel.get_statement_id_generator() # These fields keep track of updates to the kernel. 
- self.insns_to_insert = [] - self.insns_to_update = {} + self.stmts_to_insert = [] + self.stmts_to_update = {} self.extra_args_to_add = {} self.updated_iname_to_tag = {} self.updated_temporary_variables = {} - # temporary name -> save or reload insn ids + # temporary name -> save or reload stmt ids from collections import defaultdict self.temporary_to_save_ids = defaultdict(set) self.temporary_to_reload_ids = defaultdict(set) - self.subkernel_to_newly_added_insn_ids = defaultdict(set) + self.subkernel_to_newly_added_stmt_ids = defaultdict(set) # Maps names of base_storage to the name of the temporary # representative chosen for saves/reloads @@ -268,9 +268,9 @@ class TemporarySaver(object): arg.name for arg in kernel.args if isinstance(arg, ValueArg))))) - def find_accessing_instructions_in_subkernel(self, temporary, subkernel): - # Find all accessing instructions in the subkernel. If base_storage is - # present, this includes instructions that access aliasing memory. + def find_accessing_statements_in_subkernel(self, temporary, subkernel): + # Find all accessing statements in the subkernel. If base_storage is + # present, this includes statements that access aliasing memory. 
aliasing_names = set([temporary]) base_storage = self.kernel.temporary_variables[temporary].base_storage @@ -278,24 +278,24 @@ class TemporarySaver(object): if base_storage is not None: aliasing_names |= self.base_storage_to_temporary_map[base_storage] - from loopy.kernel.tools import get_subkernel_to_insn_id_map - accessing_insns_in_subkernel = set() - subkernel_insns = get_subkernel_to_insn_id_map(self.kernel)[subkernel] + from loopy.kernel.tools import get_subkernel_to_stmt_id_map + accessing_stmts_in_subkernel = set() + subkernel_stmts = get_subkernel_to_stmt_id_map(self.kernel)[subkernel] for name in aliasing_names: try: - accessing_insns_in_subkernel |= ( - self.kernel.reader_map()[name] & subkernel_insns) + accessing_stmts_in_subkernel |= ( + self.kernel.reader_map()[name] & subkernel_stmts) except KeyError: pass try: - accessing_insns_in_subkernel |= ( - self.kernel.writer_map()[name] & subkernel_insns) + accessing_stmts_in_subkernel |= ( + self.kernel.writer_map()[name] & subkernel_stmts) except KeyError: pass - return frozenset(accessing_insns_in_subkernel) + return frozenset(accessing_stmts_in_subkernel) @property @memoize_method @@ -356,14 +356,14 @@ class TemporarySaver(object): try: pre_barrier = next(item for item in self.kernel.schedule[subkernel_start::-1] - if is_global_barrier(item)).originating_insn_id + if is_global_barrier(item)).originating_stmt_id except StopIteration: pre_barrier = None try: post_barrier = next(item for item in self.kernel.schedule[subkernel_end:] - if is_global_barrier(item)).originating_insn_id + if is_global_barrier(item)).originating_stmt_id except StopIteration: post_barrier = None @@ -379,7 +379,7 @@ class TemporarySaver(object): In the case of local temporaries, inames that are tagged hw-local do not contribute to the global storage shape. 
""" - accessor_insn_ids = frozenset( + accessor_stmt_ids = frozenset( self.kernel.reader_map()[temporary.name] | self.kernel.writer_map()[temporary.name]) @@ -389,13 +389,13 @@ class TemporarySaver(object): def _sortedtags(tags): return sorted(tags, key=lambda tag: tag.axis) - for insn_id in accessor_insn_ids: - insn = self.kernel.id_to_insn[insn_id] + for stmt_id in accessor_stmt_ids: + stmt = self.kernel.id_to_stmt[stmt_id] my_group_tags = [] my_local_tags = [] - for iname in insn.within_inames: + for iname in stmt.within_inames: tag = self.kernel.iname_to_tag.get(iname) if tag is None: @@ -418,25 +418,25 @@ class TemporarySaver(object): if group_tags is None: group_tags = _sortedtags(my_group_tags) local_tags = _sortedtags(my_local_tags) - group_tags_originating_insn_id = insn_id + group_tags_originating_stmt_id = stmt_id if ( group_tags != _sortedtags(my_group_tags) or local_tags != _sortedtags(my_local_tags)): raise LoopyError( - "inconsistent parallel tags across instructions that access " - "'%s' (specifically, instruction '%s' has tags '%s' but " - "instruction '%s' has tags '%s')" + "inconsistent parallel tags across statements that access " + "'%s' (specifically, statement '%s' has tags '%s' but " + "statement '%s' has tags '%s')" % (temporary.name, - group_tags_originating_insn_id, group_tags + local_tags, - insn_id, my_group_tags + my_local_tags)) + group_tags_originating_stmt_id, group_tags + local_tags, + stmt_id, my_group_tags + my_local_tags)) if group_tags is None: assert local_tags is None return (), () group_sizes, local_sizes = ( - self.kernel.get_grid_sizes_for_insn_ids_as_exprs(accessor_insn_ids)) + self.kernel.get_grid_sizes_for_stmt_ids_as_exprs(accessor_stmt_ids)) if temporary.scope == lp.temp_var_scope.LOCAL: # Elide local axes in the save slot for local temporaries. 
@@ -506,7 +506,7 @@ class TemporarySaver(object): self.new_subdomain = new_subdomain - save_or_load_insn_id = self.insn_name_gen( + save_or_load_stmt_id = self.stmt_name_gen( "{name}.{mode}".format(name=temporary, mode=mode)) def add_subscript_if_subscript_nonempty(agg, subscript=()): @@ -532,15 +532,15 @@ class TemporarySaver(object): if mode == "save": args = reversed(args) - accessing_insns_in_subkernel = self.find_accessing_instructions_in_subkernel( + accessing_stmts_in_subkernel = self.find_accessing_statements_in_subkernel( temporary, subkernel) if mode == "save": - depends_on = accessing_insns_in_subkernel + depends_on = accessing_stmts_in_subkernel update_deps = frozenset() elif mode == "reload": depends_on = frozenset() - update_deps = accessing_insns_in_subkernel + update_deps = accessing_stmts_in_subkernel pre_barrier, post_barrier = self.get_enclosing_global_barrier_pair(subkernel) @@ -550,11 +550,11 @@ class TemporarySaver(object): if post_barrier is not None: update_deps |= set([post_barrier]) - # Create the load / store instruction. + # Create the load / store statement. 
from loopy.kernel.data import Assignment - save_or_load_insn = Assignment( + save_or_load_stmt = Assignment( *args, - id=save_or_load_insn_id, + id=save_or_load_stmt_id, within_inames=( self.subkernel_to_surrounding_inames[subkernel] | frozenset(hw_inames + dim_inames)), @@ -564,18 +564,18 @@ class TemporarySaver(object): boostable_into=frozenset()) if mode == "save": - self.temporary_to_save_ids[temporary].add(save_or_load_insn_id) + self.temporary_to_save_ids[temporary].add(save_or_load_stmt_id) else: - self.temporary_to_reload_ids[temporary].add(save_or_load_insn_id) + self.temporary_to_reload_ids[temporary].add(save_or_load_stmt_id) - self.subkernel_to_newly_added_insn_ids[subkernel].add(save_or_load_insn_id) + self.subkernel_to_newly_added_stmt_ids[subkernel].add(save_or_load_stmt_id) - self.insns_to_insert.append(save_or_load_insn) + self.stmts_to_insert.append(save_or_load_stmt) - for insn_id in update_deps: - insn = self.insns_to_update.get(insn_id, self.kernel.id_to_insn[insn_id]) - self.insns_to_update[insn_id] = insn.copy( - depends_on=insn.depends_on | frozenset([save_or_load_insn_id])) + for stmt_id in update_deps: + stmt = self.stmts_to_update.get(stmt_id, self.kernel.id_to_stmt[stmt_id]) + self.stmts_to_update[stmt_id] = stmt.copy( + depends_on=stmt.depends_on | frozenset([save_or_load_stmt_id])) self.updated_temporary_variables[promoted_temporary.name] = ( promoted_temporary.as_kernel_temporary(self.kernel)) @@ -584,17 +584,17 @@ class TemporarySaver(object): @memoize_method def finish(self): - new_instructions = [] + new_statements = [] - insns_to_insert = dict((insn.id, insn) for insn in self.insns_to_insert) + stmts_to_insert = dict((stmt.id, stmt) for stmt in self.stmts_to_insert) - for orig_insn in self.kernel.instructions: - if orig_insn.id in self.insns_to_update: - new_instructions.append(self.insns_to_update[orig_insn.id]) + for orig_stmt in self.kernel.statements: + if orig_stmt.id in self.stmts_to_update: + 
new_statements.append(self.stmts_to_update[orig_stmt.id]) else: - new_instructions.append(orig_insn) - new_instructions.extend( - sorted(insns_to_insert.values(), key=lambda insn: insn.id)) + new_statements.append(orig_stmt) + new_statements.extend( + sorted(stmts_to_insert.values(), key=lambda stmt: stmt.id)) self.updated_iname_to_tag.update(self.kernel.iname_to_tag) self.updated_temporary_variables.update(self.kernel.temporary_variables) @@ -606,22 +606,22 @@ class TemporarySaver(object): kernel = self.kernel.copy( domains=new_domains, - instructions=new_instructions, + statements=new_statements, iname_to_tag=self.updated_iname_to_tag, temporary_variables=self.updated_temporary_variables, - overridden_get_grid_sizes_for_insn_ids=None) + overridden_get_grid_sizes_for_stmt_ids=None) # Add nosync directives to any saves or reloads that were added with a # potential dependency chain. from loopy.kernel.tools import get_subkernels for subkernel in get_subkernels(kernel): - relevant_insns = self.subkernel_to_newly_added_insn_ids[subkernel] + relevant_stmts = self.subkernel_to_newly_added_stmt_ids[subkernel] from itertools import product for temporary in self.temporary_to_reload_ids: for source, sink in product( - relevant_insns & self.temporary_to_reload_ids[temporary], - relevant_insns & self.temporary_to_save_ids[temporary]): + relevant_stmts & self.temporary_to_reload_ids[temporary], + relevant_stmts & self.temporary_to_save_ids[temporary]): kernel = lp.add_nosync(kernel, "global", source, sink) from loopy.kernel.tools import assign_automatic_axes @@ -662,7 +662,7 @@ class TemporarySaver(object): + len(promoted_temporary.hw_dims)) for dim_idx, dim_size in enumerate(promoted_temporary.non_hw_dims): - new_iname = self.insn_name_gen("{name}_{mode}_axis_{dim}_{sk}". + new_iname = self.stmt_name_gen("{name}_{mode}_axis_{dim}_{sk}". format(name=orig_temporary.name, mode=mode, dim=dim_idx, @@ -689,7 +689,7 @@ class TemporarySaver(object): # Add hardware dims. 
for hw_iname_idx, (hw_tag, dim) in enumerate( zip(promoted_temporary.hw_tags, promoted_temporary.hw_dims)): - new_iname = self.insn_name_gen("{name}_{mode}_hw_dim_{dim}_{sk}". + new_iname = self.stmt_name_gen("{name}_{mode}_hw_dim_{dim}_{sk}". format(name=orig_temporary.name, mode=mode, dim=hw_iname_idx, @@ -721,7 +721,7 @@ class TemporarySaver(object): def save_and_reload_temporaries(knl): """ - Add instructions to save and reload temporary variables that are live + Add statements to save and reload temporary variables that are live across kernel calls. The basic code transformation turns schedule segments:: diff --git a/loopy/transform/statement.py b/loopy/transform/statement.py new file mode 100644 index 000000000..afea0430e --- /dev/null +++ b/loopy/transform/statement.py @@ -0,0 +1,339 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import six # noqa + +from loopy.diagnostic import LoopyError + + +# {{{ find_statements + +def find_statements(kernel, stmt_match): + from loopy.match import parse_match + match = parse_match(stmt_match) + return [stmt for stmt in kernel.statements if match(kernel, stmt)] + +# }}} + + +# {{{ map_statements + +def map_statements(kernel, stmt_match, f): + from loopy.match import parse_match + match = parse_match(stmt_match) + + new_stmts = [] + + for stmt in kernel.statements: + if match(kernel, stmt): + new_stmts.append(f(stmt)) + else: + new_stmts.append(stmt) + + return kernel.copy(statements=new_stmts) + +# }}} + + +# {{{ set_statement_priority + +def set_statement_priority(kernel, stmt_match, priority): + """Set the priority of statements matching *stmt_match* to *priority*. + + *stmt_match* may be any statement id match understood by + :func:`loopy.match.parse_match`. + """ + + def set_prio(stmt): + return stmt.copy(priority=priority) + + return map_statements(kernel, stmt_match, set_prio) + +# }}} + + +# {{{ add_dependency + +def add_dependency(kernel, stmt_match, depends_on): + """Add the statement dependency *dependency* to the statements matched + by *stmt_match*. + + *stmt_match* and *depends_on* may be any statement id match understood by + :func:`loopy.match.parse_match`. + + .. versionchanged:: 2016.3 + + Third argument renamed to *depends_on* for clarity, allowed to + be not just ID but also match expression. 
+ """ + + if isinstance(depends_on, str) and depends_on in kernel.id_to_stmt: + added_deps = frozenset([depends_on]) + else: + added_deps = frozenset( + dep.id for dep in find_statements(kernel, depends_on)) + + if not added_deps: + raise LoopyError("no statements found matching '%s' " + "(to add as dependencies)" % depends_on) + + matched = [False] + + def add_dep(stmt): + new_deps = stmt.depends_on + matched[0] = True + if new_deps is None: + new_deps = added_deps + else: + new_deps = new_deps | added_deps + + return stmt.copy(depends_on=new_deps) + + result = map_statements(kernel, stmt_match, add_dep) + + if not matched[0]: + raise LoopyError("no statements found matching '%s' " + "(to which dependencies would be added)" % stmt_match) + + return result + +# }}} + + +# {{{ remove_statements + +def remove_statements(kernel, stmt_ids): + """Return a new kernel with statements in *stmt_ids* removed. + + Dependencies across (one, for now) deleted isntructions are propagated. + Behavior is undefined for now for chains of dependencies within the + set of deleted statements. + + This also updates *no_sync_with* for all statements. 
+ """ + + if not stmt_ids: + return kernel + + assert isinstance(stmt_ids, set) + id_to_stmt = kernel.id_to_stmt + + new_stmts = [] + for stmt in kernel.statements: + if stmt.id in stmt_ids: + continue + + # transitively propagate dependencies + # (only one level for now) + if stmt.depends_on is None: + depends_on = frozenset() + else: + depends_on = stmt.depends_on + + new_deps = depends_on - stmt_ids + + for dep_id in depends_on & stmt_ids: + new_deps = new_deps | id_to_stmt[dep_id].depends_on + + # update no_sync_with + + new_no_sync_with = frozenset((stmt_id, scope) + for stmt_id, scope in stmt.no_sync_with + if stmt_id not in stmt_ids) + + new_stmts.append( + stmt.copy(depends_on=new_deps, no_sync_with=new_no_sync_with)) + + return kernel.copy( + statements=new_stmts) + +# }}} + + +# {{{ replace_statement_ids + +def replace_statement_ids(kernel, replacements): + new_stmts = [] + + for stmt in kernel.statements: + changed = False + new_depends_on = [] + new_no_sync_with = [] + + for dep in stmt.depends_on: + if dep in replacements: + new_depends_on.extend(replacements[dep]) + changed = True + else: + new_depends_on.append(dep) + + for stmt_id, scope in stmt.no_sync_with: + if stmt_id in replacements: + new_no_sync_with.extend( + (repl, scope) for repl in replacements[stmt_id]) + changed = True + else: + new_no_sync_with.append((stmt_id, scope)) + + new_stmts.append( + stmt.copy( + depends_on=frozenset(new_depends_on), + no_sync_with=frozenset(new_no_sync_with)) + if changed else stmt) + + return kernel.copy(statements=new_stmts) + +# }}} + + +# {{{ tag_statements + +def tag_statements(kernel, new_tag, within=None): + from loopy.match import parse_match + within = parse_match(within) + + new_stmts = [] + for stmt in kernel.statements: + if within(kernel, stmt): + new_stmts.append( + stmt.copy(tags=stmt.tags | frozenset([new_tag]))) + else: + new_stmts.append(stmt) + + return kernel.copy(statements=new_stmts) + +# }}} + + +# {{{ add nosync + +def 
add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): + """Add a *no_sync_with* directive between *source* and *sink*. + *no_sync_with* is only added if *sink* depends on *source* or + if the statement pair is in a conflicting group. + + This function does not check for the presence of a memory dependency. + + :arg kernel: The kernel + :arg source: Either a single statement id, or any statement id + match understood by :func:`loopy.match.parse_match`. + :arg sink: Either a single statement id, or any statement id + match understood by :func:`loopy.match.parse_match`. + :arg scope: A valid *no_sync_with* scope. See + :attr:`loopy.StatementBase.no_sync_with` for allowable scopes. + :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with* + to both the source and sink statements, otherwise the directive + is only added to the sink statements. + :arg force: A :class:`bool`. If *True*, add a *no_sync_with* directive + even without the presence of a dependency edge or conflicting + statement group. 
+ + :return: The updated kernel + """ + + if isinstance(source, str) and source in kernel.id_to_stmt: + sources = frozenset([source]) + else: + sources = frozenset( + source.id for source in find_statements(kernel, source)) + + if isinstance(sink, str) and sink in kernel.id_to_stmt: + sinks = frozenset([sink]) + else: + sinks = frozenset( + sink.id for sink in find_statements(kernel, sink)) + + def stmts_in_conflicting_groups(stmt1_id, stmt2_id): + stmt1 = kernel.id_to_stmt[stmt1_id] + stmt2 = kernel.id_to_stmt[stmt2_id] + return ( + bool(stmt1.groups & stmt2.conflicts_with_groups) + or + bool(stmt2.groups & stmt1.conflicts_with_groups)) + + from collections import defaultdict + nosync_to_add = defaultdict(set) + + for sink in sinks: + for source in sources: + + needs_nosync = force or ( + source in kernel.recursive_stmt_dep_map()[sink] + or stmts_in_conflicting_groups(source, sink)) + + if not needs_nosync: + continue + + nosync_to_add[sink].add((source, scope)) + if bidirectional: + nosync_to_add[source].add((sink, scope)) + + new_statements = list(kernel.statements) + + for i, stmt in enumerate(new_statements): + if stmt.id in nosync_to_add: + new_statements[i] = stmt.copy(no_sync_with=stmt.no_sync_with + | frozenset(nosync_to_add[stmt.id])) + + return kernel.copy(statements=new_statements) + +# }}} + + +# {{{ uniquify_statement_ids + +def uniquify_statement_ids(kernel): + """Converts any ids that are :class:`loopy.UniqueName` or *None* into unique + strings. + + This function does *not* deduplicate existing statement ids. 
+ """ + + from loopy.kernel.creation import UniqueName + + stmt_ids = set( + stmt.id for stmt in kernel.statements + if stmt.id is not None and not isinstance(stmt.id, UniqueName)) + + from pytools import UniqueNameGenerator + stmt_id_gen = UniqueNameGenerator(stmt_ids) + + new_statements = [] + + for stmt in kernel.statements: + if stmt.id is None: + new_statements.append( + stmt.copy(id=stmt_id_gen("stmt"))) + elif isinstance(stmt.id, UniqueName): + new_statements.append( + stmt.copy(id=stmt_id_gen(stmt.id.name))) + else: + new_statements.append(stmt) + + return kernel.copy(statements=new_statements) + +# }}} + + +# vim: foldmethod=marker diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 79ceff9fd..0fb706e2d 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) class ExprDescriptor(ImmutableRecord): - __slots__ = ["insn", "expr", "unif_var_dict"] + __slots__ = ["stmt", "expr", "unif_var_dict"] # {{{ extract_subst @@ -128,7 +128,7 @@ def extract_subst(kernel, subst_name, template, parameters=()): expr_descriptors.append( ExprDescriptor( - insn=insn, + stmt=stmt, expr=expr, unif_var_dict=dict((lhs.name, rhs) for lhs, rhs in urec.equations))) @@ -140,8 +140,8 @@ def extract_subst(kernel, subst_name, template, parameters=()): CallbackMapper, WalkMapper, IdentityMapper) dfmapper = CallbackMapper(gather_exprs, WalkMapper()) - for insn in kernel.instructions: - dfmapper(insn.expression) + for stmt in kernel.statements: + dfmapper(stmt.expression) for sr in six.itervalues(kernel.substitutions): dfmapper(sr.expression) @@ -151,7 +151,7 @@ def extract_subst(kernel, subst_name, template, parameters=()): if not expr_descriptors: raise RuntimeError("no expressions matching '%s'" % template) - # {{{ substitute rule into instructions + # {{{ substitute rule into statements def replace_exprs(expr, mapper): found = False @@ -175,11 +175,11 @@ def extract_subst(kernel, subst_name, template, 
parameters=()): cbmapper = CallbackMapper(replace_exprs, IdentityMapper()) - new_insns = [] + new_stmts = [] - for insn in kernel.instructions: - new_expr = cbmapper(insn.expression) - new_insns.append(insn.copy(expression=new_expr)) + for stmt in kernel.statements: + new_expr = cbmapper(stmt.expression) + new_stmts.append(stmt.copy(expression=new_expr)) from loopy.kernel.data import SubstitutionRule new_substs = { @@ -196,7 +196,7 @@ def extract_subst(kernel, subst_name, template, parameters=()): # }}} return kernel.copy( - instructions=new_insns, + statements=new_stmts, substitutions=new_substs) # }}} @@ -205,14 +205,14 @@ def extract_subst(kernel, subst_name, template, parameters=()): # {{{ assignment_to_subst class AssignmentToSubstChanger(RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, lhs_name, definition_insn_ids, + def __init__(self, rule_mapping_context, lhs_name, definition_stmt_ids, usage_to_definition, extra_arguments, within): self.var_name_gen = rule_mapping_context.make_unique_var_name super(AssignmentToSubstChanger, self).__init__(rule_mapping_context) self.lhs_name = lhs_name - self.definition_insn_ids = definition_insn_ids + self.definition_stmt_ids = definition_stmt_ids self.usage_to_definition = usage_to_definition from pymbolic import var @@ -220,18 +220,18 @@ class AssignmentToSubstChanger(RuleAwareIdentityMapper): self.within = within - self.definition_insn_id_to_subst_name = {} + self.definition_stmt_id_to_subst_name = {} self.saw_unmatched_usage_sites = {} - for def_id in self.definition_insn_ids: + for def_id in self.definition_stmt_ids: self.saw_unmatched_usage_sites[def_id] = False - def get_subst_name(self, def_insn_id): + def get_subst_name(self, def_stmt_id): try: - return self.definition_insn_id_to_subst_name[def_insn_id] + return self.definition_stmt_id_to_subst_name[def_stmt_id] except KeyError: subst_name = self.var_name_gen(self.lhs_name+"_subst") - self.definition_insn_id_to_subst_name[def_insn_id] = 
subst_name + self.definition_stmt_id_to_subst_name[def_stmt_id] = subst_name return subst_name def map_variable(self, expr, expn_state): @@ -255,16 +255,16 @@ class AssignmentToSubstChanger(RuleAwareIdentityMapper): expr, expn_state) def transform_access(self, index, expn_state): - my_insn_id = expn_state.insn_id + my_stmt_id = expn_state.stmt_id - if my_insn_id in self.definition_insn_ids: + if my_stmt_id in self.definition_stmt_ids: return None - my_def_id = self.usage_to_definition[my_insn_id] + my_def_id = self.usage_to_definition[my_stmt_id] if not self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack): self.saw_unmatched_usage_sites[my_def_id] = True return None @@ -314,31 +314,31 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, from loopy.kernel.creation import apply_single_writer_depencency_heuristic dep_kernel = apply_single_writer_depencency_heuristic(dep_kernel) - id_to_insn = dep_kernel.id_to_insn + id_to_stmt = dep_kernel.id_to_stmt - def get_relevant_definition_insn_id(usage_insn_id): - insn = id_to_insn[usage_insn_id] + def get_relevant_definition_stmt_id(usage_stmt_id): + stmt = id_to_stmt[usage_stmt_id] def_id = set() - for dep_id in insn.depends_on: - dep_insn = id_to_insn[dep_id] - if lhs_name in dep_insn.write_dependency_names(): - if lhs_name in dep_insn.read_dependency_names(): - raise LoopyError("instruction '%s' both reads *and* " + for dep_id in stmt.depends_on: + dep_stmt = id_to_stmt[dep_id] + if lhs_name in dep_stmt.write_dependency_names(): + if lhs_name in dep_stmt.read_dependency_names(): + raise LoopyError("statement '%s' both reads *and* " "writes '%s'--cannot transcribe to substitution " "rule" % (dep_id, lhs_name)) def_id.add(dep_id) else: - rec_result = get_relevant_definition_insn_id(dep_id) + rec_result = get_relevant_definition_stmt_id(dep_id) if rec_result is not None: def_id.add(rec_result) if len(def_id) > 1: raise LoopyError("more than one write to 
'%s' found in " "depdendencies of '%s'--definition cannot be resolved " - "(writer instructions ids: %s)" - % (lhs_name, usage_insn_id, ", ".join(def_id))) + "(writer statements ids: %s)" + % (lhs_name, usage_stmt_id, ", ".join(def_id))) if not def_id: return None @@ -349,26 +349,26 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, usage_to_definition = {} - for insn in dep_kernel.instructions: - if lhs_name not in insn.read_dependency_names(): + for stmt in dep_kernel.statements: + if lhs_name not in stmt.read_dependency_names(): continue - def_id = get_relevant_definition_insn_id(insn.id) + def_id = get_relevant_definition_stmt_id(stmt.id) if def_id is None: raise LoopyError("no write to '%s' found in dependency tree " "of '%s'--definition cannot be resolved" - % (lhs_name, insn.id)) + % (lhs_name, stmt.id)) - usage_to_definition[insn.id] = def_id + usage_to_definition[stmt.id] = def_id - definition_insn_ids = set() - for insn in kernel.instructions: - if lhs_name in insn.write_dependency_names(): - definition_insn_ids.add(insn.id) + definition_stmt_ids = set() + for stmt in kernel.statements: + if lhs_name in stmt.write_dependency_names(): + definition_stmt_ids.add(stmt.id) # }}} - if not definition_insn_ids: + if not definition_stmt_ids: raise LoopyError("no assignments to variable '%s' found" % lhs_name) @@ -378,7 +378,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) tts = AssignmentToSubstChanger(rule_mapping_context, - lhs_name, definition_insn_ids, + lhs_name, definition_stmt_ids, usage_to_definition, extra_arguments, within) kernel = rule_mapping_context.finish_kernel(tts.map_kernel(kernel)) @@ -388,27 +388,27 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, # {{{ create new substitution rules new_substs = kernel.substitutions.copy() - for def_id, subst_name in 
six.iteritems(tts.definition_insn_id_to_subst_name): - def_insn = kernel.id_to_insn[def_id] + for def_id, subst_name in six.iteritems(tts.definition_stmt_id_to_subst_name): + def_stmt = kernel.id_to_stmt[def_id] from loopy.kernel.data import Assignment - assert isinstance(def_insn, Assignment) + assert isinstance(def_stmt, Assignment) from pymbolic.primitives import Variable, Subscript - if isinstance(def_insn.assignee, Subscript): - indices = def_insn.assignee.index_tuple - elif isinstance(def_insn.assignee, Variable): + if isinstance(def_stmt.assignee, Subscript): + indices = def_stmt.assignee.index_tuple + elif isinstance(def_stmt.assignee, Variable): indices = () else: raise LoopyError( "Unrecognized LHS type: %s" - % type(def_insn.assignee).__name__) + % type(def_stmt.assignee).__name__) arguments = [] for i in indices: if not isinstance(i, Variable): - raise LoopyError("In defining instruction '%s': " + raise LoopyError("In defining statement '%s': " "asignee index '%s' is not a plain variable. " "Perhaps use loopy.affine_map_inames() " "to perform substitution." 
% (def_id, i)) @@ -418,7 +418,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, new_substs[subst_name] = SubstitutionRule( name=subst_name, arguments=tuple(arguments) + extra_arguments, - expression=def_insn.expression) + expression=def_stmt.expression) # }}} @@ -450,11 +450,11 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, # }}} import loopy as lp - kernel = lp.remove_instructions( + kernel = lp.remove_statements( kernel, set( - insn_id - for insn_id, still_used in six.iteritems( + stmt_id + for stmt_id, still_used in six.iteritems( tts.saw_unmatched_usage_sites) if not still_used)) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 409cbbc5e..b38ab7fa5 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -418,17 +418,17 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): type_inf_mapper = type_inf_mapper.copy() - for writer_insn_id in kernel.writer_map().get(var_name, []): - writer_insn = kernel.id_to_insn[writer_insn_id] - if not isinstance(writer_insn, lp.MultiAssignmentBase): + for writer_stmt_id in kernel.writer_map().get(var_name, []): + writer_stmt = kernel.id_to_stmt[writer_stmt_id] + if not isinstance(writer_stmt, lp.MultiAssignmentBase): continue - expr = subst_expander(writer_insn.expression) + expr = subst_expander(writer_stmt.expression) debug(" via expr %s", expr) - if isinstance(writer_insn, lp.Assignment): + if isinstance(writer_stmt, lp.Assignment): result = type_inf_mapper(expr, return_dtype_set=True) - elif isinstance(writer_insn, lp.CallInstruction): + elif isinstance(writer_stmt, lp.CallStatement): return_dtype_set = type_inf_mapper(expr, return_tuple=True, return_dtype_set=True) @@ -437,7 +437,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): result_i = None found = False for assignee, comp_dtype_set in zip( - writer_insn.assignee_var_names(), return_dtype_set): + writer_stmt.assignee_var_names(), 
return_dtype_set): if assignee == var_name: found = True result_i = comp_dtype_set @@ -526,8 +526,8 @@ def infer_unknown_types(kernel, expect_completion=False): dep_graph = dict( (written_var, set( read_var - for insn_id in writer_map.get(written_var, []) - for read_var in kernel.id_to_insn[insn_id].read_dependency_names() + for stmt_id in writer_map.get(written_var, []) + for read_var in kernel.id_to_stmt[stmt_id].read_dependency_names() if read_var in names_for_type_inference)) for written_var in names_for_type_inference) diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index a2cba7c57..ba5c7ecaa 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -51,7 +51,7 @@ def test_laplacian_stiffness(ctx_factory): # This (mostly) reproduces Figure 3.1. knl = lp.tag_inames(knl, {"dx_axis": "unr"}) - return knl, ["K", "i", "j", "q", "ax_b_insn"] + return knl, ["K", "i", "j", "q", "ax_b_stmt"] def variant_pg4(knl): # This (mostly) reproduces the unlabeled code snippet on pg. 4. @@ -60,7 +60,7 @@ def test_laplacian_stiffness(ctx_factory): Ncloc = 16 knl = lp.split_iname(knl, "K", Ncloc, outer_iname="Ko", inner_iname="Kloc") - return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_insn"] + return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_stmt"] def variant_fig32(knl): # This (mostly) reproduces Figure 3.2. @@ -71,7 +71,7 @@ def test_laplacian_stiffness(ctx_factory): knl = lp.precompute(knl, "dPsi", np.float32, ["i", "q", "dx_axis"], default_tag=None) knl = lp.tag_inames(knl, {"dx_axis": "unr", "dxi": "unr"}) - return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_insn"] + return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_stmt"] def variant_fig33(knl): # This is meant to (mostly) reproduce Figure 3.3. 
@@ -97,7 +97,7 @@ def test_laplacian_stiffness(ctx_factory): outer_iname="Ko", inner_iname="Kloc", outer_tag="g.0") knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"}) - return knl, ["K", "i", "j", "q", "ax_b_insn"] + return knl, ["K", "i", "j", "q", "ax_b_stmt"] def variant_simple_gpu_prefetch(knl): # This adds prefetching to the GPU variant above. @@ -116,7 +116,7 @@ def test_laplacian_stiffness(ctx_factory): knl = lp.add_prefetch(knl, "DPsi", [0, 1, 2]) knl = lp.add_prefetch(knl, "jacInv", [0, 1, 3]) knl = lp.add_prefetch(knl, "jacDet", [1]) - return knl, ["K", "i", "j", "q", "ax_b_insn"] + return knl, ["K", "i", "j", "q", "ax_b_stmt"] # Plug in variant name here # | diff --git a/stmt-compat-fixes.patch b/stmt-compat-fixes.patch index 7c7c9322e..7cd61941e 100644 --- a/stmt-compat-fixes.patch +++ b/stmt-compat-fixes.patch @@ -19,8 +19,8 @@ index 68fcca1..6d788df 100644 + overridden_get_grid_sizes_for_stmt_ids=None, + + # compat -+ instructions=None, -+ overridden_get_grid_sizes_for_insn_ids=None, ++ statements=None, ++ overridden_get_grid_sizes_for_stmt_ids=None, + ): """ :arg overridden_get_grid_sizes_for_stmt_ids: A callable. 
When kernels get @@ -29,22 +29,22 @@ index 68fcca1..6d788df 100644 from loopy.kernel.tools import SetOperationCacheManager cache_manager = SetOperationCacheManager() -+ if statements is not None and instructions is not None: -+ raise TypeError("may not specify both instructions and statements") -+ elif statements is None and instructions is None: ++ if statements is not None and instructions is not None: ++ raise TypeError("may not specify both instructions and statements") ++ elif statements is None and instructions is None: + raise TypeError( -+ "must specify exactly one of instructions and statements") -+ elif instructions is not None: -+ statements = instructions ++ "must specify exactly one of instructions and statements") ++ elif instructions is not None: ++ statements = instructions + + if (overridden_get_grid_sizes_for_stmt_ids is not None + and overridden_get_grid_sizes_for_stmt_ids is not None): + raise TypeError("may not specify both " + "overridden_get_grid_sizes_for_stmt_ids " -+ "and overridden_get_grid_sizes_for_insn_ids{") ++ "and overridden_get_grid_sizes_for_insn_ids") -+ elif overridden_get_grid_sizes_for_insn_ids is not None: ++ elif overridden_get_grid_sizes_for_insn_ids is not None: + overridden_get_grid_sizes_for_stmt_ids = \ -+ overridden_get_grid_sizes_for_insn_ids ++ overridden_get_grid_sizes_for_insn_ids + # {{{ process assumptions @@ -55,7 +55,7 @@ index 68fcca1..6d788df 100644 "rules", - "statements", + "Statements", -+ "instructions", ++ "instructions", "Dependencies", "schedule", ]) @@ -64,7 +64,7 @@ index 68fcca1..6d788df 100644 lines.append(str(kernel.substitutions[rule_name])) - if "statements" in what: -+ if "Statements" in what or "instructions" in what: ++ if "Statements" in what or "instructions" in what: lines.extend(sep) if show_labels: lines.append("STATEMENTS:") @@ -72,17 +72,17 @@ index 68fcca1..6d788df 100644 # }}} -+ # {{{ "instruction" compat goop ++ # {{{ "instruction" compat goop + + @property -+ def id_to_insn(self): ++ def 
id_to_insn(self): + return self.id_to_stmt + + @property -+ def instructions(self): ++ def instructions(self): + return self.statements + -+ def get_instruction_id_generator(self, based_on="insn"): ++ def get_instruction_id_generator(self, based_on="insn"): + return self.get_statement_id_generator(based_on) + + # }}} diff --git a/test/test_diff.py b/test/test_diff.py index 95471f9b1..c4d752349 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -65,7 +65,7 @@ def test_diff(ctx_factory): dknl, diff_map = diff_kernel(knl, "z", "x") dknl = lp.remove_unused_arguments(dknl) - dknl = lp.add_inames_to_insn(dknl, "diff_i0", "writes:a_dx or writes:a") + dknl = lp.add_inames_to_stmt(dknl, "diff_i0", "writes:a_dx or writes:a") print(dknl) diff --git a/test/test_fortran.py b/test/test_fortran.py index 6e05aa6ad..4fe79c2bf 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -267,7 +267,7 @@ def test_tagged(ctx_factory): knl, = lp.parse_fortran(fortran_src) - assert sum(1 for insn in lp.find_instructions(knl, "tag:input")) == 2 + assert sum(1 for stmt in lp.find_statements(knl, "tag:input")) == 2 @pytest.mark.parametrize("buffer_inames", [ diff --git a/test/test_linalg.py b/test/test_linalg.py index 772d536d1..e7c689757 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -290,7 +290,7 @@ def test_rank_one(ctx_factory): knl = lp.add_prefetch(knl, "a") knl = lp.add_prefetch(knl, "b") knl = lp.prioritize_loops(knl, ["i", "j"]) - knl = lp.add_inames_to_insn(knl, "i", "writes:b_fetch") + knl = lp.add_inames_to_stmt(knl, "i", "writes:b_fetch") return knl def variant_2(knl): diff --git a/test/test_loopy.py b/test/test_loopy.py index 704fd391f..8fcba1e21 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -628,14 +628,14 @@ def test_vector_ilp_with_prefetch(ctx_factory): assert len(list(re.finditer("barrier", code))) == 1 -def test_c_instruction(ctx_factory): +def test_c_statement(ctx_factory): #logging.basicConfig(level=logging.DEBUG) ctx = ctx_factory() 
knl = lp.make_kernel( "{[i,j]: 0<=i,jt = 1 {id=insn1,nosync=insn1} - t = 2 {id=insn2,nosync=insn1:insn2} - t = 3 {id=insn3,nosync=insn1@local:insn2@global:insn3@any} - t = 4 {id=insn4,nosync_query=id:insn*@local} - t = 5 {id=insn5,nosync_query=id:insn1} + <>t = 1 {id=stmt1,nosync=stmt1} + t = 2 {id=stmt2,nosync=stmt1:stmt2} + t = 3 {id=stmt3,nosync=stmt1@local:stmt2@global:stmt3@any} + t = 4 {id=stmt4,nosync_query=id:stmt*@local} + t = 5 {id=stmt5,nosync_query=id:stmt1} """, options=lp.Options(allow_terminal_colors=False)) kernel_str = str(knl) print(kernel_str) - assert "id=insn1, no_sync_with=insn1@any" in kernel_str - assert "id=insn2, no_sync_with=insn1@any:insn2@any" in kernel_str - assert "id=insn3, no_sync_with=insn1@local:insn2@global:insn3@any" in kernel_str - assert "id=insn4, no_sync_with=insn1@local:insn2@local:insn3@local:insn5@local" in kernel_str # noqa - assert "id=insn5, no_sync_with=insn1@any" in kernel_str + assert "id=stmt1, no_sync_with=stmt1@any" in kernel_str + assert "id=stmt2, no_sync_with=stmt1@any:stmt2@any" in kernel_str + assert "id=stmt3, no_sync_with=stmt1@local:stmt2@global:stmt3@any" in kernel_str + assert "id=stmt4, no_sync_with=stmt1@local:stmt2@local:stmt3@local:stmt5@local" in kernel_str # noqa + assert "id=stmt5, no_sync_with=stmt1@any" in kernel_str def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): - from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop) + from loopy.schedule import (RunStatement, Barrier, EnterLoop, LeaveLoop) watch_for_barrier = False seen_barrier = False loop_level = 0 for sched_item in knl.schedule: - if isinstance(sched_item, RunInstruction): - if sched_item.insn_id == id1: + if isinstance(sched_item, RunStatement): + if sched_item.stmt_id == id1: watch_for_barrier = True - elif sched_item.insn_id == id2: + elif sched_item.stmt_id == id2: assert watch_for_barrier assert seen_barrier return @@ -2313,17 +2313,17 @@ def 
test_barrier_in_overridden_get_grid_size_expanded_kernel(): vecsize = 16 knl = lp.split_iname(knl, 'i', vecsize, inner_tag='l.0') - # artifically expand via overridden_get_grid_sizes_for_insn_ids + # artifically expand via overridden_get_grid_sizes_for_stmt_ids class GridOverride(object): def __init__(self, clean, vecsize=vecsize): self.clean = clean self.vecsize = vecsize - def __call__(self, insn_ids, ignore_auto=True): - gsize, _ = self.clean.get_grid_sizes_for_insn_ids(insn_ids, ignore_auto) + def __call__(self, stmt_ids, ignore_auto=True): + gsize, _ = self.clean.get_grid_sizes_for_stmt_ids(stmt_ids, ignore_auto) return gsize, (self.vecsize,) - knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=GridOverride( + knl = knl.copy(overridden_get_grid_sizes_for_stmt_ids=GridOverride( knl.copy(), vecsize)) # make sure we can generate the code lp.generate_code_v2(knl) @@ -2384,7 +2384,7 @@ def test_global_barrier_order_finding(): assert lp.get_global_barrier_order(knl) == ("top", "yoink", "postloop") - for insn, barrier in ( + for stmt, barrier in ( ("nop", None), ("top", None), ("wr_z", "top"), @@ -2392,7 +2392,7 @@ def test_global_barrier_order_finding(): ("yoink", "top"), ("postloop", "yoink"), ("zzzv", "postloop")): - assert lp.find_most_recent_global_barrier(knl, insn) == barrier + assert lp.find_most_recent_global_barrier(knl, stmt) == barrier def test_global_barrier_error_if_unordered(): diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 0de08f5f6..5f0d03e72 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -59,8 +59,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa knl for knl in lp.parse_fortran(source, filename, auto_dependencies=False) if "KernelR" in knl.name or "KernelS" in knl.name ] - hsv_r = lp.tag_instructions(hsv_r, "rknl") - hsv_s = lp.tag_instructions(hsv_s, "sknl") + hsv_r = lp.tag_statements(hsv_r, "rknl") + hsv_s = lp.tag_statements(hsv_s, "sknl") hsv = lp.fuse_kernels([hsv_r, 
hsv_s], ["_r", "_s"]) #hsv = hsv_s @@ -92,8 +92,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa # turn the first reads into subst rules local_prep_var_names = set() - for insn in lp.find_instructions(hsv, "tag:local_prep"): - assignee, = insn.assignee_var_names() + for stmt in lp.find_statements(hsv, "tag:local_prep"): + assignee, = stmt.assignee_var_names() local_prep_var_names.add(assignee) hsv = lp.assignment_to_subst(hsv, assignee) @@ -101,8 +101,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa hsv = lp.assignment_to_subst(hsv, "JinvD_r") hsv = lp.assignment_to_subst(hsv, "JinvD_s") - r_fluxes = lp.find_instructions(hsv, "tag:compute_fluxes and tag:rknl") - s_fluxes = lp.find_instructions(hsv, "tag:compute_fluxes and tag:sknl") + r_fluxes = lp.find_statements(hsv, "tag:compute_fluxes and tag:rknl") + s_fluxes = lp.find_statements(hsv, "tag:compute_fluxes and tag:sknl") if ilp_multiple > 1: hsv = lp.split_iname(hsv, "k", 2, inner_tag="ilp") @@ -117,15 +117,15 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa flux_store_idx = 0 - for rflux_insn, sflux_insn in zip(r_fluxes, s_fluxes): - for knl_tag, insn, flux_inames, tmps, flux_precomp_inames in [ - ("rknl", rflux_insn, ("j", "n",), rtmps, ("jj", "ii",)), - ("sknl", sflux_insn, ("i", "n",), stmps, ("ii", "jj",)), + for rflux_stmt, sflux_stmt in zip(r_fluxes, s_fluxes): + for knl_tag, stmt, flux_inames, tmps, flux_precomp_inames in [ + ("rknl", rflux_stmt, ("j", "n",), rtmps, ("jj", "ii",)), + ("sknl", sflux_stmt, ("i", "n",), stmps, ("ii", "jj",)), ]: - flux_var, = insn.assignee_var_names() - print(insn) + flux_var, = stmt.assignee_var_names() + print(stmt) - reader, = lp.find_instructions(hsv, + reader, = lp.find_statements(hsv, "tag:{knl_tag} and reads:{flux_var}" .format(knl_tag=knl_tag, flux_var=flux_var)) diff --git a/test/test_reduction.py b/test/test_reduction.py index 555b8c0cc..86b917a42 100644 --- 
a/test/test_reduction.py +++ b/test/test_reduction.py @@ -415,7 +415,7 @@ def test_parallel_multi_output_reduction(ctx_factory): def test_reduction_with_conditional(): # Test whether realization of a reduction inherits predicates - # of the original instruction. Tested with the CTarget, because + # of the original statement. Tested with the CTarget, because # the PyOpenCL target will hoist the conditional into the host # code in this minimal example. knl = lp.make_kernel( diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index 0571e4191..7056c25a3 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -94,7 +94,7 @@ def test_tim2d(ctx_factory): knl = lp.tag_inames(knl, dict(o="unr")) knl = lp.tag_inames(knl, dict(m="unr")) - knl = lp.set_instruction_priority(knl, "id:D_fetch", 5) + knl = lp.set_statement_priority(knl, "id:D_fetch", 5) print(knl) return knl diff --git a/test/test_transform.py b/test/test_transform.py index d17f6c707..5bd140e0d 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -210,8 +210,8 @@ def test_extract_subst(ctx_factory): from loopy.symbolic import parse - insn, = knl.instructions - assert insn.expression == parse("bsquare(23) + bsquare(25)") + stmt, = knl.statements + assert stmt.expression == parse("bsquare(23) + bsquare(25)") def test_join_inames(ctx_factory): @@ -405,14 +405,14 @@ def test_precompute_with_preexisting_inames_fail(): def test_add_nosync(): orig_knl = lp.make_kernel("{[i]: 0<=i<10}", """ - <>tmp[i] = 10 {id=insn1} - <>tmp2[i] = 10 {id=insn2} + <>tmp[i] = 10 {id=stmt1} + <>tmp2[i] = 10 {id=stmt2} - <>tmp3[2*i] = 0 {id=insn3} - <>tmp4 = 1 + tmp3[2*i] {id=insn4} + <>tmp3[2*i] = 0 {id=stmt3} + <>tmp4 = 1 + tmp3[2*i] {id=stmt4} - <>tmp5[i] = 0 {id=insn5,groups=g1} - tmp5[i] = 1 {id=insn6,conflicts=g1} + <>tmp5[i] = 0 {id=stmt5,groups=g1} + tmp5[i] = 1 {id=stmt6,conflicts=g1} """) orig_knl = lp.set_temporary_scope(orig_knl, "tmp3", "local") @@ -420,39 +420,39 @@ def test_add_nosync(): 
# No dependency present - don't add nosync knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2") - assert frozenset() == knl.id_to_insn["insn2"].no_sync_with + assert frozenset() == knl.id_to_stmt["stmt2"].no_sync_with # Dependency present knl = lp.add_nosync(orig_knl, "local", "writes:tmp3", "reads:tmp3") - assert frozenset() == knl.id_to_insn["insn3"].no_sync_with - assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with + assert frozenset() == knl.id_to_stmt["stmt3"].no_sync_with + assert frozenset([("stmt3", "local")]) == knl.id_to_stmt["stmt4"].no_sync_with # Bidirectional knl = lp.add_nosync( orig_knl, "local", "writes:tmp3", "reads:tmp3", bidirectional=True) - assert frozenset([("insn4", "local")]) == knl.id_to_insn["insn3"].no_sync_with - assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with + assert frozenset([("stmt4", "local")]) == knl.id_to_stmt["stmt3"].no_sync_with + assert frozenset([("stmt3", "local")]) == knl.id_to_stmt["stmt4"].no_sync_with # Groups - knl = lp.add_nosync(orig_knl, "local", "insn5", "insn6") - assert frozenset([("insn5", "local")]) == knl.id_to_insn["insn6"].no_sync_with + knl = lp.add_nosync(orig_knl, "local", "stmt5", "stmt6") + assert frozenset([("stmt5", "local")]) == knl.id_to_stmt["stmt6"].no_sync_with -def test_uniquify_instruction_ids(): +def test_uniquify_statement_ids(): i1 = lp.Assignment("b", 1, id=None) i2 = lp.Assignment("b", 1, id=None) i3 = lp.Assignment("b", 1, id=lp.UniqueName("b")) i4 = lp.Assignment("b", 1, id=lp.UniqueName("b")) - knl = lp.make_kernel("{[i]: i = 1}", []).copy(instructions=[i1, i2, i3, i4]) + knl = lp.make_kernel("{[i]: i = 1}", []).copy(statements=[i1, i2, i3, i4]) - from loopy.transform.instruction import uniquify_instruction_ids - knl = uniquify_instruction_ids(knl) + from loopy.transform.statement import uniquify_statement_ids + knl = uniquify_statement_ids(knl) - insn_ids = set(insn.id for insn in knl.instructions) + stmt_ids = 
set(stmt.id for stmt in knl.statements) - assert len(insn_ids) == 4 - assert all(isinstance(id, str) for id in insn_ids) + assert len(stmt_ids) == 4 + assert all(isinstance(id, str) for id in stmt_ids) if __name__ == "__main__": -- GitLab