From ca62fb99bf671a534122a67e4e6065651962f38e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 16 Oct 2017 19:42:56 -0500 Subject: [PATCH 1/8] Add initial version of insn-to-statement script --- insn-to-statement.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 insn-to-statement.sh diff --git a/insn-to-statement.sh b/insn-to-statement.sh new file mode 100644 index 000000000..b89796cbc --- /dev/null +++ b/insn-to-statement.sh @@ -0,0 +1,8 @@ +set -x +set -e +sed -i s/Instruction/Statement/g $(git ls-files | grep -v compyte) +sed -i s/instruction/statement/g $(git ls-files | grep -v compyte) +sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) +for d in kernel codegen transform; do + git mv loopy/$d/instruction.py loopy/$d/statement.py +done -- GitLab From e2261aa78d0f4ea0386ff323a8357ae321372eb4 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 17 Oct 2017 09:19:03 -0500 Subject: [PATCH 2/8] Add stringify stmt patch --- 0001-fix-stringify.patch | 27 +++++++++++++++++++++++++++ insn-to-statement.sh | 1 + 2 files changed, 28 insertions(+) create mode 100644 0001-fix-stringify.patch diff --git a/0001-fix-stringify.patch b/0001-fix-stringify.patch new file mode 100644 index 000000000..66906dc1c --- /dev/null +++ b/0001-fix-stringify.patch @@ -0,0 +1,27 @@ +diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py +index 481f567..ae94b07 100644 +--- a/loopy/kernel/__init__.py ++++ b/loopy/kernel/__init__.py +@@ -1092,7 +1092,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): + "tags", + "variables", + "rules", +- "statements", ++ "Statements", ++ "instructions", + "Dependencies", + "schedule", + ]) +@@ -1183,10 +1184,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): + for rule_name in natsorted(six.iterkeys(kernel.substitutions)): + lines.append(str(kernel.substitutions[rule_name])) + +- if "statements" in what: ++ if "Statements" in what or "instructions" in what: + lines.extend(sep) + if show_labels: +- 
lines.append("INSTRUCTIONS:") ++ lines.append("STATEMENTS:") + loop_list_width = 35 + + # {{{ topological sort diff --git a/insn-to-statement.sh b/insn-to-statement.sh index b89796cbc..68a8264e4 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -6,3 +6,4 @@ sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py done +patch -p1 < ./0001-fix-stringify.patch -- GitLab From 568e31a2319c0d05ae230dc4c6a367da26175d59 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 17 Oct 2017 10:00:35 -0500 Subject: [PATCH 3/8] Add another stringify fix --- 0002-fix-stringify.patch | 12 ++++++++++++ insn-to-statement.sh | 1 + 2 files changed, 13 insertions(+) create mode 100644 0002-fix-stringify.patch diff --git a/0002-fix-stringify.patch b/0002-fix-stringify.patch new file mode 100644 index 000000000..65801b762 --- /dev/null +++ b/0002-fix-stringify.patch @@ -0,0 +1,12 @@ +diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py +index e71655f..555bb2e 100644 +--- a/loopy/kernel/__init__.py ++++ b/loopy/kernel/__init__.py +@@ -1093,7 +1093,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): + "variables", + "rules", + "Statements", +- "statements", + "Dependencies", + "schedule", + ]) diff --git a/insn-to-statement.sh b/insn-to-statement.sh index 68a8264e4..0a857dac3 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -7,3 +7,4 @@ for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py done patch -p1 < ./0001-fix-stringify.patch +patch -p1 < ./0002-fix-stringify.patch -- GitLab From 286a9c56a26cf07d539be07615bcc60906f30e28 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 17 Oct 2017 11:36:02 -0500 Subject: [PATCH 4/8] Add all-caps spelling to instruction->stmt replace --- insn-to-statement.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/insn-to-statement.sh b/insn-to-statement.sh index 
0a857dac3..4770f10ca 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -2,6 +2,7 @@ set -x set -e sed -i s/Instruction/Statement/g $(git ls-files | grep -v compyte) sed -i s/instruction/statement/g $(git ls-files | grep -v compyte) +sed -i s/INSTRUCTION/STATEMENT/g $(git ls-files | grep -v compyte) sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py -- GitLab From 6777c90dab16a4cdf25e64b94ac69f0f31e50ca3 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 19 Oct 2017 12:50:25 -0500 Subject: [PATCH 5/8] Update insn-to-statement patches --- 0002-fix-stringify.patch | 12 ------------ 0001-fix-stringify.patch => 0003-fix-stringify.patch | 12 ++++-------- insn-to-statement.sh | 3 +-- 3 files changed, 5 insertions(+), 22 deletions(-) delete mode 100644 0002-fix-stringify.patch rename 0001-fix-stringify.patch => 0003-fix-stringify.patch (66%) diff --git a/0002-fix-stringify.patch b/0002-fix-stringify.patch deleted file mode 100644 index 65801b762..000000000 --- a/0002-fix-stringify.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py -index e71655f..555bb2e 100644 ---- a/loopy/kernel/__init__.py -+++ b/loopy/kernel/__init__.py -@@ -1093,7 +1093,6 @@ class LoopKernel(ImmutableRecordWithoutPickling): - "variables", - "rules", - "Statements", -- "statements", - "Dependencies", - "schedule", - ]) diff --git a/0001-fix-stringify.patch b/0003-fix-stringify.patch similarity index 66% rename from 0001-fix-stringify.patch rename to 0003-fix-stringify.patch index 66906dc1c..b9ad0739a 100644 --- a/0001-fix-stringify.patch +++ b/0003-fix-stringify.patch @@ -1,8 +1,8 @@ diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py -index 481f567..ae94b07 100644 +index 68fcca1..d20dddf 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py -@@ -1092,7 +1092,8 @@ class 
LoopKernel(ImmutableRecordWithoutPickling): +@@ -1094,7 +1094,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): "tags", "variables", "rules", @@ -12,7 +12,7 @@ index 481f567..ae94b07 100644 "Dependencies", "schedule", ]) -@@ -1183,10 +1184,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): +@@ -1171,7 +1172,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): for rule_name in natsorted(six.iterkeys(kernel.substitutions)): lines.append(str(kernel.substitutions[rule_name])) @@ -20,8 +20,4 @@ index 481f567..ae94b07 100644 + if "Statements" in what or "instructions" in what: lines.extend(sep) if show_labels: -- lines.append("INSTRUCTIONS:") -+ lines.append("STATEMENTS:") - loop_list_width = 35 - - # {{{ topological sort + lines.append("STATEMENTS:") diff --git a/insn-to-statement.sh b/insn-to-statement.sh index 4770f10ca..a6f2eab0c 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -7,5 +7,4 @@ sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py done -patch -p1 < ./0001-fix-stringify.patch -patch -p1 < ./0002-fix-stringify.patch +patch -p1 < ./0003-fix-stringify.patch -- GitLab From 36d789d0b02fe7cfb39286c69d9d25f318151fdf Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 19 Oct 2017 13:17:07 -0500 Subject: [PATCH 6/8] Update insn-to-statement patches --- 0003-fix-stringify.patch | 23 --------- insn-to-statement.sh | 2 +- stmt-compat-fixes.patch | 100 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 24 deletions(-) delete mode 100644 0003-fix-stringify.patch create mode 100644 stmt-compat-fixes.patch diff --git a/0003-fix-stringify.patch b/0003-fix-stringify.patch deleted file mode 100644 index b9ad0739a..000000000 --- a/0003-fix-stringify.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py -index 68fcca1..d20dddf 100644 ---- a/loopy/kernel/__init__.py -+++ 
b/loopy/kernel/__init__.py -@@ -1094,7 +1094,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): - "tags", - "variables", - "rules", -- "statements", -+ "Statements", -+ "instructions", - "Dependencies", - "schedule", - ]) -@@ -1171,7 +1172,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): - for rule_name in natsorted(six.iterkeys(kernel.substitutions)): - lines.append(str(kernel.substitutions[rule_name])) - -- if "statements" in what: -+ if "Statements" in what or "instructions" in what: - lines.extend(sep) - if show_labels: - lines.append("STATEMENTS:") diff --git a/insn-to-statement.sh b/insn-to-statement.sh index a6f2eab0c..26ca317aa 100644 --- a/insn-to-statement.sh +++ b/insn-to-statement.sh @@ -7,4 +7,4 @@ sed -i s/insn/stmt/g $(git ls-files | grep -v compyte) for d in kernel codegen transform; do git mv loopy/$d/instruction.py loopy/$d/statement.py done -patch -p1 < ./0003-fix-stringify.patch +patch -p1 < ./stmt-compat-fixes.patch diff --git a/stmt-compat-fixes.patch b/stmt-compat-fixes.patch new file mode 100644 index 000000000..738a37241 --- /dev/null +++ b/stmt-compat-fixes.patch @@ -0,0 +1,100 @@ +diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py +index 68fcca1..6d788df 100644 +--- a/loopy/kernel/__init__.py ++++ b/loopy/kernel/__init__.py +@@ -186,7 +186,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): + + # {{{ constructor + +- def __init__(self, domains, statements, args=[], schedule=None, ++ def __init__(self, domains, statements=None, args=[], schedule=None, + name="loopy_kernel", + preambles=[], + preamble_generators=[], +@@ -213,7 +213,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): + state=kernel_state.INITIAL, + target=None, + +- overridden_get_grid_sizes_for_stmt_ids=None): ++ overridden_get_grid_sizes_for_stmt_ids=None, ++ ++ # compat ++ instructions=None, ++ overridden_get_grid_sizes_for_insn_ids=None, ++ ): + """ + :arg overridden_get_grid_sizes_for_stmt_ids: A callable. 
When kernels get + intersected in slab decomposition, their grid sizes shouldn't +@@ -224,6 +229,23 @@ class LoopKernel(ImmutableRecordWithoutPickling): + from loopy.kernel.tools import SetOperationCacheManager + cache_manager = SetOperationCacheManager() + ++ if statements is not None and instructions is not None: ++ raise TypeError("may not specify both instructions and statements") ++ elif statements is None and instructions is None: ++ raise TypeError( ++ "must specify exactly one of instructions and statements") ++ elif instructions is not None: ++ statements = instructions ++ ++ if (overridden_get_grid_sizes_for_stmt_ids is not None ++ and overridden_get_grid_sizes_for_insn_ids is not None): ++ raise TypeError("may not specify both " ++ "overridden_get_grid_sizes_for_stmt_ids " ++ "and overridden_get_grid_sizes_for_insn_ids") ++ elif overridden_get_grid_sizes_for_insn_ids is not None: ++ overridden_get_grid_sizes_for_stmt_ids = \ ++ overridden_get_grid_sizes_for_insn_ids ++ + # {{{ process assumptions + + if assumptions is None: +@@ -264,6 +286,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): + assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains) + assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT + ++ + ImmutableRecordWithoutPickling.__init__(self, + domains=domains, + statements=statements, +@@ -1094,7 +1117,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): + "tags", + "variables", + "rules", +- "statements", ++ "Statements", ++ "instructions", + "Dependencies", + "schedule", + ]) +@@ -1171,7 +1195,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): + for rule_name in natsorted(six.iterkeys(kernel.substitutions)): + lines.append(str(kernel.substitutions[rule_name])) + +- if "statements" in what: ++ if "Statements" in what or "instructions" in what: + lines.extend(sep) + if show_labels: + lines.append("STATEMENTS:") +@@ -1387,6 +1411,21 @@ class LoopKernel(ImmutableRecordWithoutPickling): + + # }}} + ++ # {{{ "instruction" 
compat goop ++ ++ @property ++ def id_to_insn(self): ++ return self.id_to_stmt ++ ++ @property ++ def instructions(self): ++ return self.statements ++ ++ def get_instruction_id_generator(self, based_on="insn"): ++ return self.get_statement_id_generator(based_on) ++ ++ # }}} ++ + # }}} + + # vim: foldmethod=marker -- GitLab From 6f8e4fd689b1a97e82bab4adf1ed50e2018d9756 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 19 Oct 2017 13:18:23 -0500 Subject: [PATCH 7/8] Update insn-to-statement patches --- stmt-compat-fixes.patch | 8 -------- 1 file changed, 8 deletions(-) diff --git a/stmt-compat-fixes.patch b/stmt-compat-fixes.patch index 738a37241..7c7c9322e 100644 --- a/stmt-compat-fixes.patch +++ b/stmt-compat-fixes.patch @@ -49,14 +49,6 @@ index 68fcca1..6d788df 100644 # {{{ process assumptions if assumptions is None: -@@ -264,6 +286,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): - assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains) - assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT - -+ - ImmutableRecordWithoutPickling.__init__(self, - domains=domains, - statements=statements, @@ -1094,7 +1117,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): "tags", "variables", -- GitLab From 7533143210099e2ccc48e910e1e2ba77d194228f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 19 Oct 2017 13:18:36 -0500 Subject: [PATCH 8/8] Rename instruction -> statement --- MEMO | 22 +- README.rst | 2 +- doc/images/dep-graph-correct.svg | 10 +- doc/images/dep-graph-incorrect.svg | 10 +- doc/images/dep-graph-nesting.svg | 6 +- doc/misc.rst | 12 +- doc/ref_creation.rst | 2 +- doc/ref_kernel.rst | 96 +-- doc/ref_transform.rst | 12 +- doc/tutorial.rst | 76 +- .../fortran/ipython-integration-demo.ipynb | 8 +- examples/python/ispc-stream-harness.py | 4 +- insn-to-statement.sh | 10 +- loopy/__init__.py | 48 +- loopy/check.py | 310 ++++----- loopy/codegen/bounds.py | 10 +- loopy/codegen/control.py | 46 +- loopy/codegen/loop.py | 16 +- 
loopy/codegen/result.py | 10 +- .../codegen/{instruction.py => statement.py} | 68 +- loopy/diagnostic.py | 2 +- loopy/frontend/fortran/translator.py | 36 +- loopy/kernel/__init__.py | 230 +++--- loopy/kernel/creation.py | 620 ++++++++--------- loopy/kernel/data.py | 12 +- loopy/kernel/{instruction.py => statement.py} | 186 ++--- loopy/kernel/tools.py | 398 +++++------ loopy/loop.py | 4 +- loopy/match.py | 10 +- loopy/maxima.py | 36 +- loopy/options.py | 4 +- loopy/preprocess.py | 654 +++++++++--------- loopy/schedule/__init__.py | 538 +++++++------- loopy/schedule/device_mapping.py | 6 +- loopy/schedule/tools.py | 18 +- loopy/statistics.py | 68 +- loopy/symbolic.py | 42 +- loopy/target/__init__.py | 4 +- loopy/target/c/__init__.py | 36 +- loopy/target/cuda.py | 8 +- loopy/target/ispc.py | 14 +- loopy/target/opencl.py | 8 +- loopy/target/python.py | 8 +- loopy/tools.py | 4 +- loopy/transform/arithmetic.py | 66 +- loopy/transform/batch.py | 6 +- loopy/transform/buffer.py | 68 +- loopy/transform/data.py | 46 +- loopy/transform/diff.py | 60 +- loopy/transform/fusion.py | 54 +- loopy/transform/ilp.py | 44 +- loopy/transform/iname.py | 200 +++--- loopy/transform/instruction.py | 339 --------- loopy/transform/padding.py | 2 +- loopy/transform/precompute.py | 138 ++-- loopy/transform/save.py | 146 ++-- loopy/transform/statement.py | 339 +++++++++ loopy/transform/subst.py | 110 +-- loopy/type_inference.py | 18 +- proto-tests/test_fem_assembly.py | 10 +- stmt-compat-fixes.patch | 34 +- test/test_diff.py | 2 +- test/test_fortran.py | 2 +- test/test_linalg.py | 2 +- test/test_loopy.py | 84 +-- test/test_numa_diff.py | 26 +- test/test_reduction.py | 2 +- test/test_sem_reagan.py | 2 +- test/test_transform.py | 44 +- 69 files changed, 2803 insertions(+), 2765 deletions(-) rename loopy/codegen/{instruction.py => statement.py} (81%) rename loopy/kernel/{instruction.py => statement.py} (88%) delete mode 100644 loopy/transform/instruction.py create mode 100644 
loopy/transform/statement.py diff --git a/MEMO b/MEMO index f4e5c34e4..340a3da17 100644 --- a/MEMO +++ b/MEMO @@ -10,7 +10,7 @@ Things to consider - Depedencies are pointwise for shared loop dimensions and global over non-shared ones (between dependent and ancestor) -- multiple insns could fight over which iname gets local axis 0 +- multiple stmts could fight over which iname gets local axis 0 -> complicated optimization problem - Every loop in loopy is opened at most once. @@ -35,7 +35,7 @@ Things to consider - Loopy as a data model for implementing custom rewritings - We won't generate WAW barrier-needing dependencies - from one instruction to itself. + from one statement to itself. - Loopy is semi-interactive. @@ -45,7 +45,7 @@ Things to consider - Dependency on non-local global writes is ill-formed -- No substitution rules allowed on lhs of insns +- No substitution rules allowed on lhs of stmts To-do ^^^^^ @@ -78,7 +78,7 @@ Fixes: old inames may still be around, so the rewrite may or may not have to be applied. -- Group instructions by dependency/inames for scheduling, to +- Group statements by dependency/inames for scheduling, to increase sched. scalability - What if no universally valid precompute base index expression is found? @@ -109,7 +109,7 @@ Future ideas - Check for unordered (no-dependency) writes to the same location -- Vanilla C string instructions? +- Vanilla C string statements? - Barriers for data exchanged via global vars? @@ -183,7 +183,7 @@ Dealt with - Add dependencies after the fact -- Scalar insn priority +- Scalar stmt priority - ScalarArg is a bad name -> renamed to ValueArg @@ -209,8 +209,8 @@ Dealt with -> pending better prefetch spec - Prefetch by sample access -- How is intra-instruction ordering of ILP loops going to be determined? - (taking into account that it could vary even per-instruction?) +- How is intra-statement ordering of ILP loops going to be determined? + (taking into account that it could vary even per-statement?) 
- Sharing of checks across ILP instances @@ -257,7 +257,7 @@ Dealt with property. - Just touching a variable written to by a non-idempotent - instruction makes that instruction also not idempotent + statement makes that statement also not idempotent -> Idempotent renamed to boostable. -> Done. @@ -274,7 +274,7 @@ Dealt with - Slab decomposition for ILP -> I don't think that's possible. -- It is hard to understand error messages that referred to instructions that +- It is hard to understand error messages that referred to statements that are generated during preprocessing. -> Expose preprocessing to the user so she can inspect the preprocessed @@ -314,7 +314,7 @@ Dealt with - Make syntax for iname dependencies -- make syntax for insn dependencies +- make syntax for stmt dependencies - Implement get_problems() diff --git a/README.rst b/README.rst index 0e551fbed..f58a75de1 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ It can capture the following types of optimizations: * Loopy Unrolling * Loop tiling with efficient handling of boundary cases * Prefetching/copy optimizations -* Instruction level parallelism +* Statement level parallelism * and many more Loopy targets array-type computations, such as the following: diff --git a/doc/images/dep-graph-correct.svg b/doc/images/dep-graph-correct.svg index 397cb2d10..0bd743391 100644 --- a/doc/images/dep-graph-correct.svg +++ b/doc/images/dep-graph-correct.svg @@ -32,15 +32,15 @@ out[(j, i)] <- a[(i, j)] - -insn - + +stmt + out[(ii, jj)] <- 2*out[(ii, jj)] - -transpose->insn + +transpose->stmt diff --git a/doc/images/dep-graph-incorrect.svg b/doc/images/dep-graph-incorrect.svg index 363080aef..d072248af 100644 --- a/doc/images/dep-graph-incorrect.svg +++ b/doc/images/dep-graph-incorrect.svg @@ -24,15 +24,15 @@ out[(j, i)] <- a[(i, j)] - -insn - + +stmt + out[(i, j)] <- 2*out[(i, j)] - -transpose->insn + +transpose->stmt diff --git a/doc/images/dep-graph-nesting.svg b/doc/images/dep-graph-nesting.svg index 
72cb9c463..a50ca0509 100644 --- a/doc/images/dep-graph-nesting.svg +++ b/doc/images/dep-graph-nesting.svg @@ -17,9 +17,9 @@ i - -insn - + +stmt + a[(i, j)] <- 0 diff --git a/doc/misc.rst b/doc/misc.rst index 9db3b85a7..4dba9c7c1 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -138,11 +138,11 @@ This example is included in the :mod:`loopy` distribution as What this does is find nearby "centers" satisfying some criteria for an array of points ("targets"). -Specifying dependencies for groups of instructions is cumbersome. Help? +Specifying dependencies for groups of statements is cumbersome. Help? ----------------------------------------------------------------------- -You can now specify instruction ID prefixes and dependencies for groups -of instructions, like this:: +You can now specify statement ID prefixes and dependencies for groups +of statements, like this:: with {id_prefix=init_m} <> m[0] = ... @@ -253,7 +253,7 @@ This list is always growing, but here are a few pointers: Separated array axes must have a fixed size. (See either :func:`loopy.split_array_axis`.) -* Realization of Instruction-level parallelism +* Realization of Statement-level parallelism Use :func:`loopy.tag_inames` with the ``"ilp"`` tag. ILP loops must have a fixed size. (See either @@ -284,7 +284,7 @@ This list is always growing, but here are a few pointers: Uh-oh. I got a scheduling error. Any hints? ------------------------------------------- -* Make sure that dependencies between instructions are as +* Make sure that dependencies between statements are as you intend. Use :func:`loopy.show_dependency_graph` to check. @@ -304,7 +304,7 @@ Uh-oh. I got a scheduling error. Any hints? * Make sure that your loops are correctly nested. - Print the kernel to make sure all instructions are within + Print the kernel to make sure all statements are within the set of inames you intend them to be in. * One iname is one for loop. 
diff --git a/doc/ref_creation.rst b/doc/ref_creation.rst index 92eff09c9..9cc02be74 100644 --- a/doc/ref_creation.rst +++ b/doc/ref_creation.rst @@ -6,7 +6,7 @@ Reference: Creating Kernels =========================== -From Loop Domains and Instructions +From Loop Domains and Statements ---------------------------------- .. autofunction:: make_kernel diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 3f01b0764..cbf881354 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -17,7 +17,7 @@ Example:: A kernel's iteration domain is given by a list of :class:`islpy.BasicSet` instances (which parametrically represent multi-dimensional sets of tuples of integers). They define the integer values of the loop variables -for which instructions (see below) will be executed. +for which statements (see below) will be executed. It is written in :ref:`isl-syntax`. :mod:`loopy` calls the loop variables *inames*. In this case, *i* is the sole iname. The loop domain is given as a conjunction of affine equality @@ -46,7 +46,7 @@ inside of the 'l' loop. The idea is that domains form a forest (a collection of trees), and a "sub-forest" is extracted that covers all the inames for each -instruction. Each individual sub-tree is then checked for branching, +statement. Each individual sub-tree is then checked for branching, which is ill-formed. It is declared ill-formed because intersecting, in the above case, the l, i, and j domains could result in restrictions from the i domain affecting the j domain by way of how i affects l--which would @@ -59,7 +59,7 @@ Inames Loops are (by default) entered exactly once. This is necessary to preserve dependency semantics--otherwise e.g. a fetch could happen inside one loop nest, -and then the instruction using that fetch could be inside a wholly different +and then the statement using that fetch could be inside a wholly different loop nest. .. 
_isl-syntax: @@ -134,7 +134,7 @@ Tag Meaning ``"l.N"`` Local (intra-group) axis N ("local") ``"g.N"`` Group-number axis N ("group") ``"unr"`` Unroll -``"ilp"`` | ``"ilp.unr"`` Unroll using instruction-level parallelism +``"ilp"`` | ``"ilp.unr"`` Unroll using statement-level parallelism ``"ilp.seq"`` Realize parallel iname as innermost loop ``"like.INAME"`` Can be used when tagging inames to tag like another ``"unused.g"`` | ``"unused.l"`` Can be to tag as the next unused group/local axis @@ -147,18 +147,18 @@ Tag Meaning * Restricts loops to be innermost * Duplicates reduction storage for any reductions nested around ILP usage * Causes a loop (unrolled or not) to be opened/generated for each - involved instruction + involved statement .. }}} -.. _instructions: +.. _statements: -Instructions +Statements ------------ .. {{{ -.. autoclass:: InstructionBase +.. autoclass:: StatementBase .. _assignments: @@ -172,18 +172,18 @@ Assignment objects Textual Assignment Syntax ^^^^^^^^^^^^^^^^^^^^^^^^^ -The general syntax of an instruction is a simple assignment:: +The general syntax of an statement is a simple assignment:: LHS[i,j,k] = EXPRESSION Several extensions of this syntax are defined, as discussed below. They may be combined freely. -You can also use an instruction to declare a new temporary variable. (See +You can also use an statement to declare a new temporary variable. (See :ref:`temporaries`.) See :ref:`types` for what types are acceptable. If the ``LHS`` has a subscript, bounds on the indices are inferred (which must be constants at the time of kernel creation) and the declared temporary is -created as an array. Instructions declaring temporaries have the following +created as an array. Statements declaring temporaries have the following form:: LHS[i,j,k] = EXPRESSION @@ -193,31 +193,31 @@ automatically. 
This uses the following syntax:: <> LHS[i,j,k] = EXPRESSION -Lastly, each instruction may optionally have a number of attributes +Lastly, each statement may optionally have a number of attributes specified, using the following format:: LHS[i,j,k] = EXPRESSION {attr1,attr2=value1:value2} These are usually key-value pairs. The following attributes are recognized: -* ``id=value`` sets the instruction's identifier to ``value``. ``value`` +* ``id=value`` sets the statement's identifier to ``value``. ``value`` must be unique within the kernel. This identifier is used to refer to the - instruction after it has been created, such as from ``dep`` attributes + statement after it has been created, such as from ``dep`` attributes (see below) or from :mod:`context matches `. -* ``id_prefix=value`` also sets the instruction's identifier, however +* ``id_prefix=value`` also sets the statement's identifier, however uniqueness is ensured by loopy itself, by appending further components (often numbers) to the given ``id_prefix``. -* ``inames=i:j:k`` forces the instruction to reside within the loops over +* ``inames=i:j:k`` forces the statement to reside within the loops over :ref:`inames` ``i``, ``j`` and ``k`` (and only those). .. note:: - The default for the inames that the instruction depends on is - the inames used in the instruction itself plus the common + The default for the inames that the statement depends on is + the inames used in the statement itself plus the common subset of inames shared by writers of all variables read by the - instruction. + statement. You can add a plus sign ("``+``") to the front of this option value to indicate that you would like the inames you specify here @@ -232,9 +232,9 @@ These are usually key-value pairs. The following attributes are recognized: This is a shortcut for calling :func:`loopy.duplicate_inames` later (once the kernel is created). 
-* ``dep=id1:id2`` creates a dependency of this instruction on the - instructions with identifiers ``id1`` and ``id2``. The meaning of this - dependency is that the code generated for this instruction is required to +* ``dep=id1:id2`` creates a dependency of this statement on the + statements with identifiers ``id1`` and ``id2``. The meaning of this + dependency is that the code generated for this statement is required to appear textually after all of these dependees' generated code. Identifiers here are allowed to be wildcards as defined by the Python @@ -246,14 +246,14 @@ These are usually key-value pairs. The following attributes are recognized: Since specifying all possible dependencies is cumbersome and error-prone, :mod:`loopy` employs a heuristic to automatically find dependencies. Specifically, :mod:`loopy` will automatically add - a dependency to an instruction reading a variable if there is - exactly one instruction writing that variable. ("Variable" here may + a dependency to an statement reading a variable if there is + exactly one statement writing that variable. ("Variable" here may mean either temporary variable or kernel argument.) If each variable in a kernel is only written once, then this heuristic should be able to compute all required dependencies. - Conversely, if a variable is written by two different instructions, + Conversely, if a variable is written by two different statements, all ordering around that variable needs to be specified explicitly. It is recommended to use :func:`get_dot_dependency_graph` to visualize the dependency graph of possible orderings. @@ -262,14 +262,14 @@ These are usually key-value pairs. The following attributes are recognized: heuristic and indicate that the specified list of dependencies is exhaustive. -* ``dep_query=...`` provides an alternative way of specifying instruction +* ``dep_query=...`` provides an alternative way of specifying statement dependencies. 
The given string is parsed as a match expression object by :func:`loopy.match.parse_match`. Upon kernel generation, this match - expression is used to match instructions in the kernel and add them as + expression is used to match statements in the kernel and add them as dependencies. * ``nosync=id1:id2`` prescribes that no barrier synchronization is necessary - for the instructions with identifiers ``id1`` and ``id2``, even if a + for the statements with identifiers ``id1`` and ``id2``, even if a dependency chain exists and variables are accessed in an apparently racy way. @@ -287,8 +287,8 @@ These are usually key-value pairs. The following attributes are recognized: * `any` As an example, ``nosync=id1@local:id2@global`` prescribes that no local - synchronization is needed with instruction ``id1`` and no global - synchronization is needed with instruction ``id2``. + synchronization is needed with statement ``id1`` and no global + synchronization is needed with statement ``id2``. ``nosync=id1@any`` has the same effect as ``nosync=id1``. @@ -296,25 +296,25 @@ These are usually key-value pairs. The following attributes are recognized: just like ``dep_query`` and ``dep``. As with ``nosync``, ``nosync_query`` accepts an optional `@scope` suffix. -* ``priority=integer`` sets the instructions priority to the value - ``integer``. Instructions with higher priority will be scheduled sooner, +* ``priority=integer`` sets the statements priority to the value + ``integer``. Statements with higher priority will be scheduled sooner, if possible. Note that the scheduler may still schedule a lower-priority - instruction ahead of a higher-priority one if loop orders or dependencies + statement ahead of a higher-priority one if loop orders or dependencies require it. 
-* ``if=variable1:variable2`` Only execute this instruction if all condition +* ``if=variable1:variable2`` Only execute this statement if all condition variables (which must be scalar variables) evaluate to ``true`` (as defined by C). -* ``tags=tag1:tag2`` Apply tags to this instruction that can then be used +* ``tags=tag1:tag2`` Apply tags to this statement that can then be used for :ref:`context-matching`. -* ``groups=group1:group2`` Make this instruction part of the given - instruction groups. See :class:`InstructionBase.groups`. +* ``groups=group1:group2`` Make this statement part of the given + statement groups. See :class:`StatementBase.groups`. -* ``conflicts_grp=group1:group2`` Make this instruction conflict with the - given instruction groups. See - :class:`InstructionBase.conflicts_with_groups`. +* ``conflicts_grp=group1:group2`` Make this statement conflict with the + given statement groups. See + :class:`StatementBase.conflicts_with_groups`. * ``atomic`` The update embodied by the assignment is carried out atomically. See :attr:`Assignment.atomicity` for precise semantics. @@ -340,15 +340,15 @@ Loopy's expressions are a slight superset of the expressions supported by TODO: Functions TODO: Reductions -Function Call Instructions +Function Call Statements ^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: CallInstruction +.. autoclass:: CallStatement -C Block Instructions +C Block Statements ^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: CInstruction +.. autoclass:: CStatement Atomic Operations ^^^^^^^^^^^^^^^^^ @@ -363,15 +363,15 @@ Atomic Operations .. autoclass:: AtomicUpdate -No-Op Instruction +No-Op Statement ^^^^^^^^^^^^^^^^^ -.. autoclass:: NoOpInstruction +.. autoclass:: NoOpStatement -Barrier Instructions +Barrier Statements ^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: BarrierInstruction +.. autoclass:: BarrierStatement .. 
}}} diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst index d293e3ebe..a3e43c1fb 100644 --- a/doc/ref_transform.rst +++ b/doc/ref_transform.rst @@ -59,18 +59,18 @@ Padding Data .. autofunction:: add_padding -Manipulating Instructions +Manipulating Statements ------------------------- -.. autofunction:: set_instruction_priority +.. autofunction:: set_statement_priority .. autofunction:: add_dependency -.. autofunction:: remove_instructions +.. autofunction:: remove_statements -.. autofunction:: replace_instruction_ids +.. autofunction:: replace_statement_ids -.. autofunction:: tag_instructions +.. autofunction:: tag_statements .. autofunction:: add_nosync @@ -135,7 +135,7 @@ Setting options Matching contexts ----------------- -TODO: Matching instruction tags +TODO: Matching statement tags .. automodule:: loopy.match diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 8b8538725..20b3610e0 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -88,7 +88,7 @@ The parts that you see here are the two main components of a loopy kernel: passed to the kernel by the user that, in this case, determines the length of the vector being multiplied. -* The **instructions** to be executed. These are generally scalar +* The **statements** to be executed. These are generally scalar assignments between array elements, consisting of a left hand side and a right hand side. See :ref:`assignments` for the full syntax of an assignment. @@ -121,9 +121,9 @@ always see loopy's view of a kernel by printing it. INAME IMPLEMENTATION TAGS: i: None --------------------------------------------------------------------------- - INSTRUCTIONS: + STATEMENTS: for i - out[i] = 2*a[i] {id=insn} + out[i] = 2*a[i] {id=stmt} end i --------------------------------------------------------------------------- @@ -132,7 +132,7 @@ than there was in the input. Most of this comes from default values that loopy assumes to cover common use cases. These defaults can all be overridden. 
-We've seen the domain and the instructions above, and we'll discuss the +We've seen the domain and the statements above, and we'll discuss the 'iname-to-tag-map' in :ref:`implementing-inames`. The remaining big chunk of added information is in the 'arguments' section, where we observe the following: @@ -307,13 +307,13 @@ that: ``i==17``. Your program is only correct if it produces a valid result irrespective of this ordering. -* In addition, there is (by default) no ordering between instructions - either. In other words, loopy is free to execute the instructions above +* In addition, there is (by default) no ordering between statements + either. In other words, loopy is free to execute the statements above in any order whatsoever. Reading the above two rules, you'll notice that our transpose-and-multiply kernel is incorrect, because it only computes the desired result if the -first instruction completes before the second one. To fix this, we declare +first statement completes before the second one. To fix this, we declare an explicit dependency: .. doctest:: @@ -327,8 +327,8 @@ an explicit dependency: ... """) ``{id=transpose}`` assigns the identifier *transpose* to the first -instruction, and ``{dep=transpose}`` declares a dependency of the second -instruction on the first. Looking at loopy's view of this kernel, we see +statement, and ``{dep=transpose}`` declares a dependency of the second +statement on the first. Looking at loopy's view of this kernel, we see that these dependencies show up there, too: .. doctest:: @@ -340,14 +340,14 @@ that these dependencies show up there, too: ... --------------------------------------------------------------------------- DEPENDENCIES: (use loopy.show_dependency_graph to visualize) - insn : transpose + stmt : transpose --------------------------------------------------------------------------- These dependencies are in a ``dependent : prerequisite`` format that should be familiar if you have previously dealt with Makefiles. 
For larger kernels, these dependency lists can become quite verbose, and there
is an increasing risk that required dependencies are missed. To help catch these,
-loopy can also show a statement dependency graph, using
:func:`loopy.show_dependency_graph`:

.. image:: images/dep-graph-incorrect.svg
@@ -360,16 +360,16 @@ graph will open in a browser window.

Since manually notating lots of dependencies is cumbersome, loopy has
a heuristic:

-    If a variable is written by exactly one instruction, then all
-    instructions reading that variable will automatically depend on the
-    writing instruction.
+    If a variable is written by exactly one statement, then all
+    statements reading that variable will automatically depend on the
+    writing statement.

The intent of this heuristic is to cover the common case of a
precomputed result being stored and used many times. Generally, these
dependencies are *in addition* to any manual dependencies added via
``{dep=...}``. It is possible (but rare) that the heuristic adds
undesired dependencies. In this case, ``{dep=*...}`` (i.e. a leading
asterisk) to
-prevent the heuristic from adding dependencies for this instruction.
+prevent the heuristic from adding dependencies for this statement.

Loops and dependencies
~~~~~~~~~~~~~~~~~~~~~~
@@ -395,7 +395,7 @@ Let us take a look at the generated code for the above kernel:
      }
    }

-While our requested instruction ordering has been obeyed, something is
+While our requested statement ordering has been obeyed, something is
still not right:

.. doctest::
@@ -404,7 +404,7 @@ still not right:
    False

For the kernel to perform the desired computation, *all
-instances* (loop iterations) of the first instruction need to be completed,
+instances* (loop iterations) of the first statement need to be completed,
not just the one for the current values of *(i, j)*.
Dependencies in loopy act *within* the largest common set of shared @@ -960,7 +960,7 @@ Consider the following example: a_temp[lid(0)] = a[16 * gid(0) + lid(0)]; acc_k = 0.0f; } - barrier(CLK_LOCAL_MEM_FENCE) /* for a_temp (insn_0_k_update depends on insn) */; + barrier(CLK_LOCAL_MEM_FENCE) /* for a_temp (stmt_0_k_update depends on stmt) */; if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0) { for (int k = 0; k <= 15; ++k) @@ -971,8 +971,8 @@ Consider the following example: Observe that *a_temp* was automatically placed in local memory, because it is written in parallel across values of the group-local iname -*i_inner*. In addition, :mod:`loopy` has emitted a barrier instruction to -achieve the :ref:`ordering` specified by the instruction dependencies. +*i_inner*. In addition, :mod:`loopy` has emitted a barrier statement to +achieve the :ref:`ordering` specified by the statement dependencies. (The ``priority=10`` attribute was added to make the output of the test deterministic.) @@ -1045,7 +1045,7 @@ earlier: acc_k = 0.0f; if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0) a_fetch[lid(0)] = a[16 * gid(0) + lid(0)]; - barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (insn_k_update depends on a_fetch_rule) */; + barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (stmt_k_update depends on a_fetch_rule) */; if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0) { for (int k = 0; k <= 15; ++k) @@ -1108,7 +1108,7 @@ work item: :mod:`loopy` supports two kinds of barriers: * *Local barriers* ensure consistency of local memory accesses to items within - *the same* work group. This synchronizes with all instructions in the work + *the same* work group. This synchronizes with all statements in the work group. * *Global barriers* ensure consistency of global memory accesses @@ -1123,7 +1123,7 @@ all work items reach the same barrier, the kernel will hang during execution. 
Barrier insertion ~~~~~~~~~~~~~~~~~ -By default, :mod:`loopy` inserts local barriers between two instructions when it +By default, :mod:`loopy` inserts local barriers between two statements when it detects that a dependency involving local memory may occur across work items. To see this in action, take a look at the section on :ref:`local_temporaries`. @@ -1156,11 +1156,11 @@ this, :mod:`loopy` will complain that global barrier needs to be inserted: >>> cgr = lp.generate_code_v2(knl) Traceback (most recent call last): ... - MissingBarrierError: Dependency 'rotate depends on maketmp' (for variable 'arr') requires synchronization by a global barrier (add a 'no_sync_with' instruction option to state that no synchronization is needed) + MissingBarrierError: Dependency 'rotate depends on maketmp' (for variable 'arr') requires synchronization by a global barrier (add a 'no_sync_with' statement option to state that no synchronization is needed) -The syntax for a inserting a global barrier instruction is +The syntax for a inserting a global barrier statement is ``... gbarrier``. :mod:`loopy` also supports manually inserting local -barriers. The syntax for a local barrier instruction is ``... lbarrier``. +barriers. The syntax for a local barrier statement is ``... lbarrier``. Saving temporaries across global barriers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1173,7 +1173,7 @@ of how to use :func:`loopy.save_and_reload_temporaries` which is helpful for that purpose. Let us start with an example. Consider the kernel from above with a -``... gbarrier`` instruction that has already been inserted. +``... gbarrier`` statement that has already been inserted. .. doctest:: @@ -1202,7 +1202,7 @@ Here is what happens when we try to generate code for the kernel: MissingDefinitionError: temporary variable 'tmp' gets used in subkernel 'rotate_v2_0' without a definition (maybe you forgot to call loopy.save_and_reload_temporaries?) 
This happens due to the kernel splitting done by :mod:`loopy`. The splitting -happens when the instruction schedule is generated. To see the schedule, we +happens when the statement schedule is generated. To see the schedule, we should call :func:`loopy.get_one_scheduled_kernel`: >>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) @@ -1222,7 +1222,7 @@ should call :func:`loopy.get_one_scheduled_kernel`: 6: RETURN FROM KERNEL rotate_v2_0 --------------------------------------------------------------------------- -As the error message suggests, taking a look at the generated instruction +As the error message suggests, taking a look at the generated statement schedule will show that while ``tmp`` is assigned in the first kernel, the assignment to ``tmp`` is not seen by the second kernel. Because the temporary is in private memory, it does not persist across calls to device kernels (the same @@ -1231,13 +1231,13 @@ goes for local temporaries). :mod:`loopy` provides a function called :func:`loopy.save_and_reload_temporaries` for the purpose of handling the task of saving and restoring temporary values across global barriers. This -function adds instructions to the kernel without scheduling them. That means +function adds statements to the kernel without scheduling them. That means that :func:`loopy.get_one_scheduled_kernel` needs to be called one more time to -put those instructions into the schedule. +put those statements into the schedule. >>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl)) >>> knl = lp.save_and_reload_temporaries(knl) - >>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added instructions + >>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added statements >>> print(knl) --------------------------------------------------------------------------- KERNEL: rotate_v2 @@ -1461,7 +1461,7 @@ sign that something is amiss: >>> evt, (out,) = knl(queue, a=a_mat_dev) Traceback (most recent call last): ... 
- WriteRaceConditionWarning: in kernel transpose: instruction 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel attribute to disable) + WriteRaceConditionWarning: in kernel transpose: statement 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel attribute to disable) When we ask to see the code, the issue becomes apparent: @@ -1495,7 +1495,7 @@ Barriers ~~~~~~~~ :mod:`loopy` may infer the need for a barrier when it is not necessary. The -``no_sync_with`` instruction attribute can be used to resolve this. +``no_sync_with`` statement attribute can be used to resolve this. See also :func:`loopy.add_nosync`. @@ -1868,16 +1868,16 @@ Now to make things more interesting, we'll create a kernel with barriers: for (int j = 0; j <= 9; ++j) for (int i = 0; i <= 49; ++i) { - barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn rev-depends on insn_0) */; + barrier(CLK_LOCAL_MEM_FENCE) /* for c (stmt rev-depends on stmt_0) */; c[990 * i + 99 * j + lid(0) + 1] = 2 * a[980 * i + 98 * j + lid(0) + 1]; - barrier(CLK_LOCAL_MEM_FENCE) /* for c (insn_0 depends on insn) */; + barrier(CLK_LOCAL_MEM_FENCE) /* for c (stmt_0 depends on stmt) */; e[980 * i + 98 * j + lid(0) + 1] = c[990 * i + 99 * j + 1 + lid(0) + 1] + c[990 * i + 99 * j + -1 + lid(0) + 1]; } } } -In this kernel, when a thread performs the second instruction it uses data -produced by *different* threads during the first instruction. 
Because of this, +In this kernel, when a thread performs the second statement it uses data +produced by *different* threads during the first statement. Because of this, barriers are required for correct execution, so loopy inserts them. Now we'll count the barriers using :func:`loopy.get_synchronization_map`: diff --git a/examples/fortran/ipython-integration-demo.ipynb b/examples/fortran/ipython-integration-demo.ipynb index c2b34f1d1..27347bd94 100644 --- a/examples/fortran/ipython-integration-demo.ipynb +++ b/examples/fortran/ipython-integration-demo.ipynb @@ -82,8 +82,8 @@ "INAME IMPLEMENTATION TAGS:\n", "i: None\n", "---------------------------------------------------------------------------\n", - "INSTRUCTIONS:\n", - "[i] out[i] <- a # insn0\n", + "STATEMENTS:\n", + "[i] out[i] <- a # stmt0\n", "---------------------------------------------------------------------------\n" ] } @@ -167,8 +167,8 @@ "i_inner: l.0\n", "i_outer: g.0\n", "---------------------------------------------------------------------------\n", - "INSTRUCTIONS:\n", - "[i_inner,i_outer] out[i_inner + i_outer*128] <- a # insn0\n", + "STATEMENTS:\n", + "[i_inner,i_outer] out[i_inner + i_outer*128] <- a # stmt0\n", "---------------------------------------------------------------------------\n" ] } diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py index fa581d426..edf49c8aa 100644 --- a/examples/python/ispc-stream-harness.py +++ b/examples/python/ispc-stream-harness.py @@ -59,10 +59,10 @@ def main(): with open("tasksys.cpp", "r") as ts_file: tasksys_source = ts_file.read() - def make_knl(name, insn, vars): + def make_knl(name, stmt, vars): knl = lp.make_kernel( "{[i]: 0<=i helps find segfaults - result = Block([printf_insn, result]) + result = Block([printf_stmt, result]) # }}} return result -def generate_call_code(codegen_state, insn): +def generate_call_code(codegen_state, stmt): kernel = codegen_state.kernel # {{{ vectorization handling if 
codegen_state.vectorization_info: - if insn.atomicity: + if stmt.atomicity: raise Unvectorizable("atomic operation") # }}} result = codegen_state.ast_builder.emit_multiple_assignment( - codegen_state, insn) + codegen_state, stmt) # {{{ tracing @@ -243,11 +243,11 @@ def generate_call_code(codegen_state, insn): return result -def generate_c_instruction_code(codegen_state, insn): +def generate_c_statement_code(codegen_state, stmt): kernel = codegen_state.kernel if codegen_state.vectorization_info is not None: - raise Unvectorizable("C instructions cannot be vectorized") + raise Unvectorizable("C statements cannot be vectorized") body = [] @@ -255,7 +255,7 @@ def generate_c_instruction_code(codegen_state, insn): from cgen import Initializer, Block, Line from pymbolic.primitives import Variable - for name, iname_expr in insn.iname_exprs: + for name, iname_expr in stmt.iname_exprs: if (isinstance(iname_expr, Variable) and name not in codegen_state.var_subst_map): # No need, the bare symbol will work @@ -270,7 +270,7 @@ def generate_c_instruction_code(codegen_state, insn): if body: body.append(Line()) - body.extend(Line(l) for l in insn.code.split("\n")) + body.extend(Line(l) for l in stmt.code.split("\n")) return Block(body) diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py index 512e4ac86..1c30de8a3 100644 --- a/loopy/diagnostic.py +++ b/loopy/diagnostic.py @@ -100,7 +100,7 @@ class MissingDefinitionError(LoopyError): pass -class UnscheduledInstructionError(LoopyError): +class UnscheduledStatementError(LoopyError): pass diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index e801d09dc..391ec1414 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -113,12 +113,12 @@ class Scope(object): self.active_loopy_inames = set() - self.instructions = [] + self.statements = [] self.temporary_variables = [] self.used_names = set() - self.previous_instruction_id = None + self.previous_statement_id 
= None def known_names(self): return (self.used_names @@ -205,12 +205,12 @@ class F2LoopyTranslator(FTreeWalkerBase): self.scope_stack = [] - self.insn_id_counter = 0 + self.stmt_id_counter = 0 self.condition_id_counter = 0 self.kernels = [] - self.instruction_tags = [] + self.statement_tags = [] self.conditions = [] self.conditions_data = [] @@ -220,23 +220,23 @@ class F2LoopyTranslator(FTreeWalkerBase): self.block_nest = [] - def add_expression_instruction(self, lhs, rhs): + def add_expression_statement(self, lhs, rhs): scope = self.scope_stack[-1] - new_id = intern("insn%d" % self.insn_id_counter) - self.insn_id_counter += 1 + new_id = intern("stmt%d" % self.stmt_id_counter) + self.stmt_id_counter += 1 from loopy.kernel.data import Assignment - insn = Assignment( + stmt = Assignment( lhs, rhs, within_inames=frozenset( scope.active_loopy_inames), id=new_id, predicates=frozenset(self.conditions), - tags=tuple(self.instruction_tags)) + tags=tuple(self.statement_tags)) - scope.previous_instruction_id = new_id - scope.instructions.append(insn) + scope.previous_statement_id = new_id + scope.statements.append(stmt) # {{{ map_XXX functions @@ -413,7 +413,7 @@ class F2LoopyTranslator(FTreeWalkerBase): rhs = scope.process_expression_for_loopy(self.parse_expr(node, node.expr)) - self.add_expression_instruction(lhs, rhs) + self.add_expression_statement(lhs, rhs) def map_Allocate(self, node): raise NotImplementedError("allocate") @@ -464,7 +464,7 @@ class F2LoopyTranslator(FTreeWalkerBase): from pymbolic import var cond_var = var(cond_name) - self.add_expression_instruction( + self.add_expression_statement( cond_var, self.parse_expr(node, node.expr)) cond_expr = cond_var @@ -646,16 +646,16 @@ class F2LoopyTranslator(FTreeWalkerBase): if begin_tag_match: tag = begin_tag_match.group(1) - if tag in self.instruction_tags: + if tag in self.statement_tags: raise TranslationError("nested begin tag for tag '%s'" % tag) - self.instruction_tags.append(tag) + 
self.statement_tags.append(tag) elif end_tag_match: tag = end_tag_match.group(1) - if tag not in self.instruction_tags: + if tag not in self.statement_tags: raise TranslationError( "end tag without begin tag for tag '%s'" % tag) - self.instruction_tags.remove(tag) + self.statement_tags.remove(tag) elif faulty_loopy_pragma_match is not None: from warnings import warn @@ -710,7 +710,7 @@ class F2LoopyTranslator(FTreeWalkerBase): knl = lp.make_kernel( sub.index_sets, - sub.instructions, + sub.statements, kernel_data, name=sub.subprogram_name, default_order="F", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index bdef1133e..9005fcbfa 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -109,10 +109,10 @@ class LoopKernel(ImmutableRecordWithoutPickling): a list of :class:`islpy.BasicSet` instances representing the :ref:`domain-tree`. - .. attribute:: instructions + .. attribute:: statements - A list of :class:`InstructionBase` instances, e.g. - :class:`Assignment`. See :ref:`instructions`. + A list of :class:`StatementBase` instances, e.g. + :class:`Assignment`. See :ref:`statements`. .. attribute:: args @@ -186,7 +186,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ constructor - def __init__(self, domains, instructions, args=[], schedule=None, + def __init__(self, domains, statements=None, args=[], schedule=None, name="loopy_kernel", preambles=[], preamble_generators=[], @@ -213,9 +213,14 @@ class LoopKernel(ImmutableRecordWithoutPickling): state=kernel_state.INITIAL, target=None, - overridden_get_grid_sizes_for_insn_ids=None): + overridden_get_grid_sizes_for_stmt_ids=None, + + # compat + statements=None, + overridden_get_grid_sizes_for_stmt_ids=None, + ): """ - :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get + :arg overridden_get_grid_sizes_for_stmt_ids: A callable. When kernels get intersected in slab decomposition, their grid sizes shouldn't change. 
This provides a way to forward sub-kernel grid size requests. """ @@ -224,6 +229,23 @@ class LoopKernel(ImmutableRecordWithoutPickling): from loopy.kernel.tools import SetOperationCacheManager cache_manager = SetOperationCacheManager() + if statements is not None and statements is not None: + raise TypeError("may not specify both statements and statements") + elif statements is None and statements is None: + raise TypeError( + "must specify exactly one of statements and statements") + elif statements is not None: + statements = statements + + if (overridden_get_grid_sizes_for_stmt_ids is not None + and overridden_get_grid_sizes_for_stmt_ids is not None): + raise TypeError("may not specify both " + "overridden_get_grid_sizes_for_stmt_ids " + "and overridden_get_grid_sizes_for_stmt_ids{") + elif overridden_get_grid_sizes_for_stmt_ids is not None: + overridden_get_grid_sizes_for_stmt_ids = \ + overridden_get_grid_sizes_for_stmt_ids + # {{{ process assumptions if assumptions is None: @@ -266,7 +288,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): ImmutableRecordWithoutPickling.__init__(self, domains=domains, - instructions=instructions, + statements=statements, args=args, schedule=schedule, name=name, @@ -288,8 +310,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): options=options, state=state, target=target, - overridden_get_grid_sizes_for_insn_ids=( - overridden_get_grid_sizes_for_insn_ids)) + overridden_get_grid_sizes_for_stmt_ids=( + overridden_get_grid_sizes_for_stmt_ids)) self._kernel_executor_cache = {} @@ -375,17 +397,17 @@ class LoopKernel(ImmutableRecordWithoutPickling): def get_var_name_generator(self): return _UniqueVarNameGenerator(self.all_variable_names()) - def get_instruction_id_generator(self, based_on="insn"): - used_ids = set(insn.id for insn in self.instructions) + def get_statement_id_generator(self, based_on="stmt"): + used_ids = set(stmt.id for stmt in self.statements) return UniqueNameGenerator(used_ids) - def 
make_unique_instruction_id(self, insns=None, based_on="insn", + def make_unique_statement_id(self, stmts=None, based_on="stmt", extra_used_ids=set()): - if insns is None: - insns = self.instructions + if stmts is None: + stmts = self.statements - used_ids = set(insn.id for insn in insns) | extra_used_ids + used_ids = set(stmt.id for stmt in stmts) | extra_used_ids for id_str in generate_unique_names(based_on): if id_str not in used_ids: @@ -393,9 +415,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): def all_group_names(self): result = set() - for insn in self.instructions: - result.update(insn.groups) - result.update(insn.conflicts_with_groups) + for stmt in self.statements: + result.update(stmt.groups) + result.update(stmt.conflicts_with_groups) return frozenset(result) @@ -417,8 +439,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): @property @memoize_method - def id_to_insn(self): - return dict((insn.id, insn) for insn in self.instructions) + def id_to_stmt(self): + return dict((stmt.id, stmt) for stmt in self.statements) # }}} @@ -659,35 +681,35 @@ class LoopKernel(ImmutableRecordWithoutPickling): return intern_frozenset_of_ids(all_params-all_inames) @memoize_method - def all_insn_inames(self): - """Return a mapping from instruction ids to inames inside which + def all_stmt_inames(self): + """Return a mapping from statement ids to inames inside which they should be run. 
""" result = {} - for insn in self.instructions: - result[insn.id] = insn.within_inames + for stmt in self.statements: + result[stmt.id] = stmt.within_inames return result @memoize_method def all_referenced_inames(self): result = set() - for inames in six.itervalues(self.all_insn_inames()): + for inames in six.itervalues(self.all_stmt_inames()): result.update(inames) return result - def insn_inames(self, insn): - if isinstance(insn, str): - insn = self.id_to_insn[insn] - return insn.within_inames + def stmt_inames(self, stmt): + if isinstance(stmt, str): + stmt = self.id_to_stmt[stmt] + return stmt.within_inames @memoize_method - def iname_to_insns(self): + def iname_to_stmts(self): result = dict( (iname, set()) for iname in self.all_inames()) - for insn in self.instructions: - for iname in self.insn_inames(insn): - result[iname].add(insn.id) + for stmt in self.statements: + for iname in self.stmt_inames(stmt): + result[iname].add(stmt.id) return result @@ -727,31 +749,31 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ dependency wrangling @memoize_method - def recursive_insn_dep_map(self): - """Returns a :class:`dict` mapping an instruction IDs *a* - to all instruction IDs it directly or indirectly depends + def recursive_stmt_dep_map(self): + """Returns a :class:`dict` mapping an statement IDs *a* + to all statement IDs it directly or indirectly depends on. 
""" result = {} - def compute_deps(insn_id): + def compute_deps(stmt_id): try: - return result[insn_id] + return result[stmt_id] except KeyError: pass - insn = self.id_to_insn[insn_id] - insn_result = set(insn.depends_on) + stmt = self.id_to_stmt[stmt_id] + stmt_result = set(stmt.depends_on) - for dep in list(insn.depends_on): - insn_result.update(compute_deps(dep)) + for dep in list(stmt.depends_on): + stmt_result.update(compute_deps(dep)) - result[insn_id] = frozenset(insn_result) - return insn_result + result[stmt_id] = frozenset(stmt_result) + return stmt_result - for insn in self.instructions: - compute_deps(insn.id) + for stmt in self.statements: + compute_deps(stmt.id) return result @@ -762,7 +784,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): @memoize_method def reader_map(self): """ - :return: a dict that maps variable names to ids of insns that read that + :return: a dict that maps variable names to ids of stmts that read that variable. """ result = {} @@ -771,39 +793,39 @@ class LoopKernel(ImmutableRecordWithoutPickling): set(arg.name for arg in self.args) | set(six.iterkeys(self.temporary_variables))) - for insn in self.instructions: - for var_name in insn.read_dependency_names() & admissible_vars: - result.setdefault(var_name, set()).add(insn.id) + for stmt in self.statements: + for var_name in stmt.read_dependency_names() & admissible_vars: + result.setdefault(var_name, set()).add(stmt.id) return result @memoize_method def writer_map(self): """ - :return: a dict that maps variable names to ids of insns that write + :return: a dict that maps variable names to ids of stmts that write to that variable. 
""" result = {} - for insn in self.instructions: - for var_name in insn.assignee_var_names(): - result.setdefault(var_name, set()).add(insn.id) + for stmt in self.statements: + for var_name in stmt.assignee_var_names(): + result.setdefault(var_name, set()).add(stmt.id) return result @memoize_method def get_read_variables(self): result = set() - for insn in self.instructions: - result.update(insn.read_dependency_names()) + for stmt in self.statements: + result.update(stmt.read_dependency_names()) return result @memoize_method def get_written_variables(self): return frozenset( var_name - for insn in self.instructions - for var_name in insn.assignee_var_names()) + for stmt in self.statements + for var_name in stmt.assignee_var_names()) @memoize_method def get_temporary_to_base_storage_map(self): @@ -902,29 +924,29 @@ class LoopKernel(ImmutableRecordWithoutPickling): constants_only=True))) @memoize_method - def get_grid_sizes_for_insn_ids(self, insn_ids, ignore_auto=False): + def get_grid_sizes_for_stmt_ids(self, stmt_ids, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that - could accommodate execution of all instructions whose IDs are given - in *insn_ids*. + could accommodate execution of all statements whose IDs are given + in *stmt_ids*. - :arg insn_ids: a :class:`frozenset` of instruction IDs + :arg stmt_ids: a :class:`frozenset` of statement IDs *global_size* and *local_size* are :class:`islpy.PwAff` objects. 
""" - if self.overridden_get_grid_sizes_for_insn_ids: - return self.overridden_get_grid_sizes_for_insn_ids( - insn_ids, + if self.overridden_get_grid_sizes_for_stmt_ids: + return self.overridden_get_grid_sizes_for_stmt_ids( + stmt_ids, ignore_auto=ignore_auto) - all_inames_by_insns = set() - for insn_id in insn_ids: - all_inames_by_insns |= self.insn_inames(insn_id) + all_inames_by_stmts = set() + for stmt_id in stmt_ids: + all_inames_by_stmts |= self.stmt_inames(stmt_id) - if not all_inames_by_insns <= self.all_inames(): - raise RuntimeError("some inames collected from instructions (%s) " + if not all_inames_by_stmts <= self.all_inames(): + raise RuntimeError("some inames collected from statements (%s) " "are not present in domain (%s)" - % (", ".join(sorted(all_inames_by_insns)), + % (", ".join(sorted(all_inames_by_stmts)), ", ".join(sorted(self.all_inames())))) global_sizes = {} @@ -934,7 +956,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): GroupIndexTag, LocalIndexTag, AutoLocalIndexTagBase) - for iname in all_inames_by_insns: + for iname in all_inames_by_stmts: tag = self.iname_to_tag.get(iname) if isinstance(tag, GroupIndexTag): @@ -995,18 +1017,18 @@ class LoopKernel(ImmutableRecordWithoutPickling): return (to_dim_tuple(global_sizes, "global"), to_dim_tuple(local_sizes, "local", forced_sizes=self.local_sizes)) - def get_grid_sizes_for_insn_ids_as_exprs(self, insn_ids, ignore_auto=False): + def get_grid_sizes_for_stmt_ids_as_exprs(self, stmt_ids, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that - could accommodate execution of all instructions whose IDs are given - in *insn_ids*. + could accommodate execution of all statements whose IDs are given + in *stmt_ids*. 
- :arg insn_ids: a :class:`frozenset` of instruction IDs + :arg stmt_ids: a :class:`frozenset` of statement IDs *global_size* and *local_size* are :mod:`pymbolic` expressions """ - grid_size, group_size = self.get_grid_sizes_for_insn_ids( - insn_ids, ignore_auto) + grid_size, group_size = self.get_grid_sizes_for_stmt_ids( + stmt_ids, ignore_auto) def tup_to_exprs(tup): from loopy.symbolic import pw_aff_to_expr @@ -1016,23 +1038,23 @@ class LoopKernel(ImmutableRecordWithoutPickling): def get_grid_size_upper_bounds(self, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that - could accommodate execution of *all* instructions in the kernel. + could accommodate execution of *all* statements in the kernel. *global_size* and *local_size* are :class:`islpy.PwAff` objects. """ - return self.get_grid_sizes_for_insn_ids( - frozenset(insn.id for insn in self.instructions), + return self.get_grid_sizes_for_stmt_ids( + frozenset(stmt.id for stmt in self.statements), ignore_auto=ignore_auto) def get_grid_size_upper_bounds_as_exprs(self, ignore_auto=False): """Return a tuple (global_size, local_size) containing a grid that - could accommodate execution of *all* instructions in the kernel. + could accommodate execution of *all* statements in the kernel. 
*global_size* and *local_size* are :mod:`pymbolic` expressions """ - return self.get_grid_sizes_for_insn_ids_as_exprs( - frozenset(insn.id for insn in self.instructions), + return self.get_grid_sizes_for_stmt_ids_as_exprs( + frozenset(stmt.id for stmt in self.statements), ignore_auto=ignore_auto) # }}} @@ -1058,12 +1080,12 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ nosync sets @memoize_method - def get_nosync_set(self, insn_id, scope): + def get_nosync_set(self, stmt_id, scope): assert scope in ("local", "global") return frozenset( - insn_id - for insn_id, nosync_scope in self.id_to_insn[insn_id].no_sync_with + stmt_id + for stmt_id, nosync_scope in self.id_to_stmt[stmt_id].no_sync_with if nosync_scope == scope or nosync_scope == "any") # }}} @@ -1094,7 +1116,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): "tags", "variables", "rules", - "instructions", + "Statements", + "statements", "Dependencies", "schedule", ]) @@ -1171,18 +1194,18 @@ class LoopKernel(ImmutableRecordWithoutPickling): for rule_name in natsorted(six.iterkeys(kernel.substitutions)): lines.append(str(kernel.substitutions[rule_name])) - if "instructions" in what: + if "Statements" in what or "statements" in what: lines.extend(sep) if show_labels: - lines.append("INSTRUCTIONS:") + lines.append("STATEMENTS:") - from loopy.kernel.tools import stringify_instruction_list - lines.extend(stringify_instruction_list(kernel)) + from loopy.kernel.tools import stringify_statement_list + lines.extend(stringify_statement_list(kernel)) dep_lines = [] - for insn in kernel.instructions: - if insn.depends_on: - dep_lines.append("%s : %s" % (insn.id, ",".join(insn.depends_on))) + for stmt in kernel.statements: + if stmt.depends_on: + dep_lines.append("%s : %s" % (stmt.id, ",".join(stmt.depends_on))) if "Dependencies" in what and dep_lines: lines.extend(sep) @@ -1307,7 +1330,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): hash_fields = ( "domains", - "instructions", + "statements", "args", 
"schedule", "name", @@ -1387,6 +1410,21 @@ class LoopKernel(ImmutableRecordWithoutPickling): # }}} + # {{{ "statement" compat goop + + @property + def id_to_stmt(self): + return self.id_to_stmt + + @property + def statements(self): + return self.statements + + def get_statement_id_generator(self, based_on="stmt"): + return self.get_statement_id_generator(based_on) + + # }}} + # }}} # vim: foldmethod=marker diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index c6618d62f..b49a7d9a9 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -31,7 +31,7 @@ from pymbolic.mapper import CSECachingMapperMixin from loopy.tools import intern_frozenset_of_ids from loopy.symbolic import IdentityMapper, WalkMapper from loopy.kernel.data import ( - InstructionBase, + StatementBase, MultiAssignmentBase, Assignment, SubstitutionRule) from loopy.diagnostic import LoopyError, warn_with_kernel @@ -73,7 +73,7 @@ WORD_RE = re.compile(r"\b([a-zA-Z0-9_]+)\b") BRACE_RE = re.compile(r"\$\{([a-zA-Z0-9_]+)\}") -def expand_defines(insn, defines, single_valued=True): +def expand_defines(stmt, defines, single_valued=True): replacements = [()] processed_defines = set() @@ -83,7 +83,7 @@ def expand_defines(insn, defines, single_valued=True): (WORD_RE, r"\b%s\b"), ]: - for match in find_regexp.finditer(insn): + for match in find_regexp.finditer(stmt): define_name = match.group(1) # {{{ don't process the same define multiple times @@ -118,7 +118,7 @@ def expand_defines(insn, defines, single_valued=True): for rep in replacements] for rep in replacements: - rep_value = insn + rep_value = stmt for pattern, val in rep: rep_value = re.sub(pattern, str(val), rep_value) @@ -147,16 +147,16 @@ def expand_defines_in_expr(expr, defines): # }}} -# {{{ instruction options +# {{{ statement options -def get_default_insn_options_dict(): +def get_default_stmt_options_dict(): return { "depends_on": frozenset(), "depends_on_is_final": False, "no_sync_with": frozenset(), "groups": 
frozenset(), "conflicts_with_groups": frozenset(), - "insn_id": None, + "stmt_id": None, "inames_to_dup": [], "priority": 0, "within_inames_is_final": False, @@ -172,7 +172,7 @@ from collections import namedtuple _NosyncParseResult = namedtuple("_NosyncParseResult", "expr, scope") -def parse_insn_options(opt_dict, options_str, assignee_names=None): +def parse_stmt_options(opt_dict, options_str, assignee_names=None): if options_str is None: return opt_dict @@ -212,10 +212,10 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): raise LoopyError("'id' option may not be specified " "in a 'with' block") - result["insn_id"] = intern(opt_value) + result["stmt_id"] = intern(opt_value) elif opt_key == "id_prefix" and opt_value is not None: - result["insn_id"] = UniqueName(opt_value) + result["stmt_id"] = UniqueName(opt_value) elif opt_key == "priority" and opt_value is not None: if is_with_block: @@ -354,7 +354,7 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): else: raise ValueError( - "unrecognized instruction option '%s' " + "unrecognized statement option '%s' " "(maybe a missing/extraneous =value?)" % opt_key) @@ -363,7 +363,7 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): # }}} -# {{{ parse one instruction +# {{{ parse one statement WITH_OPTIONS_RE = re.compile( r"^" @@ -420,10 +420,10 @@ SUBST_RE = re.compile( r"^\s*(?P.+?)\s*:=\s*(?P.+)\s*$") -def parse_insn(groups, insn_options): +def parse_stmt(groups, stmt_options): """ - :return: a tuple ``(insn, inames_to_dup)``, where insn is a - :class:`Assignment`, a :class:`CallInstruction`, + :return: a tuple ``(stmt, inames_to_dup)``, where stmt is a + :class:`Assignment`, a :class:`CallStatement`, or a :class:`SubstitutionRule` and *inames_to_dup* is None or a list of tuples `(old, new)`. 
""" @@ -488,22 +488,22 @@ def parse_insn(groups, insn_options): temp_var_types = tuple(temp_var_types) del new_lhs - insn_options = parse_insn_options( - insn_options.copy(), + stmt_options = parse_stmt_options( + stmt_options.copy(), groups["options"], assignee_names=assignee_names) - insn_id = insn_options.pop("insn_id", None) - inames_to_dup = insn_options.pop("inames_to_dup", []) + stmt_id = stmt_options.pop("stmt_id", None) + inames_to_dup = stmt_options.pop("inames_to_dup", []) kwargs = dict( id=( - intern(insn_id) - if isinstance(insn_id, str) - else insn_id), - **insn_options) + intern(stmt_id) + if isinstance(stmt_id, str) + else stmt_id), + **stmt_options) - from loopy.kernel.instruction import make_assignment + from loopy.kernel.statement import make_assignment return make_assignment( lhs, rhs, temp_var_types, **kwargs ), inames_to_dup @@ -556,47 +556,47 @@ def parse_subst_rule(groups): # }}} -# {{{ parse_special_insn +# {{{ parse_special_stmt -def parse_special_insn(groups, insn_options): - insn_options = parse_insn_options( - insn_options.copy(), +def parse_special_stmt(groups, stmt_options): + stmt_options = parse_stmt_options( + stmt_options.copy(), groups["options"], assignee_names=()) - del insn_options["atomicity"] + del stmt_options["atomicity"] - insn_id = insn_options.pop("insn_id", None) - inames_to_dup = insn_options.pop("inames_to_dup", []) + stmt_id = stmt_options.pop("stmt_id", None) + inames_to_dup = stmt_options.pop("inames_to_dup", []) kwargs = dict( id=( - intern(insn_id) - if isinstance(insn_id, str) - else insn_id), - **insn_options) + intern(stmt_id) + if isinstance(stmt_id, str) + else stmt_id), + **stmt_options) - from loopy.kernel.instruction import NoOpInstruction, BarrierInstruction - special_insn_kind = groups["kind"] + from loopy.kernel.statement import NoOpStatement, BarrierStatement + special_stmt_kind = groups["kind"] - if special_insn_kind == "gbarrier": - cls = BarrierInstruction + if special_stmt_kind == "gbarrier": + 
cls = BarrierStatement kwargs["kind"] = "global" - elif special_insn_kind == "lbarrier": - cls = BarrierInstruction + elif special_stmt_kind == "lbarrier": + cls = BarrierStatement kwargs["kind"] = "local" - elif special_insn_kind == "nop": - cls = NoOpInstruction + elif special_stmt_kind == "nop": + cls = NoOpStatement else: raise LoopyError( - "invalid kind of special instruction: '%s'" % special_insn_kind) + "invalid kind of special statement: '%s'" % special_stmt_kind) return cls(**kwargs), inames_to_dup # }}} -# {{{ parse_instructions +# {{{ parse_statements _PAREN_PAIRS = { "(": (+1, "("), @@ -619,184 +619,184 @@ def _count_open_paren_symbols(s): return result -def parse_instructions(instructions, defines): - if isinstance(instructions, str): - instructions = [instructions] +def parse_statements(statements, defines): + if isinstance(statements, str): + statements = [statements] substitutions = {} - new_instructions = [] + new_statements = [] # {{{ pass 1: interning, comments, whitespace - for insn in instructions: - if isinstance(insn, SubstitutionRule): - substitutions[insn.name] = insn + for stmt in statements: + if isinstance(stmt, SubstitutionRule): + substitutions[stmt.name] = stmt continue - elif isinstance(insn, InstructionBase): + elif isinstance(stmt, StatementBase): def intern_if_str(s): if isinstance(s, str): return intern(s) else: return s - new_instructions.append( - insn.copy( - id=intern(insn.id) if isinstance(insn.id, str) else insn.id, + new_statements.append( + stmt.copy( + id=intern(stmt.id) if isinstance(stmt.id, str) else stmt.id, depends_on=frozenset(intern_if_str(dep) - for dep in insn.depends_on), - groups=frozenset(intern(grp) for grp in insn.groups), + for dep in stmt.depends_on), + groups=frozenset(intern(grp) for grp in stmt.groups), conflicts_with_groups=frozenset( - intern(grp) for grp in insn.conflicts_with_groups), + intern(grp) for grp in stmt.conflicts_with_groups), within_inames=frozenset( - intern(iname) for iname in 
insn.within_inames), + intern(iname) for iname in stmt.within_inames), )) continue - elif not isinstance(insn, str): - raise TypeError("Instructions must be either an Instruction " + elif not isinstance(stmt, str): + raise TypeError("Statements must be either an Statement " "instance or a parseable string. got '%s' instead." - % type(insn)) + % type(stmt)) - for insn in insn.split("\n"): - comment_start = insn.find("#") + for stmt in stmt.split("\n"): + comment_start = stmt.find("#") if comment_start >= 0: - insn = insn[:comment_start] + stmt = stmt[:comment_start] - insn = insn.strip() - if not insn: + stmt = stmt.strip() + if not stmt: continue - new_instructions.append(insn) + new_statements.append(stmt) # }}} - instructions = new_instructions - new_instructions = [] + statements = new_statements + new_statements = [] # {{{ pass 2: join-by-paren - insn_buffer = None + stmt_buffer = None - for i, insn in enumerate(instructions): - if isinstance(insn, InstructionBase): - if insn_buffer is not None: - raise LoopyError("cannot join instruction lines " + for i, stmt in enumerate(statements): + if isinstance(stmt, StatementBase): + if stmt_buffer is not None: + raise LoopyError("cannot join statement lines " "by paren-like delimiters " - "across InstructionBase instance at instructions index %d" + "across StatementBase instance at statements index %d" % i) - new_instructions.append(insn) + new_statements.append(stmt) else: - if insn_buffer is not None: - insn_buffer = insn_buffer + " " + insn - if _count_open_paren_symbols(insn_buffer) == 0: - new_instructions.append(insn_buffer) - insn_buffer = None + if stmt_buffer is not None: + stmt_buffer = stmt_buffer + " " + stmt + if _count_open_paren_symbols(stmt_buffer) == 0: + new_statements.append(stmt_buffer) + stmt_buffer = None else: - if _count_open_paren_symbols(insn) == 0: - new_instructions.append(insn) + if _count_open_paren_symbols(stmt) == 0: + new_statements.append(stmt) else: - insn_buffer = insn + stmt_buffer 
= stmt - if insn_buffer is not None: - raise LoopyError("unclosed paren-like delimiter at end of 'instructions' " + if stmt_buffer is not None: + raise LoopyError("unclosed paren-like delimiter at end of 'statements' " "while attempting to join lines by paren-like delimiters") # }}} - instructions = new_instructions - new_instructions = [] + statements = new_statements + new_statements = [] # {{{ pass 3: defines - for insn in instructions: - if isinstance(insn, InstructionBase): - new_instructions.append(insn) + for stmt in statements: + if isinstance(stmt, StatementBase): + new_statements.append(stmt) else: - for sub_insn in expand_defines(insn, defines, single_valued=False): - new_instructions.append(sub_insn) + for sub_stmt in expand_defines(stmt, defines, single_valued=False): + new_statements.append(sub_stmt) # }}} - instructions = new_instructions - new_instructions = [] + statements = new_statements + new_statements = [] - inames_to_dup = [] # one for each parsed_instruction + inames_to_dup = [] # one for each parsed_statement # {{{ pass 4: parsing - insn_options_stack = [get_default_insn_options_dict()] + stmt_options_stack = [get_default_stmt_options_dict()] if_predicates_stack = [ {'predicates': frozenset(), - 'insn_predicates': frozenset()}] + 'stmt_predicates': frozenset()}] - for insn in instructions: - if isinstance(insn, InstructionBase): - local_w_inames = insn_options_stack[-1]["within_inames"] + for stmt in statements: + if isinstance(stmt, StatementBase): + local_w_inames = stmt_options_stack[-1]["within_inames"] - if insn.within_inames_is_final: + if stmt.within_inames_is_final: if not ( - local_w_inames <= insn.within_inames): - raise LoopyError("non-parsed instruction '%s' without " + local_w_inames <= stmt.within_inames): + raise LoopyError("non-parsed statement '%s' without " "inames '%s' (but with final iname dependencies) " "found inside 'for'/'with' block for inames " "'%s'" - % (insn.id, - ", ".join(local_w_inames - insn.within_inames), 
- insn_options_stack[-1].within_inames)) + % (stmt.id, + ", ".join(local_w_inames - stmt.within_inames), + stmt_options_stack[-1].within_inames)) else: # not final, add inames from current scope kwargs = {} - if insn.id is None: - kwargs["id"] = insn_options_stack[-1]["insn_id"] + if stmt.id is None: + kwargs["id"] = stmt_options_stack[-1]["stmt_id"] - insn = insn.copy( - within_inames=insn.within_inames | local_w_inames, + stmt = stmt.copy( + within_inames=stmt.within_inames | local_w_inames, within_inames_is_final=( # If it's inside a for/with block, then it's # final now. bool(local_w_inames)), depends_on=( - (insn.depends_on - | insn_options_stack[-1]["depends_on"]) - if insn_options_stack[-1]["depends_on"] is not None - else insn.depends_on), + (stmt.depends_on + | stmt_options_stack[-1]["depends_on"]) + if stmt_options_stack[-1]["depends_on"] is not None + else stmt.depends_on), tags=( - insn.tags - | insn_options_stack[-1]["tags"]), + stmt.tags + | stmt_options_stack[-1]["tags"]), predicates=( - insn.predicates - | insn_options_stack[-1]["predicates"]), + stmt.predicates + | stmt_options_stack[-1]["predicates"]), groups=( - insn.groups - | insn_options_stack[-1]["groups"]), + stmt.groups + | stmt_options_stack[-1]["groups"]), conflicts_with_groups=( - insn.groups - | insn_options_stack[-1]["conflicts_with_groups"]), + stmt.groups + | stmt_options_stack[-1]["conflicts_with_groups"]), **kwargs) - new_instructions.append(insn) + new_statements.append(stmt) inames_to_dup.append([]) del local_w_inames continue - with_options_match = WITH_OPTIONS_RE.match(insn) + with_options_match = WITH_OPTIONS_RE.match(stmt) if with_options_match is not None: - insn_options_stack.append( - parse_insn_options( - insn_options_stack[-1], + stmt_options_stack.append( + parse_stmt_options( + stmt_options_stack[-1], with_options_match.group("options"))) continue - for_match = FOR_RE.match(insn) + for_match = FOR_RE.match(stmt) if for_match is not None: - options = 
insn_options_stack[-1].copy() + options = stmt_options_stack[-1].copy() added_inames = frozenset( iname.strip() for iname in for_match.group("inames").split(",") @@ -809,13 +809,13 @@ def parse_instructions(instructions, defines): | added_inames) options["within_inames_is_final"] = True - insn_options_stack.append(options) + stmt_options_stack.append(options) del options continue - if_match = IF_RE.match(insn) + if_match = IF_RE.match(stmt) if if_match is not None: - options = insn_options_stack[-1].copy() + options = stmt_options_stack[-1].copy() predicate = if_match.group("predicate") if not predicate: raise LoopyError("'if' without predicate encountered") @@ -827,27 +827,27 @@ def parse_instructions(instructions, defines): options.get("predicates", frozenset()) | frozenset([predicate])) - insn_options_stack.append(options) + stmt_options_stack.append(options) #add to the if_stack if_options = options.copy() - if_options['insn_predicates'] = options["predicates"] + if_options['stmt_predicates'] = options["predicates"] if_predicates_stack.append(if_options) del options del predicate continue - elif_match = ELIF_RE.match(insn) - else_match = ELSE_RE.match(insn) + elif_match = ELIF_RE.match(stmt) + else_match = ELSE_RE.match(stmt) if elif_match is not None or else_match is not None: - prev_predicates = insn_options_stack[-1].get( + prev_predicates = stmt_options_stack[-1].get( "predicates", frozenset()) last_if_predicates = if_predicates_stack[-1].get( "predicates", frozenset()) - insn_options_stack.pop() + stmt_options_stack.pop() if_predicates_stack.pop() - outer_predicates = insn_options_stack[-1].get( + outer_predicates = stmt_options_stack[-1].get( "predicates", frozenset()) last_if_predicates = last_if_predicates - outer_predicates @@ -867,8 +867,8 @@ def parse_instructions(instructions, defines): raise LoopyError("'else' without 'if'/'elif' encountered") additional_preds = frozenset() - options = insn_options_stack[-1].copy() - if_options = 
insn_options_stack[-1].copy() + options = stmt_options_stack[-1].copy() + if_options = stmt_options_stack[-1].copy() from pymbolic.primitives import LogicalNot options["predicates"] = ( @@ -881,9 +881,9 @@ def parse_instructions(instructions, defines): ) if_options["predicates"] = additional_preds #hold on to this for comparison / stack popping later - if_options["insn_predicates"] = options["predicates"] + if_options["stmt_predicates"] = options["predicates"] - insn_options_stack.append(options) + stmt_options_stack.append(options) if_predicates_stack.append(if_options) del options @@ -892,53 +892,53 @@ def parse_instructions(instructions, defines): continue - if insn == "end": - obj = insn_options_stack.pop() + if stmt == "end": + obj = stmt_options_stack.pop() #if this object is the end of an if statement - if obj['predicates'] == if_predicates_stack[-1]["insn_predicates"] and\ - if_predicates_stack[-1]["insn_predicates"]: + if obj['predicates'] == if_predicates_stack[-1]["stmt_predicates"] and\ + if_predicates_stack[-1]["stmt_predicates"]: if_predicates_stack.pop() continue - insn_match = SPECIAL_INSN_RE.match(insn) - if insn_match is not None: - insn, insn_inames_to_dup = parse_special_insn( - insn_match.groupdict(), insn_options_stack[-1]) - new_instructions.append(insn) - inames_to_dup.append(insn_inames_to_dup) + stmt_match = SPECIAL_INSN_RE.match(stmt) + if stmt_match is not None: + stmt, stmt_inames_to_dup = parse_special_stmt( + stmt_match.groupdict(), stmt_options_stack[-1]) + new_statements.append(stmt) + inames_to_dup.append(stmt_inames_to_dup) continue - subst_match = SUBST_RE.match(insn) + subst_match = SUBST_RE.match(stmt) if subst_match is not None: subst = parse_subst_rule(subst_match.groupdict()) substitutions[subst.name] = subst continue - insn_match = INSN_RE.match(insn) - if insn_match is not None: - insn, insn_inames_to_dup = parse_insn( - insn_match.groupdict(), insn_options_stack[-1]) - new_instructions.append(insn) - 
inames_to_dup.append(insn_inames_to_dup) + stmt_match = INSN_RE.match(stmt) + if stmt_match is not None: + stmt, stmt_inames_to_dup = parse_stmt( + stmt_match.groupdict(), stmt_options_stack[-1]) + new_statements.append(stmt) + inames_to_dup.append(stmt_inames_to_dup) continue - insn_match = EMPTY_LHS_INSN_RE.match(insn) - if insn_match is not None: - insn, insn_inames_to_dup = parse_insn( - insn_match.groupdict(), insn_options_stack[-1]) - new_instructions.append(insn) - inames_to_dup.append(insn_inames_to_dup) + stmt_match = EMPTY_LHS_INSN_RE.match(stmt) + if stmt_match is not None: + stmt, stmt_inames_to_dup = parse_stmt( + stmt_match.groupdict(), stmt_options_stack[-1]) + new_statements.append(stmt) + inames_to_dup.append(stmt_inames_to_dup) continue - raise LoopyError("instruction parse error: %s" % insn) + raise LoopyError("statement parse error: %s" % stmt) - if len(insn_options_stack) != 1: + if len(stmt_options_stack) != 1: raise LoopyError("unbalanced number of 'for'/'with' and 'end' " "declarations") # }}} - return new_instructions, inames_to_dup, substitutions + return new_statements, inames_to_dup, substitutions # }}} @@ -1050,10 +1050,10 @@ class IndexRankFinder(CSECachingMapperMixin, WalkMapper): class ArgumentGuesser: - def __init__(self, domains, instructions, temporary_variables, + def __init__(self, domains, statements, temporary_variables, subst_rules, default_offset): self.domains = domains - self.instructions = instructions + self.statements = statements self.temporary_variables = temporary_variables self.subst_rules = subst_rules self.default_offset = default_offset @@ -1073,15 +1073,15 @@ class ArgumentGuesser: self.all_names = set() self.all_written_names = set() from loopy.symbolic import get_dependencies - for insn in instructions: - if isinstance(insn, MultiAssignmentBase): - for assignee_var_name in insn.assignee_var_names(): + for stmt in statements: + if isinstance(stmt, MultiAssignmentBase): + for assignee_var_name in 
stmt.assignee_var_names(): self.all_written_names.add(assignee_var_name) self.all_names.update(get_dependencies( - self.submap(insn.assignees))) + self.submap(stmt.assignees))) self.all_names.update(get_dependencies( - self.submap(insn.expression))) + self.submap(stmt.expression))) def find_index_rank(self, name): irf = IndexRankFinder(name) @@ -1090,8 +1090,8 @@ class ArgumentGuesser: irf(self.submap(expr)) return expr - for insn in self.instructions: - insn.with_transformed_expressions(run_irf) + for stmt in self.statements: + stmt.with_transformed_expressions(run_irf) if not irf.index_ranks: return 0 @@ -1145,11 +1145,11 @@ class ArgumentGuesser: temp_var_names = set(six.iterkeys(self.temporary_variables)) - for insn in self.instructions: - if isinstance(insn, MultiAssignmentBase): + for stmt in self.statements: + if isinstance(stmt, MultiAssignmentBase): for assignee_var_name, temp_var_type in zip( - insn.assignee_var_names(), - insn.temp_var_types): + stmt.assignee_var_names(), + stmt.temp_var_types): if temp_var_type is not None: temp_var_names.add(assignee_var_name) @@ -1206,14 +1206,14 @@ def check_for_duplicate_names(knl): def check_for_nonexistent_iname_deps(knl): - for insn in knl.instructions: - if not set(insn.within_inames) <= knl.all_inames(): - raise ValueError("In instruction '%s': " + for stmt in knl.statements: + if not set(stmt.within_inames) <= knl.all_inames(): + raise ValueError("In statement '%s': " "cannot force dependency on inames '%s'--" "they don't exist" % ( - insn.id, + stmt.id, ",".join( - set(insn.within_inames)-knl.all_inames()))) + set(stmt.within_inames)-knl.all_inames()))) def check_for_multiple_writes_to_loop_bounds(knl): @@ -1240,8 +1240,8 @@ def check_written_variable_names(knl): set(arg.name for arg in knl.args) | set(six.iterkeys(knl.temporary_variables))) - for insn in knl.instructions: - for var_name in insn.assignee_var_names(): + for stmt in knl.statements: + for var_name in stmt.assignee_var_names(): if var_name not in 
admissible_vars: raise RuntimeError("variable '%s' not declared or not " "allowed for writing" % var_name) @@ -1284,7 +1284,7 @@ class CSEToAssignmentMapper(IdentityMapper): return var -def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"): +def expand_cses(statements, inames_to_dup, cse_prefix="cse_expr"): def add_assignment(base_name, expr, dtype, additional_inames): if base_name is None: base_name = "var" @@ -1305,47 +1305,47 @@ def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"): shape=())) from pymbolic.primitives import Variable - new_insn = Assignment( + new_stmt = Assignment( id=None, assignee=Variable(new_var_name), expression=expr, - predicates=insn.predicates, - within_inames=insn.within_inames | additional_inames, - within_inames_is_final=insn.within_inames_is_final, + predicates=stmt.predicates, + within_inames=stmt.within_inames | additional_inames, + within_inames_is_final=stmt.within_inames_is_final, ) - newly_created_insn_ids.add(new_insn.id) - new_insns.append(new_insn) - if insn_inames_to_dup: + newly_created_stmt_ids.add(new_stmt.id) + new_stmts.append(new_stmt) + if stmt_inames_to_dup: raise LoopyError("in-line iname duplication not allowed in " - "an instruction containing a tagged common " - "subexpression (found in instruction '%s')" - % insn) + "an statement containing a tagged common " + "subexpression (found in statement '%s')" + % stmt) - new_inames_to_dup.append(insn_inames_to_dup) + new_inames_to_dup.append(stmt_inames_to_dup) return new_var_name cseam = CSEToAssignmentMapper(add_assignment=add_assignment) - new_insns = [] + new_stmts = [] new_inames_to_dup = [] from pytools import UniqueNameGenerator var_name_gen = UniqueNameGenerator(forced_prefix=cse_prefix) - newly_created_insn_ids = set() + newly_created_stmt_ids = set() new_temp_vars = [] - for insn, insn_inames_to_dup in zip(instructions, inames_to_dup): - if isinstance(insn, MultiAssignmentBase): - new_insns.append(insn.copy( - 
expression=cseam(insn.expression, frozenset()))) - new_inames_to_dup.append(insn_inames_to_dup) + for stmt, stmt_inames_to_dup in zip(statements, inames_to_dup): + if isinstance(stmt, MultiAssignmentBase): + new_stmts.append(stmt.copy( + expression=cseam(stmt.expression, frozenset()))) + new_inames_to_dup.append(stmt_inames_to_dup) else: - new_insns.append(insn) - new_inames_to_dup.append(insn_inames_to_dup) + new_stmts.append(stmt) + new_inames_to_dup.append(stmt_inames_to_dup) - return new_insns, new_inames_to_dup, new_temp_vars + return new_stmts, new_inames_to_dup, new_temp_vars # }}} @@ -1353,25 +1353,25 @@ def expand_cses(instructions, inames_to_dup, cse_prefix="cse_expr"): # {{{ add_sequential_dependencies def add_sequential_dependencies(knl): - new_insns = [] - prev_insn = None - for insn in knl.instructions: - depon = insn.depends_on + new_stmts = [] + prev_stmt = None + for stmt in knl.statements: + depon = stmt.depends_on if depon is None: depon = frozenset() - if prev_insn is not None: - depon = depon | frozenset((prev_insn.id,)) + if prev_stmt is not None: + depon = depon | frozenset((prev_stmt.id,)) - insn = insn.copy( + stmt = stmt.copy( depends_on=depon, depends_on_is_final=True) - new_insns.append(insn) + new_stmts.append(stmt) - prev_insn = insn + prev_stmt = stmt - return knl.copy(instructions=new_insns) + return knl.copy(statements=new_stmts) # }}} @@ -1379,16 +1379,16 @@ def add_sequential_dependencies(knl): # {{{ temporary variable creation def create_temporaries(knl, default_order): - new_insns = [] + new_stmts = [] new_temp_vars = knl.temporary_variables.copy() import loopy as lp - for insn in knl.instructions: - if isinstance(insn, MultiAssignmentBase): + for stmt in knl.statements: + if isinstance(stmt, MultiAssignmentBase): for assignee_name, temp_var_type in zip( - insn.assignee_var_names(), - insn.temp_var_types): + stmt.assignee_var_names(), + stmt.temp_var_types): if temp_var_type is None: continue @@ -1412,15 +1412,15 @@ def 
create_temporaries(knl, default_order): order=default_order, target=knl.target) - if isinstance(insn, Assignment): - insn = insn.copy(temp_var_type=None) + if isinstance(stmt, Assignment): + stmt = stmt.copy(temp_var_type=None) else: - insn = insn.copy(temp_var_types=None) + stmt = stmt.copy(temp_var_types=None) - new_insns.append(insn) + new_stmts.append(stmt) return knl.copy( - instructions=new_insns, + statements=new_stmts, temporary_variables=new_temp_vars) # }}} @@ -1489,9 +1489,9 @@ def determine_shapes_of_temporaries(knl): vars_needing_shape_inference.add(tv.name) def feed_all_expressions(receiver): - for insn in knl.instructions: - insn.with_transformed_expressions( - lambda expr: receiver(expr, knl.insn_inames(insn))) + for stmt in knl.statements: + stmt.with_transformed_expressions( + lambda expr: receiver(expr, knl.stmt_inames(stmt))) var_to_base_indices, var_to_shape, var_to_error = ( find_shapes_of_vars( @@ -1509,14 +1509,14 @@ def determine_shapes_of_temporaries(knl): "shape of temporary '%s' because: %s" % (varname, err)) - def feed_assignee_of_instruction(receiver): - for insn in knl.instructions: - for assignee in insn.assignees: - receiver(assignee, knl.insn_inames(insn)) + def feed_assignee_of_statement(receiver): + for stmt in knl.statements: + for assignee in stmt.assignees: + receiver(assignee, knl.stmt_inames(stmt)) var_to_base_indices_fallback, var_to_shape_fallback, var_to_error = ( find_shapes_of_vars( - knl, vars_needing_shape_inference, feed_assignee_of_instruction)) + knl, vars_needing_shape_inference, feed_assignee_of_statement)) if len(var_to_error) > 0: # No way around errors: propagate an exception upward. 
@@ -1622,10 +1622,10 @@ def apply_default_order_to_args(kernel, default_order): # }}} -# {{{ resolve instruction dependencies +# {{{ resolve statement dependencies -def _resolve_dependencies(knl, insn, deps): - from loopy import find_instructions +def _resolve_dependencies(knl, stmt, deps): + from loopy import find_statements from loopy.match import MatchExpressionBase new_deps = [] @@ -1634,45 +1634,45 @@ def _resolve_dependencies(knl, insn, deps): found_any = False if isinstance(dep, MatchExpressionBase): - for new_dep in find_instructions(knl, dep): - if new_dep.id != insn.id: + for new_dep in find_statements(knl, dep): + if new_dep.id != stmt.id: new_deps.append(new_dep.id) found_any = True else: from fnmatch import fnmatchcase - for other_insn in knl.instructions: - if fnmatchcase(other_insn.id, dep): - new_deps.append(other_insn.id) + for other_stmt in knl.statements: + if fnmatchcase(other_stmt.id, dep): + new_deps.append(other_stmt.id) found_any = True if not found_any and knl.options.check_dep_resolution: - raise LoopyError("instruction '%s' declared a depency on '%s', " - "which did not resolve to any instruction present in the " + raise LoopyError("statement '%s' declared a depency on '%s', " + "which did not resolve to any statement present in the " "kernel '%s'. Set the kernel option 'check_dep_resolution'" - "to False to disable this check." % (insn.id, dep, knl.name)) + "to False to disable this check." 
% (stmt.id, dep, knl.name)) for dep_id in new_deps: - if dep_id not in knl.id_to_insn: - raise LoopyError("instruction '%s' depends on instruction id '%s', " - "which was not found" % (insn.id, dep_id)) + if dep_id not in knl.id_to_stmt: + raise LoopyError("statement '%s' depends on statement id '%s', " + "which was not found" % (stmt.id, dep_id)) return frozenset(new_deps) def resolve_dependencies(knl): - new_insns = [] + new_stmts = [] - for insn in knl.instructions: - new_insns.append(insn.copy( - depends_on=_resolve_dependencies(knl, insn, insn.depends_on), + for stmt in knl.statements: + new_stmts.append(stmt.copy( + depends_on=_resolve_dependencies(knl, stmt, stmt.depends_on), no_sync_with=frozenset( - (resolved_insn_id, nosync_scope) - for nosync_dep, nosync_scope in insn.no_sync_with - for resolved_insn_id in - _resolve_dependencies(knl, insn, (nosync_dep,))), + (resolved_stmt_id, nosync_scope) + for nosync_dep, nosync_scope in stmt.no_sync_with + for resolved_stmt_id in + _resolve_dependencies(knl, stmt, (nosync_dep,))), )) - return knl.copy(instructions=new_insns) + return knl.copy(statements=new_stmts) # }}} @@ -1680,20 +1680,20 @@ def resolve_dependencies(knl): # {{{ add used inames deps def add_used_inames(knl): - new_insns = [] + new_stmts = [] - for insn in knl.instructions: - deps = insn.read_dependency_names() | insn.write_dependency_names() + for stmt in knl.statements: + deps = stmt.read_dependency_names() | stmt.write_dependency_names() iname_deps = deps & knl.all_inames() - new_within_inames = insn.within_inames | iname_deps + new_within_inames = stmt.within_inames | iname_deps - if new_within_inames != insn.within_inames: - insn = insn.copy(within_inames=new_within_inames) + if new_within_inames != stmt.within_inames: + stmt = stmt.copy(within_inames=new_within_inames) - new_insns.append(insn) + new_stmts.append(stmt) - return knl.copy(instructions=new_insns) + return knl.copy(statements=new_stmts) # }}} @@ -1701,12 +1701,12 @@ def 
add_used_inames(knl): # {{{ add inferred iname deps def add_inferred_inames(knl): - from loopy.kernel.tools import find_all_insn_inames - insn_inames = find_all_insn_inames(knl) + from loopy.kernel.tools import find_all_stmt_inames + stmt_inames = find_all_stmt_inames(knl) - return knl.copy(instructions=[ - insn.copy(within_inames=insn_inames[insn.id]) - for insn in knl.instructions]) + return knl.copy(statements=[ + stmt.copy(within_inames=stmt_inames[stmt.id]) + for stmt in knl.statements]) # }}} @@ -1726,18 +1726,18 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True): var_names = arg_names | set(six.iterkeys(kernel.temporary_variables)) dep_map = dict( - (insn.id, insn.read_dependency_names() & var_names) - for insn in expanded_kernel.instructions) + (stmt.id, stmt.read_dependency_names() & var_names) + for stmt in expanded_kernel.statements) - new_insns = [] - for insn in kernel.instructions: - if not insn.depends_on_is_final: + new_stmts = [] + for stmt in kernel.statements: + if not stmt.depends_on_is_final: auto_deps = set() # {{{ add automatic dependencies all_my_var_writers = set() - for var in dep_map[insn.id]: + for var in dep_map[stmt.id]: var_writers = writer_map.get(var, set()) all_my_var_writers |= var_writers @@ -1751,11 +1751,11 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True): if len(var_writers) == 1: auto_deps.update( var_writers - - set([insn.id])) + - set([stmt.id])) # }}} - depends_on = insn.depends_on + depends_on = stmt.depends_on if depends_on is None: depends_on = frozenset() @@ -1764,26 +1764,26 @@ def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True): if warn_if_used and new_deps != depends_on: warn_with_kernel(kernel, "single_writer_after_creation", "The single-writer dependency heuristic added dependencies " - "on instruction ID(s) '%s' to instruction ID '%s' after " + "on statement ID(s) '%s' to statement ID '%s' after " "kernel creation is complete. 
This is deprecated and " "may stop working in the future. " - "To fix this, ensure that instruction dependencies " + "To fix this, ensure that statement dependencies " "are added/resolved as soon as possible, ideally at kernel " "creation time." - % (", ".join(new_deps - depends_on), insn.id)) + % (", ".join(new_deps - depends_on), stmt.id)) - insn = insn.copy(depends_on=new_deps) + stmt = stmt.copy(depends_on=new_deps) - new_insns.append(insn) + new_stmts.append(stmt) - return kernel.copy(instructions=new_insns) + return kernel.copy(statements=new_stmts) # }}} # {{{ kernel creation top-level -def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): +def make_kernel(domains, statements, kernel_data=["..."], **kwargs): """User-facing kernel creation entrypoint. :arg domains: @@ -1792,9 +1792,9 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): representing the :ref:`domain-tree`. May also be a list of strings which will be parsed into such instances according to :ref:`isl-syntax`. - :arg instructions: + :arg statements: - A list of :class:`Assignment` (or other :class:`InstructionBase` + A list of :class:`Assignment` (or other :class:`StatementBase` subclasses), possibly intermixed with instances of :class:`SubstitutionRule`. This same list may also contain strings which will be parsed into such objects using the @@ -1857,7 +1857,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): :arg target: an instance of :class:`loopy.TargetBase`, or *None*, to use the default target. :arg seq_dependencies: If *True*, dependencies that sequentially - connect the given *instructions* will be added. Defaults to + connect the given *statements* will be added. Defaults to *False*. :arg fixed_parameters: A dictionary of *name*/*value* pairs, where *name* will be fixed to *value*. 
*name* may refer to :ref:`domain-parameters` @@ -1948,8 +1948,8 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): # }}} - instructions, inames_to_dup, substitutions = \ - parse_instructions(instructions, defines) + statements, inames_to_dup, substitutions = \ + parse_statements(statements, defines) # {{{ find/create isl_context @@ -1959,15 +1959,15 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): # }}} - instructions, inames_to_dup, cse_temp_vars = expand_cses( - instructions, inames_to_dup) + statements, inames_to_dup, cse_temp_vars = expand_cses( + statements, inames_to_dup) for tv in cse_temp_vars: temporary_variables[tv.name] = tv del cse_temp_vars domains = parse_domains(domains, defines) - arg_guesser = ArgumentGuesser(domains, instructions, + arg_guesser = ArgumentGuesser(domains, statements, temporary_variables, substitutions, default_offset) @@ -1977,29 +1977,29 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): kwargs["substitutions"] = substitutions from loopy.kernel import LoopKernel - knl = LoopKernel(domains, instructions, kernel_args, + knl = LoopKernel(domains, statements, kernel_args, temporary_variables=temporary_variables, silenced_warnings=silenced_warnings, options=options, target=target, **kwargs) - from loopy.transform.instruction import uniquify_instruction_ids - knl = uniquify_instruction_ids(knl) - from loopy.check import check_for_duplicate_insn_ids - check_for_duplicate_insn_ids(knl) + from loopy.transform.statement import uniquify_statement_ids + knl = uniquify_statement_ids(knl) + from loopy.check import check_for_duplicate_stmt_ids + check_for_duplicate_stmt_ids(knl) if seq_dependencies: knl = add_sequential_dependencies(knl) - assert len(knl.instructions) == len(inames_to_dup) + assert len(knl.statements) == len(inames_to_dup) from loopy import duplicate_inames from loopy.match import Id - for insn, insn_inames_to_dup in zip(knl.instructions, inames_to_dup): - 
for old_iname, new_iname in insn_inames_to_dup: + for stmt, stmt_inames_to_dup in zip(knl.statements, inames_to_dup): + for old_iname, new_iname in stmt_inames_to_dup: knl = duplicate_inames(knl, old_iname, - within=Id(insn.id), new_inames=new_iname) + within=Id(stmt.id), new_inames=new_iname) check_for_nonexistent_iname_deps(knl) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 44cbdea49..42c0a6b75 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -30,8 +30,8 @@ import numpy as np # noqa from pytools import ImmutableRecord from loopy.kernel.array import ArrayBase from loopy.diagnostic import LoopyError -from loopy.kernel.instruction import ( # noqa - InstructionBase, +from loopy.kernel.statement import ( # noqa + StatementBase, memory_ordering, memory_scope, VarAtomicity, @@ -39,10 +39,10 @@ from loopy.kernel.instruction import ( # noqa AtomicUpdate, MultiAssignmentBase, Assignment, - ExpressionInstruction, - CallInstruction, + ExpressionStatement, + CallStatement, make_assignment, - CInstruction) + CStatement) class auto(object): # noqa @@ -70,7 +70,7 @@ class IndexTag(ImmutableRecord): @property def key(self): """Return a hashable, comparable value that is used to ensure - per-instruction uniqueness of this unique iname tag. + per-statement uniqueness of this unique iname tag. Also used for persistent hash construction. 
""" diff --git a/loopy/kernel/instruction.py b/loopy/kernel/statement.py similarity index 88% rename from loopy/kernel/instruction.py rename to loopy/kernel/statement.py index e6b093785..f23815ccb 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/statement.py @@ -28,10 +28,10 @@ from loopy.diagnostic import LoopyError from warnings import warn -# {{{ instructions: base class +# {{{ statements: base class -class InstructionBase(ImmutableRecord): - """A base class for all types of instruction that can occur in +class StatementBase(ImmutableRecord): + """A base class for all types of statement that can occur in a kernel. .. attribute:: id @@ -39,26 +39,26 @@ class InstructionBase(ImmutableRecord): An (otherwise meaningless) identifier that is unique within a :class:`loopy.kernel.LoopKernel`. - .. rubric:: Instruction ordering + .. rubric:: Statement ordering .. attribute:: depends_on - a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances + a :class:`frozenset` of :attr:`id` values of :class:`Statement` instances that *must* be executed before this one. Note that :func:`loopy.preprocess_kernel` (usually invoked automatically) augments this by adding dependencies on any writes to temporaries read - by this instruction. + by this statement. May be *None* to invoke the default. There are two extensions to this: - You may use `*` as a wildcard in the given IDs. This will be expanded - to all matching instruction IDs during :func:`loopy.make_kernel`. - - Instead of an instruction ID, you may pass an instance of + to all matching statement IDs during :func:`loopy.make_kernel`. + - Instead of an statement ID, you may pass an instance of :class:`loopy.match.MatchExpressionBase` into the :attr:`depends_on` :class:`frozenset`. The given expression will be used to add any - matching instructions in the kernel to :attr:`depends_on` during + matching statements in the kernel to :attr:`depends_on` during :func:`loopy.make_kernel`. 
Note, that this is not meant as a user-facing interface. @@ -71,16 +71,16 @@ class InstructionBase(ImmutableRecord): .. attribute:: groups - A :class:`frozenset` of strings indicating the names of 'instruction - groups' of which this instruction is a part. An instruction group is - considered 'active' as long as one (but not all) instructions of the + A :class:`frozenset` of strings indicating the names of 'statement + groups' of which this statement is a part. An statement group is + considered 'active' as long as one (but not all) statements of the group have been executed. .. attribute:: conflicts_with_groups - A :class:`frozenset` of strings indicating which instruction groups - (see :class:`InstructionBase.groups`) may not be active when this - instruction is scheduled. + A :class:`frozenset` of strings indicating which statement groups + (see :class:`StatementBase.groups`) may not be active when this + statement is scheduled. .. attribute:: priority @@ -91,8 +91,8 @@ class InstructionBase(ImmutableRecord): .. attribute:: no_sync_with - a :class:`frozenset` of tuples of the form `(insn_id, scope)`, where - `insn_id` refers to :attr:`id` of :class:`Instruction` instances + a :class:`frozenset` of tuples of the form `(stmt_id, scope)`, where + `stmt_id` refers to :attr:`id` of :class:`Statement` instances and `scope` is one of the following strings: - `"local"` @@ -100,10 +100,10 @@ class InstructionBase(ImmutableRecord): - `"any"`. This indicates no barrier synchronization is necessary with the given - instruction using barriers of type `scope`, even given the existence of + statement using barriers of type `scope`, even given the existence of a dependency chain and apparently conflicting access. - Note, that :attr:`no_sync_with` allows instruction matching through wildcards + Note, that :attr:`no_sync_with` allows statement matching through wildcards and match expression, just like :attr:`depends_on`. .. 
rubric:: Conditionals @@ -111,7 +111,7 @@ class InstructionBase(ImmutableRecord): .. attribute:: predicates a :class:`frozenset` of expressions. The conjunction (logical and) of - their truth values (as defined by C) determines whether this instruction + their truth values (as defined by C) determines whether this statement should be run. .. rubric:: Iname dependencies @@ -119,7 +119,7 @@ class InstructionBase(ImmutableRecord): .. attribute:: within_inames A :class:`frozenset` of inames identifying the loops within which this - instruction will be executed. + statement will be executed. .. rubric:: Iname dependencies @@ -128,7 +128,7 @@ class InstructionBase(ImmutableRecord): .. attribute:: tags A :class:`frozenset` of string identifiers that can be used to - identify groups of instructions. + identify groups of statements. Tags starting with exclamation marks (``!``) are reserved and may have specific meanings defined by :mod:`loopy` or its targets. @@ -164,19 +164,19 @@ class InstructionBase(ImmutableRecord): within_inames_is_final, within_inames, priority, boostable, boostable_into, predicates, tags, - insn_deps=None, insn_deps_is_final=None, + stmt_deps=None, stmt_deps_is_final=None, forced_iname_deps=None, forced_iname_deps_is_final=None): # {{{ backwards compatibility goop - if depends_on is not None and insn_deps is not None: - raise LoopyError("may not specify both insn_deps and depends_on") - elif insn_deps is not None: - warn("insn_deps is deprecated, use depends_on", + if depends_on is not None and stmt_deps is not None: + raise LoopyError("may not specify both stmt_deps and depends_on") + elif stmt_deps is not None: + warn("stmt_deps is deprecated, use depends_on", DeprecationWarning, stacklevel=2) - depends_on = insn_deps - depends_on_is_final = insn_deps_is_final + depends_on = stmt_deps + depends_on_is_final = stmt_deps_is_final if forced_iname_deps is not None and within_inames is not None: raise LoopyError("may not specify both forced_iname_deps " 
@@ -282,16 +282,16 @@ class InstructionBase(ImmutableRecord): # {{{ backwards compatibility goop @property - def insn_deps(self): - warn("insn_deps is deprecated, use depends_on", + def stmt_deps(self): + warn("stmt_deps is deprecated, use depends_on", DeprecationWarning, stacklevel=2) return self.depends_on # legacy @property - def insn_deps_is_final(self): - warn("insn_deps_is_final is deprecated, use depends_on_is_final", + def stmt_deps_is_final(self): + warn("stmt_deps_is_final is deprecated, use depends_on_is_final", DeprecationWarning, stacklevel=2) return self.depends_on_is_final @@ -349,14 +349,14 @@ class InstructionBase(ImmutableRecord): def assignee_name(self): """A convenience wrapper around :meth:`assignee_var_names` that returns the the name of the variable being assigned. - If more than one variable is being modified in the instruction, + If more than one variable is being modified in the statement, :raise:`ValueError` is raised. """ names = self.assignee_var_names() if len(names) != 1: - raise ValueError("expected exactly one assignment in instruction " + raise ValueError("expected exactly one assignment in statement " "on which assignee_name is being called, found %d" % len(names)) @@ -366,7 +366,7 @@ class InstructionBase(ImmutableRecord): @memoize_method def write_dependency_names(self): """Return a set of dependencies of the left hand side of the - assignments performed by this instruction, including written variables + assignments performed by this statement, including written variables and indices. 
""" @@ -393,7 +393,7 @@ class InstructionBase(ImmutableRecord): elif self.boostable is None: pass else: - raise RuntimeError("unexpected value for Instruction.boostable") + raise RuntimeError("unexpected value for Statement.boostable") if self.depends_on: result.append("dep="+":".join(self.depends_on)) @@ -447,22 +447,22 @@ class InstructionBase(ImmutableRecord): # }}} def copy(self, **kwargs): - if "insn_deps" in kwargs: - warn("insn_deps is deprecated, use depends_on", + if "stmt_deps" in kwargs: + warn("stmt_deps is deprecated, use depends_on", DeprecationWarning, stacklevel=2) - kwargs["depends_on"] = kwargs.pop("insn_deps") + kwargs["depends_on"] = kwargs.pop("stmt_deps") - if "insn_deps_is_final" in kwargs: - warn("insn_deps_is_final is deprecated, use depends_on", + if "stmt_deps_is_final" in kwargs: + warn("stmt_deps_is_final is deprecated, use depends_on", DeprecationWarning, stacklevel=2) - kwargs["depends_on_is_final"] = kwargs.pop("insn_deps_is_final") + kwargs["depends_on_is_final"] = kwargs.pop("stmt_deps_is_final") - return super(InstructionBase, self).copy(**kwargs) + return super(StatementBase, self).copy(**kwargs) def __setstate__(self, val): - super(InstructionBase, self).__setstate__(val) + super(StatementBase, self).__setstate__(val) from loopy.tools import intern_frozenset_of_ids @@ -666,13 +666,13 @@ class AtomicUpdate(VarAtomicity): # }}} -# {{{ instruction base class: expression rhs +# {{{ statement base class: expression rhs -class MultiAssignmentBase(InstructionBase): - """An assignment instruction with an expression as a right-hand side.""" +class MultiAssignmentBase(StatementBase): + """An assignment statement with an expression as a right-hand side.""" - fields = InstructionBase.fields | set(["expression"]) - pymbolic_fields = InstructionBase.pymbolic_fields | set(["expression"]) + fields = StatementBase.fields | set(["expression"]) + pymbolic_fields = StatementBase.pymbolic_fields | set(["expression"]) @memoize_method def 
read_dependency_names(self): @@ -704,7 +704,7 @@ class MultiAssignmentBase(InstructionBase): # }}} -# {{{ instruction: assignment +# {{{ statement: assignment class Assignment(MultiAssignmentBase): """ @@ -774,7 +774,7 @@ class Assignment(MultiAssignmentBase): boostable=None, boostable_into=None, tags=None, temp_var_type=None, atomicity=(), priority=0, predicates=frozenset(), - insn_deps=None, insn_deps_is_final=None, + stmt_deps=None, stmt_deps_is_final=None, forced_iname_deps=None, forced_iname_deps_is_final=None): super(Assignment, self).__init__( @@ -791,8 +791,8 @@ class Assignment(MultiAssignmentBase): priority=priority, predicates=predicates, tags=tags, - insn_deps=insn_deps, - insn_deps_is_final=insn_deps_is_final, + stmt_deps=stmt_deps, + stmt_deps_is_final=stmt_deps_is_final, forced_iname_deps=forced_iname_deps, forced_iname_deps_is_final=forced_iname_deps_is_final) @@ -812,7 +812,7 @@ class Assignment(MultiAssignmentBase): self.temp_var_type = temp_var_type self.atomicity = atomicity - # {{{ implement InstructionBase interface + # {{{ implement StatementBase interface @memoize_method def assignee_var_names(self): @@ -844,7 +844,7 @@ class Assignment(MultiAssignmentBase): result += "\n" + 10*" " + "if (%s)" % " && ".join(self.predicates) return result - # {{{ for interface uniformity with CallInstruction + # {{{ for interface uniformity with CallStatement @property def temp_var_types(self): @@ -857,21 +857,21 @@ class Assignment(MultiAssignmentBase): # }}} -class ExpressionInstruction(Assignment): +class ExpressionStatement(Assignment): def __init__(self, *args, **kwargs): - warn("ExpressionInstruction is deprecated. Use Assignment instead", + warn("ExpressionStatement is deprecated. 
Use Assignment instead", DeprecationWarning, stacklevel=2) - super(ExpressionInstruction, self).__init__(*args, **kwargs) + super(ExpressionStatement, self).__init__(*args, **kwargs) # }}} -# {{{ instruction: function call +# {{{ statement: function call -class CallInstruction(MultiAssignmentBase): - """An instruction capturing a function call. Unlike :class:`Assignment`, - this instruction supports functions with multiple return values. +class CallStatement(MultiAssignmentBase): + """An statement capturing a function call. Unlike :class:`Assignment`, + this statement supports functions with multiple return values. .. attribute:: assignees @@ -907,11 +907,11 @@ class CallInstruction(MultiAssignmentBase): boostable=None, boostable_into=None, tags=None, temp_var_types=None, priority=0, predicates=frozenset(), - insn_deps=None, insn_deps_is_final=None, + stmt_deps=None, stmt_deps_is_final=None, forced_iname_deps=None, forced_iname_deps_is_final=None): - super(CallInstruction, self).__init__( + super(CallStatement, self).__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, @@ -925,22 +925,22 @@ class CallInstruction(MultiAssignmentBase): priority=priority, predicates=predicates, tags=tags, - insn_deps=insn_deps, - insn_deps_is_final=insn_deps_is_final, + stmt_deps=stmt_deps, + stmt_deps_is_final=stmt_deps_is_final, forced_iname_deps=forced_iname_deps, forced_iname_deps_is_final=forced_iname_deps_is_final) from pymbolic.primitives import Call from loopy.symbolic import Reduction if not isinstance(expression, (Call, Reduction)) and expression is not None: - raise LoopyError("'expression' argument to CallInstruction " + raise LoopyError("'expression' argument to CallStatement " "must be a function call") from loopy.symbolic import parse if isinstance(assignees, str): assignees = parse(assignees) if not isinstance(assignees, tuple): - raise LoopyError("'assignees' argument to CallInstruction " + raise LoopyError("'assignees' argument to 
CallStatement " "must be a tuple or a string parseable to a tuple" "--got '%s'" % type(assignees).__name__) @@ -961,7 +961,7 @@ class CallInstruction(MultiAssignmentBase): else: self.temp_var_types = temp_var_types - # {{{ implement InstructionBase interface + # {{{ implement StatementBase interface @memoize_method def assignee_var_names(self): @@ -1017,7 +1017,7 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): raise LoopyError("right-hand side in multiple assignment must be " "function call or reduction, got: '%s'" % expression) - return CallInstruction( + return CallStatement( assignees=assignees, expression=expression, temp_var_types=temp_var_types, @@ -1034,14 +1034,14 @@ def make_assignment(assignees, expression, temp_var_types=None, **kwargs): **kwargs) -# {{{ c instruction +# {{{ c statement -class CInstruction(InstructionBase): +class CStatement(StatementBase): """ .. attribute:: iname_exprs A list of tuples *(name, expr)* of inames or expressions based on them - that the instruction needs access to. + that the statement needs access to. .. attribute:: code @@ -1071,11 +1071,11 @@ class CInstruction(InstructionBase): figuring out dependencies. 
""" - fields = InstructionBase.fields | \ + fields = StatementBase.fields | \ set("iname_exprs code read_variables assignees".split()) - pymbolic_fields = InstructionBase.pymbolic_fields | \ + pymbolic_fields = StatementBase.pymbolic_fields | \ set("iname_exprs assignees".split()) - pymbolic_set_fields = InstructionBase.pymbolic_set_fields | \ + pymbolic_set_fields = StatementBase.pymbolic_set_fields | \ set(["read_variables"]) def __init__(self, @@ -1087,7 +1087,7 @@ class CInstruction(InstructionBase): within_inames_is_final=None, within_inames=None, priority=0, boostable=None, boostable_into=None, predicates=frozenset(), tags=None, - insn_deps=None, insn_deps_is_final=None): + stmt_deps=None, stmt_deps_is_final=None): """ :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples, simple strings pepresenting inames are also allowed. A single @@ -1098,7 +1098,7 @@ class CInstruction(InstructionBase): sequence of strings parseable into the desired format. """ - InstructionBase.__init__(self, + StatementBase.__init__(self, id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, @@ -1109,8 +1109,8 @@ class CInstruction(InstructionBase): boostable=boostable, boostable_into=boostable_into, priority=priority, predicates=predicates, tags=tags, - insn_deps=insn_deps, - insn_deps_is_final=insn_deps_is_final) + stmt_deps=stmt_deps, + stmt_deps_is_final=stmt_deps_is_final) # {{{ normalize iname_exprs @@ -1153,7 +1153,7 @@ class CInstruction(InstructionBase): def read_dependency_names(self): result = ( - super(CInstruction, self).read_dependency_names() + super(CStatement, self).read_dependency_names() | frozenset(self.read_variables)) from loopy.symbolic import get_dependencies @@ -1204,7 +1204,7 @@ class CInstruction(InstructionBase): # }}} -class _DataObliviousInstruction(InstructionBase): +class _DataObliviousStatement(StatementBase): # {{{ abstract interface # read_dependency_names inherited @@ -1230,12 +1230,12 @@ class 
_DataObliviousInstruction(InstructionBase): return () -# {{{ barrier instruction +# {{{ barrier statement -class NoOpInstruction(_DataObliviousInstruction): - """An instruction that carries out no operation. It is mainly +class NoOpStatement(_DataObliviousStatement): + """An statement that carries out no operation. It is mainly useful as a way to structure dependencies between other - instructions. + statements. The textual syntax in a :mod:`loopy` kernel is:: @@ -1249,7 +1249,7 @@ class NoOpInstruction(_DataObliviousInstruction): priority=None, boostable=None, boostable_into=None, predicates=None, tags=None): - super(NoOpInstruction, self).__init__( + super(NoOpStatement, self).__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, @@ -1276,10 +1276,10 @@ class NoOpInstruction(_DataObliviousInstruction): # }}} -# {{{ barrier instruction +# {{{ barrier statement -class BarrierInstruction(_DataObliviousInstruction): - """An instruction that requires synchronization with all +class BarrierStatement(_DataObliviousStatement): + """An statement that requires synchronization with all concurrent work items of :attr:`kind`. .. attribute:: kind @@ -1292,7 +1292,7 @@ class BarrierInstruction(_DataObliviousInstruction): ... 
lbarrier """ - fields = _DataObliviousInstruction.fields | set(["kind"]) + fields = _DataObliviousStatement.fields | set(["kind"]) def __init__(self, id, depends_on=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, @@ -1305,7 +1305,7 @@ class BarrierInstruction(_DataObliviousInstruction): if predicates: raise LoopyError("conditional barriers are not supported") - super(BarrierInstruction, self).__init__( + super(BarrierStatement, self).__init__( id=id, depends_on=depends_on, depends_on_is_final=depends_on_is_final, diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index ad1e71e59..edc154b15 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -131,30 +131,30 @@ def _add_and_infer_dtypes_overdetermined(knl, dtype_dict): # }}} -# {{{ find_all_insn_inames fixed point iteration (deprecated) +# {{{ find_all_stmt_inames fixed point iteration (deprecated) -def guess_iname_deps_based_on_var_use(kernel, insn, insn_id_to_inames=None): - # For all variables that insn depends on, find the intersection - # of iname deps of all writers, and add those to insn's +def guess_iname_deps_based_on_var_use(kernel, stmt, stmt_id_to_inames=None): + # For all variables that stmt depends on, find the intersection + # of iname deps of all writers, and add those to stmt's # dependencies. 
result = frozenset() writer_map = kernel.writer_map() - for tv_name in (insn.read_dependency_names() & kernel.get_written_variables()): + for tv_name in (stmt.read_dependency_names() & kernel.get_written_variables()): tv_implicit_inames = None for writer_id in writer_map[tv_name]: - writer_insn = kernel.id_to_insn[writer_id] - if insn_id_to_inames is None: - writer_inames = writer_insn.within_inames + writer_stmt = kernel.id_to_stmt[writer_id] + if stmt_id_to_inames is None: + writer_inames = writer_stmt.within_inames else: - writer_inames = insn_id_to_inames[writer_id] + writer_inames = stmt_id_to_inames[writer_id] writer_implicit_inames = ( writer_inames - - (writer_insn.write_dependency_names() & kernel.all_inames())) + - (writer_stmt.write_dependency_names() & kernel.all_inames())) if tv_implicit_inames is None: tv_implicit_inames = writer_implicit_inames else: @@ -164,16 +164,16 @@ def guess_iname_deps_based_on_var_use(kernel, insn, insn_id_to_inames=None): if tv_implicit_inames is not None: result = result | tv_implicit_inames - return result - insn.reduction_inames() + return result - stmt.reduction_inames() -def find_all_insn_inames(kernel): - logger.debug("%s: find_all_insn_inames: start" % kernel.name) +def find_all_stmt_inames(kernel): + logger.debug("%s: find_all_stmt_inames: start" % kernel.name) writer_map = kernel.writer_map() - insn_id_to_inames = {} - insn_assignee_inames = {} + stmt_id_to_inames = {} + stmt_assignee_inames = {} all_read_deps = {} all_write_deps = {} @@ -181,30 +181,30 @@ def find_all_insn_inames(kernel): from loopy.transform.subst import expand_subst kernel = expand_subst(kernel) - for insn in kernel.instructions: - all_read_deps[insn.id] = read_deps = insn.read_dependency_names() - all_write_deps[insn.id] = write_deps = insn.write_dependency_names() + for stmt in kernel.statements: + all_read_deps[stmt.id] = read_deps = stmt.read_dependency_names() + all_write_deps[stmt.id] = write_deps = stmt.write_dependency_names() deps = 
read_deps | write_deps - if insn.within_inames_is_final: - iname_deps = insn.within_inames + if stmt.within_inames_is_final: + iname_deps = stmt.within_inames else: iname_deps = ( deps & kernel.all_inames() - | insn.within_inames) + | stmt.within_inames) - assert isinstance(read_deps, frozenset), type(insn) - assert isinstance(write_deps, frozenset), type(insn) - assert isinstance(iname_deps, frozenset), type(insn) + assert isinstance(read_deps, frozenset), type(stmt) + assert isinstance(write_deps, frozenset), type(stmt) + assert isinstance(iname_deps, frozenset), type(stmt) - logger.debug("%s: find_all_insn_inames: %s (init): %s - " + logger.debug("%s: find_all_stmt_inames: %s (init): %s - " "read deps: %s - write deps: %s" % ( - kernel.name, insn.id, ", ".join(sorted(iname_deps)), + kernel.name, stmt.id, ", ".join(sorted(iname_deps)), ", ".join(sorted(read_deps)), ", ".join(sorted(write_deps)), )) - insn_id_to_inames[insn.id] = iname_deps - insn_assignee_inames[insn.id] = write_deps & kernel.all_inames() + stmt_id_to_inames[stmt.id] = iname_deps + stmt_assignee_inames[stmt.id] = write_deps & kernel.all_inames() # fixed point iteration until all iname dep sets have converged @@ -221,36 +221,36 @@ def find_all_insn_inames(kernel): while True: did_something = False - for insn in kernel.instructions: + for stmt in kernel.statements: - if insn.within_inames_is_final: + if stmt.within_inames_is_final: continue # {{{ depdency-based propagation - inames_old = insn_id_to_inames[insn.id] + inames_old = stmt_id_to_inames[stmt.id] inames_new = inames_old | guess_iname_deps_based_on_var_use( - kernel, insn, insn_id_to_inames) + kernel, stmt, stmt_id_to_inames) - insn_id_to_inames[insn.id] = inames_new + stmt_id_to_inames[stmt.id] = inames_new if inames_new != inames_old: did_something = True warn_with_kernel(kernel, "inferred_iname", - "The iname(s) '%s' on instruction '%s' " + "The iname(s) '%s' on statement '%s' " "was/were automatically added. " "This is deprecated. 
Please add the iname " - "to the instruction " + "to the statement " "explicitly, e.g. by adding 'for' loops" - % (", ".join(inames_new-inames_old), insn.id)) + % (", ".join(inames_new-inames_old), stmt.id)) # }}} # {{{ domain-based propagation - inames_old = insn_id_to_inames[insn.id] - inames_new = set(insn_id_to_inames[insn.id]) + inames_old = stmt_id_to_inames[stmt.id] + inames_new = set(stmt_id_to_inames[stmt.id]) for iname in inames_old: home_domain = kernel.domains[kernel.get_home_domain_index(iname)] @@ -268,31 +268,31 @@ def find_all_insn_inames(kernel): if par in kernel.temporary_variables: for writer_id in writer_map.get(par, []): - inames_new.update(insn_id_to_inames[writer_id]) + inames_new.update(stmt_id_to_inames[writer_id]) if inames_new != inames_old: did_something = True - insn_id_to_inames[insn.id] = frozenset(inames_new) + stmt_id_to_inames[stmt.id] = frozenset(inames_new) warn_with_kernel(kernel, "inferred_iname", - "The iname(s) '%s' on instruction '%s' was " + "The iname(s) '%s' on statement '%s' was " "automatically added. " "This is deprecated. Please add the iname " - "to the instruction " + "to the statement " "explicitly, e.g. by adding 'for' loops" - % (", ".join(inames_new-inames_old), insn.id)) + % (", ".join(inames_new-inames_old), stmt.id)) # }}} if not did_something: break - logger.debug("%s: find_all_insn_inames: done" % kernel.name) + logger.debug("%s: find_all_stmt_inames: done" % kernel.name) - for v in six.itervalues(insn_id_to_inames): + for v in six.itervalues(stmt_id_to_inames): assert isinstance(v, frozenset) - return insn_id_to_inames + return stmt_id_to_inames # }}} @@ -447,17 +447,17 @@ class DomainChanger: # Changing the domain might look like it wants to change grid # sizes. Not true. 
# (Relevant for 'slab decomposition') - overridden_get_grid_sizes_for_insn_ids=( - self.kernel.get_grid_sizes_for_insn_ids)) + overridden_get_grid_sizes_for_stmt_ids=( + self.kernel.get_grid_sizes_for_stmt_ids)) # }}} # {{{ graphviz / dot export -def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): +def get_dot_dependency_graph(kernel, iname_cluster=True, use_stmt_id=False): """Return a string in the `dot `_ language depicting - dependencies among kernel instructions. + dependencies among kernel statements. """ # make sure all automatically added stuff shows up @@ -478,34 +478,34 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): dep_graph = {} lines = [] - from loopy.kernel.data import MultiAssignmentBase, CInstruction + from loopy.kernel.data import MultiAssignmentBase, CStatement - for insn in kernel.instructions: - if isinstance(insn, MultiAssignmentBase): - op = "%s <- %s" % (insn.assignees, insn.expression) + for stmt in kernel.statements: + if isinstance(stmt, MultiAssignmentBase): + op = "%s <- %s" % (stmt.assignees, stmt.expression) if len(op) > 200: op = op[:200] + "..." 
- elif isinstance(insn, CInstruction): - op = "" % insn.id + elif isinstance(stmt, CStatement): + op = "" % stmt.id else: - op = "" % insn.id + op = "" % stmt.id - if use_insn_id: - insn_label = insn.id + if use_stmt_id: + stmt_label = stmt.id tooltip = op else: - insn_label = op - tooltip = insn.id + stmt_label = op + tooltip = stmt.id lines.append("\"%s\" [label=\"%s\",shape=\"box\",tooltip=\"%s\"];" % ( - insn.id, - repr(insn_label)[1:-1], + stmt.id, + repr(stmt_label)[1:-1], repr(tooltip)[1:-1], )) - for dep in insn.depends_on: - dep_graph.setdefault(insn.id, set()).add(dep) + for dep in stmt.depends_on: + dep_graph.setdefault(stmt.id, set()).add(dep) # {{{ O(n^3) transitive reduction @@ -513,31 +513,31 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): while True: changed_something = False - for insn_1 in dep_graph: - for insn_2 in dep_graph.get(insn_1, set()).copy(): - for insn_3 in dep_graph.get(insn_2, set()).copy(): - if insn_3 not in dep_graph.get(insn_1, set()): + for stmt_1 in dep_graph: + for stmt_2 in dep_graph.get(stmt_1, set()).copy(): + for stmt_3 in dep_graph.get(stmt_2, set()).copy(): + if stmt_3 not in dep_graph.get(stmt_1, set()): changed_something = True - dep_graph[insn_1].add(insn_3) + dep_graph[stmt_1].add(stmt_3) if not changed_something: break - for insn_1 in dep_graph: - for insn_2 in dep_graph.get(insn_1, set()).copy(): - for insn_3 in dep_graph.get(insn_2, set()).copy(): - if insn_3 in dep_graph.get(insn_1, set()): - dep_graph[insn_1].remove(insn_3) + for stmt_1 in dep_graph: + for stmt_2 in dep_graph.get(stmt_1, set()).copy(): + for stmt_3 in dep_graph.get(stmt_2, set()).copy(): + if stmt_3 in dep_graph.get(stmt_1, set()): + dep_graph[stmt_1].remove(stmt_3) # }}} - for insn_1 in dep_graph: - for insn_2 in dep_graph.get(insn_1, set()): - lines.append("%s -> %s" % (insn_2, insn_1)) + for stmt_1 in dep_graph: + for stmt_2 in dep_graph.get(stmt_1, set()): + lines.append("%s -> %s" % (stmt_2, stmt_1)) if 
iname_cluster: from loopy.schedule import ( - EnterLoop, LeaveLoop, RunInstruction, Barrier, + EnterLoop, LeaveLoop, RunStatement, Barrier, CallKernel, ReturnFromKernel) for sched_item in kernel.schedule: @@ -546,8 +546,8 @@ def get_dot_dependency_graph(kernel, iname_cluster=True, use_insn_id=False): % (sched_item.iname, sched_item.iname)) elif isinstance(sched_item, LeaveLoop): lines.append("}") - elif isinstance(sched_item, RunInstruction): - lines.append(sched_item.insn_id) + elif isinstance(sched_item, RunStatement): + lines.append(sched_item.stmt_id) elif isinstance(sched_item, (CallKernel, ReturnFromKernel, Barrier)): pass else: @@ -672,14 +672,14 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): for par in dom_parameters: if par in kernel.temporary_variables: - writer_insns = kernel.writer_map()[par] + writer_stmts = kernel.writer_map()[par] - if len(writer_insns) > 1: + if len(writer_stmts) > 1: raise RuntimeError("loop bound '%s' " "may only be written to once" % par) - writer_insn, = writer_insns - writer_inames = kernel.insn_inames(writer_insn) + writer_stmt, = writer_stmts + writer_inames = kernel.stmt_inames(writer_stmt) if writer_inames & inames: return True @@ -695,7 +695,7 @@ def is_domain_dependent_on_inames(kernel, domain_index, inames): # {{{ rank inames by stride -def get_auto_axis_iname_ranking_by_stride(kernel, insn): +def get_auto_axis_iname_ranking_by_stride(kernel, stmt): from loopy.kernel.data import ImageArg, ValueArg approximate_arg_values = {} @@ -707,14 +707,14 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): raise LoopyError("No approximate arg value specified for '%s'" % arg.name) - # {{{ find all array accesses in insn + # {{{ find all array accesses in stmt from loopy.symbolic import ArrayAccessFinder - ary_acc_exprs = list(ArrayAccessFinder()(insn.expression)) + ary_acc_exprs = list(ArrayAccessFinder()(stmt.expression)) from pymbolic.primitives import Subscript - for assignee in insn.assignees: + for 
assignee in stmt.assignees: if isinstance(assignee, Subscript): ary_acc_exprs.append(assignee) @@ -742,7 +742,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = set( iname - for iname in kernel.insn_inames(insn) + for iname in kernel.stmt_inames(stmt) if isinstance(kernel.iname_to_tag.get(iname), AutoLocalIndexTagBase)) @@ -802,7 +802,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): if aggregate_strides: very_large_stride = int(np.iinfo(np.int32).max) - return sorted((iname for iname in kernel.insn_inames(insn)), + return sorted((iname for iname in kernel.stmt_inames(stmt)), key=lambda iname: ( aggregate_strides.get(iname, very_large_stride), iname)) @@ -912,13 +912,13 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): import loopy as lp - for insn in kernel.instructions: - if not isinstance(insn, lp.MultiAssignmentBase): + for stmt in kernel.statements: + if not isinstance(stmt, lp.MultiAssignmentBase): continue auto_axis_inames = [ iname - for iname in kernel.insn_inames(insn) + for iname in kernel.stmt_inames(stmt) if isinstance(kernel.iname_to_tag.get(iname), AutoLocalIndexTagBase)] @@ -927,7 +927,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() - for iname in kernel.insn_inames(insn): + for iname in kernel.stmt_inames(stmt): tag = kernel.iname_to_tag.get(iname) if isinstance(tag, LocalIndexTag): assigned_local_axes.add(tag.axis) @@ -936,7 +936,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # "valid" pass: try to assign a given axis if axis not in assigned_local_axes: - iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, insn) + iname_ranking = get_auto_axis_iname_ranking_by_stride(kernel, stmt) if iname_ranking is not None: for iname in iname_ranking: prev_tag = kernel.iname_to_tag.get(iname) @@ -963,7 +963,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): # }}} - # 
We've seen all instructions and not punted to recursion/restart because + # We've seen all statements and not punted to recursion/restart because # of a new axis assignment. if axis >= len(local_size): @@ -1032,12 +1032,12 @@ def guess_var_shape(kernel, var_name): submap = SubstitutionRuleExpander(kernel.substitutions) def run_through_armap(expr): - armap(submap(expr), kernel.insn_inames(insn)) + armap(submap(expr), kernel.stmt_inames(stmt)) return expr try: - for insn in kernel.instructions: - insn.with_transformed_expressions(run_through_armap) + for stmt in kernel.statements: + stmt.with_transformed_expressions(run_through_armap) except TypeError as e: from traceback import print_exc print_exc() @@ -1200,9 +1200,9 @@ def get_visual_iname_order_embedding(kernel): iname_trie = SetTrie() - for insn in kernel.instructions: + for stmt in kernel.statements: within_inames = set( - iname for iname in insn.within_inames + iname for iname in stmt.within_inames if iname not in ilp_inames) iname_trie.add_or_update(within_inames) @@ -1225,17 +1225,17 @@ def get_visual_iname_order_embedding(kernel): # {{{ find_recursive_dependencies -def find_recursive_dependencies(kernel, insn_ids): - queue = list(insn_ids) +def find_recursive_dependencies(kernel, stmt_ids): + queue = list(stmt_ids) - result = set(insn_ids) + result = set(stmt_ids) while queue: new_queue = [] - for insn_id in queue: - insn = kernel.id_to_insn[insn_id] - additionals = insn.depends_on - result + for stmt_id in queue: + stmt = kernel.id_to_stmt[stmt_id] + additionals = stmt.depends_on - result result.update(additionals) new_queue.extend(additionals) @@ -1248,15 +1248,15 @@ def find_recursive_dependencies(kernel, insn_ids): # {{{ find_reverse_dependencies -def find_reverse_dependencies(kernel, insn_ids): - """Finds a set of IDs of instructions that depend on one of the insn_ids. +def find_reverse_dependencies(kernel, stmt_ids): + """Finds a set of IDs of statements that depend on one of the stmt_ids. 
- :arg insn_ids: a set of instruction IDs + :arg stmt_ids: a set of statement IDs """ return frozenset( - insn.id - for insn in kernel.instructions - if insn.depends_on & insn_ids) + stmt.id + for stmt in kernel.statements + if stmt.depends_on & stmt_ids) # }}} @@ -1264,28 +1264,28 @@ def find_reverse_dependencies(kernel, insn_ids): # {{{ draw_dependencies_as_unicode_arrows def draw_dependencies_as_unicode_arrows( - instructions, fore, style, flag_downward=True, max_columns=20): + statements, fore, style, flag_downward=True, max_columns=20): """ - :arg instructions: an ordered iterable of :class:`loopy.InstructionBase` + :arg statements: an ordered iterable of :class:`loopy.StatementBase` instances :arg fore: if given, will be used like a :mod:`colorama` ``Fore`` object to color-code dependencies. (E.g. red for downward edges) :returns: A list of tuples (arrows, extender) with Unicode-drawn dependency - arrows, one per entry of *instructions*. *extender* can be used to - extend arrows below the line of an instruction. + arrows, one per entry of *statements*. *extender* can be used to + extend arrows below the line of an statement. 
""" reverse_deps = {} - for insn in instructions: - for dep in insn.depends_on: - reverse_deps.setdefault(dep, []).append(insn.id) + for stmt in statements: + for dep in stmt.depends_on: + reverse_deps.setdefault(dep, []).append(stmt.id) # mapping of (from_id, to_id) tuples to column_index dep_to_column = {} # {{{ find column assignments - # mapping from column indices to (end_insn_id, updown) + # mapping from column indices to (end_stmt_id, updown) columns_in_use = {} n_columns = [0] @@ -1313,28 +1313,28 @@ def draw_dependencies_as_unicode_arrows( return result rows = [] - for insn in instructions: + for stmt in statements: row = make_extender() - for rdep in reverse_deps.get(insn.id, []): - assert rdep != insn.id + for rdep in reverse_deps.get(stmt.id, []): + assert rdep != stmt.id - dep_key = (rdep, insn.id) + dep_key = (rdep, stmt.id) if dep_key not in dep_to_column: col = dep_to_column[dep_key] = find_free_column() columns_in_use[col] = (rdep, "up") row[col] = u"↱" - for dep in insn.depends_on: - assert dep != insn.id - dep_key = (insn.id, dep) + for dep in stmt.depends_on: + assert dep != stmt.id + dep_key = (stmt.id, dep) if dep_key not in dep_to_column: col = dep_to_column[dep_key] = find_free_column() columns_in_use[col] = (dep, "down") row[col] = do_flag_downward(u"┌", "down") for col, (end, updown) in list(six.iteritems(columns_in_use)): - if insn.id == end: + if stmt.id == end: del columns_in_use[col] if updown == "up": row[col] = u"└" @@ -1376,26 +1376,26 @@ def draw_dependencies_as_unicode_arrows( # }}} -# {{{ stringify_instruction_list +# {{{ stringify_statement_list -def stringify_instruction_list(kernel): +def stringify_statement_list(kernel): # {{{ topological sort - printed_insn_ids = set() - printed_insn_order = [] + printed_stmt_ids = set() + printed_stmt_order = [] - def insert_insn_into_order(insn): - if insn.id in printed_insn_ids: + def insert_stmt_into_order(stmt): + if stmt.id in printed_stmt_ids: return - printed_insn_ids.add(insn.id) + 
printed_stmt_ids.add(stmt.id) - for dep_id in natsorted(insn.depends_on): - insert_insn_into_order(kernel.id_to_insn[dep_id]) + for dep_id in natsorted(stmt.depends_on): + insert_stmt_into_order(kernel.id_to_stmt[dep_id]) - printed_insn_order.append(insn) + printed_stmt_order.append(stmt) - for insn in kernel.instructions: - insert_insn_into_order(insn) + for stmt in kernel.statements: + insert_stmt_into_order(stmt) # }}} @@ -1406,7 +1406,7 @@ def stringify_instruction_list(kernel): uniform_arrow_length, arrows_and_extenders = \ draw_dependencies_as_unicode_arrows( - printed_insn_order, fore=Fore, style=Style) + printed_stmt_order, fore=Fore, style=Style) leader = " " * uniform_arrow_length lines = [] @@ -1457,51 +1457,51 @@ def stringify_instruction_list(kernel): current_inames[0] = new_inames - for insn, (arrows, extender) in zip(printed_insn_order, arrows_and_extenders): - if isinstance(insn, lp.MultiAssignmentBase): - lhs = ", ".join(str(a) for a in insn.assignees) - rhs = str(insn.expression) + for stmt, (arrows, extender) in zip(printed_stmt_order, arrows_and_extenders): + if isinstance(stmt, lp.MultiAssignmentBase): + lhs = ", ".join(str(a) for a in stmt.assignees) + rhs = str(stmt.expression) trailing = [] - elif isinstance(insn, lp.CInstruction): - lhs = ", ".join(str(a) for a in insn.assignees) + elif isinstance(stmt, lp.CStatement): + lhs = ", ".join(str(a) for a in stmt.assignees) rhs = "CODE(%s|%s)" % ( - ", ".join(str(x) for x in insn.read_variables), + ", ".join(str(x) for x in stmt.read_variables), ", ".join("%s=%s" % (name, expr) - for name, expr in insn.iname_exprs)) + for name, expr in stmt.iname_exprs)) - trailing = [l for l in insn.code.split("\n")] - elif isinstance(insn, lp.BarrierInstruction): + trailing = [l for l in stmt.code.split("\n")] + elif isinstance(stmt, lp.BarrierStatement): lhs = "" - rhs = "... %sbarrier" % insn.kind[0] + rhs = "... 
%sbarrier" % stmt.kind[0] trailing = [] - elif isinstance(insn, lp.NoOpInstruction): + elif isinstance(stmt, lp.NoOpStatement): lhs = "" rhs = "... nop" trailing = [] else: - raise LoopyError("unexpected instruction type: %s" - % type(insn).__name__) + raise LoopyError("unexpected statement type: %s" + % type(stmt).__name__) - adapt_to_new_inames_list(kernel.insn_inames(insn)) + adapt_to_new_inames_list(kernel.stmt_inames(stmt)) - options = ["id="+Fore.GREEN+insn.id+Style.RESET_ALL] - if insn.priority: - options.append("priority=%d" % insn.priority) - if insn.tags: - options.append("tags=%s" % ":".join(insn.tags)) - if isinstance(insn, lp.Assignment) and insn.atomicity: + options = ["id="+Fore.GREEN+stmt.id+Style.RESET_ALL] + if stmt.priority: + options.append("priority=%d" % stmt.priority) + if stmt.tags: + options.append("tags=%s" % ":".join(stmt.tags)) + if isinstance(stmt, lp.Assignment) and stmt.atomicity: options.append("atomic=%s" % ":".join( - str(a) for a in insn.atomicity)) - if insn.groups: - options.append("groups=%s" % ":".join(insn.groups)) - if insn.conflicts_with_groups: + str(a) for a in stmt.atomicity)) + if stmt.groups: + options.append("groups=%s" % ":".join(stmt.groups)) + if stmt.conflicts_with_groups: options.append( - "conflicts=%s" % ":".join(insn.conflicts_with_groups)) - if insn.no_sync_with: + "conflicts=%s" % ":".join(stmt.conflicts_with_groups)) + if stmt.no_sync_with: options.append("no_sync_with=%s" % ":".join( - "%s@%s" % entry for entry in sorted(insn.no_sync_with))) + "%s@%s" % entry for entry in sorted(stmt.no_sync_with))) if lhs: core = "%s = %s" % ( @@ -1513,9 +1513,9 @@ def stringify_instruction_list(kernel): options_str = " {%s}" % ", ".join(options) - if insn.predicates: + if stmt.predicates: # FIXME: precedence - add_pre_line("if %s" % " and ".join([str(x) for x in insn.predicates])) + add_pre_line("if %s" % " and ".join([str(x) for x in stmt.predicates])) indent_level[0] += indent_increment add_main_line(core + 
options_str) @@ -1523,7 +1523,7 @@ def stringify_instruction_list(kernel): for t in trailing: add_post_line(t) - if insn.predicates: + if stmt.predicates: indent_level[0] -= indent_increment add_post_line("end") @@ -1540,21 +1540,21 @@ def stringify_instruction_list(kernel): @memoize_on_first_arg def get_global_barrier_order(kernel): - """Return a :class:`tuple` of the listing the ids of global barrier instructions + """Return a :class:`tuple` of the listing the ids of global barrier statements as they appear in order in the kernel. - See also :class:`loopy.instruction.BarrierInstruction`. + See also :class:`loopy.statement.BarrierStatement`. """ barriers = [] visiting = set() visited = set() - unvisited = set(insn.id for insn in kernel.instructions) + unvisited = set(stmt.id for stmt in kernel.statements) - def is_barrier(my_insn_id): - insn = kernel.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" + def is_barrier(my_stmt_id): + stmt = kernel.id_to_stmt[my_stmt_id] + from loopy.kernel.statement import BarrierStatement + return isinstance(stmt, BarrierStatement) and stmt.kind == "global" while unvisited: stack = [unvisited.pop()] @@ -1574,7 +1574,7 @@ def get_global_barrier_order(kernel): visited.add(top) visiting.add(top) - for child in kernel.id_to_insn[top].depends_on: + for child in kernel.id_to_stmt[top].depends_on: # Check for no cycles. assert child not in visiting stack.append(child) @@ -1610,7 +1610,7 @@ def get_global_barrier_order(kernel): visiting.clear() break - for child in kernel.id_to_insn[top].depends_on: + for child in kernel.id_to_stmt[top].depends_on: stack.append(child) else: # Search exhausted and we did not find prev_barrier. 
@@ -1625,10 +1625,10 @@ def get_global_barrier_order(kernel): # {{{ find most recent global barrier @memoize_on_first_arg -def find_most_recent_global_barrier(kernel, insn_id): +def find_most_recent_global_barrier(kernel, stmt_id): """Return the id of the latest occuring global barrier which the - given instruction (indirectly or directly) depends on, or *None* if this - instruction does not depend on a global barrier. + given statement (indirectly or directly) depends on, or *None* if this + statement does not depend on a global barrier. The return value is guaranteed to be unique because global barriers are totally ordered within the kernel. @@ -1639,15 +1639,15 @@ def find_most_recent_global_barrier(kernel, insn_id): if len(global_barrier_order) == 0: return None - insn = kernel.id_to_insn[insn_id] + stmt = kernel.id_to_stmt[stmt_id] - if len(insn.depends_on) == 0: + if len(stmt.depends_on) == 0: return None - def is_barrier(my_insn_id): - insn = kernel.id_to_insn[my_insn_id] - from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" + def is_barrier(my_stmt_id): + stmt = kernel.id_to_stmt[my_stmt_id] + from loopy.kernel.statement import BarrierStatement + return isinstance(stmt, BarrierStatement) and stmt.kind == "global" global_barrier_to_ordinal = dict( (b, i) for i, b in enumerate(global_barrier_order)) @@ -1658,13 +1658,13 @@ def find_most_recent_global_barrier(kernel, insn_id): else -1) direct_barrier_dependencies = set( - dep for dep in insn.depends_on if is_barrier(dep)) + dep for dep in stmt.depends_on if is_barrier(dep)) if len(direct_barrier_dependencies) > 0: return max(direct_barrier_dependencies, key=get_barrier_ordinal) else: return max((find_most_recent_global_barrier(kernel, dep) - for dep in insn.depends_on), + for dep in stmt.depends_on), key=get_barrier_ordinal) # }}} @@ -1691,9 +1691,9 @@ def get_subkernels(kernel): @memoize_on_first_arg -def 
get_subkernel_to_insn_id_map(kernel): +def get_subkernel_to_stmt_id_map(kernel): """Return a :class:`dict` mapping subkernel names to a :class:`frozenset` - consisting of the instruction ids scheduled within the subkernel. The + consisting of the statement ids scheduled within the subkernel. The kernel must be scheduled. """ from loopy.kernel import kernel_state @@ -1701,7 +1701,7 @@ def get_subkernel_to_insn_id_map(kernel): raise LoopyError("Kernel must be scheduled") from loopy.schedule import ( - sched_item_to_insn_id, CallKernel, ReturnFromKernel) + sched_item_to_stmt_id, CallKernel, ReturnFromKernel) subkernel = None result = {} @@ -1715,8 +1715,8 @@ def get_subkernel_to_insn_id_map(kernel): subkernel = None if subkernel is not None: - for insn_id in sched_item_to_insn_id(sched_item): - result[subkernel].add(insn_id) + for stmt_id in sched_item_to_stmt_id(sched_item): + result[subkernel].add(stmt_id) for subkernel in result: result[subkernel] = frozenset(result[subkernel]) diff --git a/loopy/loop.py b/loopy/loop.py index 459246382..6daef3ac7 100644 --- a/loopy/loop.py +++ b/loopy/loop.py @@ -37,7 +37,7 @@ def potential_loop_nest_map(kernel): result = {} all_inames = kernel.all_inames() - iname_to_insns = kernel.iname_to_insns() + iname_to_stmts = kernel.iname_to_stmts() # examine pairs of all inames--O(n**2), I know. 
for inner_iname in all_inames: @@ -46,7 +46,7 @@ def potential_loop_nest_map(kernel): if inner_iname == outer_iname: continue - if iname_to_insns[inner_iname] <= iname_to_insns[outer_iname]: + if iname_to_stmts[inner_iname] <= iname_to_stmts[outer_iname]: inner_result.add(outer_iname) if inner_result: diff --git a/loopy/match.py b/loopy/match.py index ab0038af8..434eced40 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -1,4 +1,4 @@ -"""Matching functionality for instruction ids and subsitution +"""Matching functionality for statement ids and subsitution rule invocations stacks.""" from __future__ import division, absolute_import @@ -471,11 +471,11 @@ class StackMatch(object): def __ne__(self, other): return not self.__eq__(other) - def __call__(self, kernel, insn, rule_stack): + def __call__(self, kernel, stmt, rule_stack): """ :arg rule_stack: a tuple of (name, tags) rule invocation, outermost first """ - stack_of_matchables = [insn] + stack_of_matchables = [stmt] for id, tags in rule_stack: stack_of_matchables.append(RuleInvocationMatchable(id, tags)) @@ -490,8 +490,8 @@ def parse_stack_match(smatch): """Syntax example:: ... > outer > ... > next > innermost $ - insn > next - insn > ... > next > innermost $ + stmt > next + stmt > ... > next > innermost $ ``...`` matches an arbitrary number of intervening stack levels. 
diff --git a/loopy/maxima.py b/loopy/maxima.py index 22d0c085c..c9a9a42ed 100644 --- a/loopy/maxima.py +++ b/loopy/maxima.py @@ -42,7 +42,7 @@ class MaximaStringifyMapper(MaximaStringifyMapperBase): return res -def get_loopy_instructions_as_maxima(kernel, prefix): +def get_loopy_statements_as_maxima(kernel, prefix): """Sample use for code comparison:: load("knl-optFalse.mac"); @@ -60,8 +60,8 @@ def get_loopy_instructions_as_maxima(kernel, prefix): my_variable_names = ( avn - for insn in kernel.instructions - for avn in insn.assignee_var_names() + for stmt in kernel.statements + for avn in stmt.assignee_var_names() ) from pymbolic import var @@ -75,30 +75,30 @@ def get_loopy_instructions_as_maxima(kernel, prefix): result = ["ratprint:false;"] - written_insn_ids = set() + written_stmt_ids = set() - from loopy.kernel import InstructionBase, Assignment + from loopy.kernel import StatementBase, Assignment - def write_insn(insn): - if not isinstance(insn, InstructionBase): - insn = kernel.id_to_insn[insn] - if not isinstance(insn, Assignment): + def write_stmt(stmt): + if not isinstance(stmt, StatementBase): + stmt = kernel.id_to_stmt[stmt] + if not isinstance(stmt, Assignment): raise RuntimeError("non-single-output assignment not supported " "in maxima export") - for dep in insn.depends_on: - if dep not in written_insn_ids: - write_insn(dep) + for dep in stmt.depends_on: + if dep not in written_stmt_ids: + write_stmt(dep) - aname, = insn.assignee_var_names() + aname, = stmt.assignee_var_names() result.append("%s%s : %s;" % ( prefix, aname, - mstr(substitute(insn.expression)))) + mstr(substitute(stmt.expression)))) - written_insn_ids.add(insn.id) + written_stmt_ids.add(stmt.id) - for insn in kernel.instructions: - if insn.id not in written_insn_ids: - write_insn(insn) + for stmt in kernel.statements: + if stmt.id not in written_stmt_ids: + write_stmt(stmt) return "\n".join(result) diff --git a/loopy/options.py b/loopy/options.py index 25bb7014c..451da8b7a 100644 --- 
a/loopy/options.py +++ b/loopy/options.py @@ -82,7 +82,7 @@ class Options(ImmutableRecord): .. attribute:: trace_assignments Generate code that uses *printf* in kernels to trace the - execution of assignment instructions. + execution of assignment statements. .. attribute:: trace_assignment_values @@ -98,7 +98,7 @@ class Options(ImmutableRecord): .. attribute:: check_dep_resolution Whether loopy should issue an error if a dependency - expression does not match any instructions in the kernel. + expression does not match any statements in the kernel. .. rubric:: Invocation-related options diff --git a/loopy/preprocess.py b/loopy/preprocess.py index ae70a0d6c..35f405aa2 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -78,16 +78,16 @@ def prepare_for_caching(kernel): def check_for_writes_to_predicates(kernel): from loopy.symbolic import get_dependencies - for insn in kernel.instructions: + for stmt in kernel.statements: pred_vars = ( frozenset.union( - *(get_dependencies(pred) for pred in insn.predicates)) - if insn.predicates else frozenset()) - written_pred_vars = frozenset(insn.assignee_var_names()) & pred_vars + *(get_dependencies(pred) for pred in stmt.predicates)) + if stmt.predicates else frozenset()) + written_pred_vars = frozenset(stmt.assignee_var_names()) & pred_vars if written_pred_vars: - raise LoopyError("In instruction '%s': may not write to " - "variable(s) '%s' involved in the instruction's predicates" - % (insn.id, ", ".join(written_pred_vars))) + raise LoopyError("In statement '%s': may not write to " + "variable(s) '%s' involved in the statement's predicates" + % (stmt.id, ", ".join(written_pred_vars))) # }}} @@ -113,8 +113,8 @@ def check_reduction_iname_uniqueness(kernel): from loopy.symbolic import ReductionCallbackMapper cb_mapper = ReductionCallbackMapper(map_reduction) - for insn in kernel.instructions: - insn.with_transformed_expressions(cb_mapper) + for stmt in kernel.statements: + stmt.with_transformed_expressions(cb_mapper) 
for iname, count in six.iteritems(iname_to_reduction_count): nonsimul_count = iname_to_nonsimultaneous_reduction_count.get(iname, 0) @@ -134,17 +134,17 @@ def check_reduction_iname_uniqueness(kernel): # {{{ decide temporary scope -def _get_compute_inames_tagged(kernel, insn, tag_base): +def _get_compute_inames_tagged(kernel, stmt, tag_base): return set(iname - for iname in kernel.insn_inames(insn.id) + for iname in kernel.stmt_inames(stmt.id) if isinstance(kernel.iname_to_tag.get(iname), tag_base)) -def _get_assignee_inames_tagged(kernel, insn, tag_base, tv_names): +def _get_assignee_inames_tagged(kernel, stmt, tag_base, tv_names): return set(iname for aname, adeps in zip( - insn.assignee_var_names(), - insn.assignee_subscript_deps()) + stmt.assignee_var_names(), + stmt.assignee_subscript_deps()) for iname in adeps & kernel.all_inames() if aname in tv_names if isinstance(kernel.iname_to_tag.get(iname), tag_base)) @@ -190,28 +190,28 @@ def find_temporary_scope(kernel): for alias in base_storage_to_aliases.get(temp_var.base_storage, []): my_writers = my_writers | writers.get(alias, frozenset()) - desired_scope_per_insn = [] - for insn_id in my_writers: - insn = kernel.id_to_insn[insn_id] + desired_scope_per_stmt = [] + for stmt_id in my_writers: + stmt = kernel.id_to_stmt[stmt_id] # A write race will emerge if: # # - the variable is local # and - # - the instruction is run across more inames (locally) parallel + # - the statement is run across more inames (locally) parallel # than are reflected in the assignee indices. 
locparallel_compute_inames = _get_compute_inames_tagged( - kernel, insn, LocalIndexTagBase) + kernel, stmt, LocalIndexTagBase) locparallel_assignee_inames = _get_assignee_inames_tagged( - kernel, insn, LocalIndexTagBase, tv_names) + kernel, stmt, LocalIndexTagBase, tv_names) grpparallel_compute_inames = _get_compute_inames_tagged( - kernel, insn, GroupIndexTag) + kernel, stmt, GroupIndexTag) grpparallel_assignee_inames = _get_assignee_inames_tagged( - kernel, insn, GroupIndexTag, temp_var.name) + kernel, stmt, GroupIndexTag, temp_var.name) assert locparallel_assignee_inames <= locparallel_compute_inames assert grpparallel_assignee_inames <= grpparallel_compute_inames @@ -227,13 +227,13 @@ def find_temporary_scope(kernel): if (apin != cpin and bool(apin)): warn_with_kernel( kernel, - "write_race_%s(%s)" % (scope_descr, insn_id), - "instruction '%s' looks invalid: " + "write_race_%s(%s)" % (scope_descr, stmt_id), + "statement '%s' looks invalid: " "it assigns to indices based on %s IDs, but " "its temporary '%s' cannot be made %s because " "a write race across the iname(s) '%s' would emerge. 
" "(Do you need to add an extra iname to your prefetch?)" - % (insn_id, iname_descr, temp_var.name, scope_descr, + % (stmt_id, iname_descr, temp_var.name, scope_descr, ", ".join(cpin - apin)), WriteRaceConditionWarning) @@ -244,9 +244,9 @@ def find_temporary_scope(kernel): and bool(cpin)): desired_scope = max(desired_scope, scope) - desired_scope_per_insn.append(desired_scope) + desired_scope_per_stmt.append(desired_scope) - if not desired_scope_per_insn: + if not desired_scope_per_stmt: if temp_var.initializer is None: warn_with_kernel(kernel, "temp_to_write(%s)" % temp_var.name, "temporary variable '%s' never written, eliminating" @@ -258,11 +258,11 @@ def find_temporary_scope(kernel): continue - overall_scope = max(desired_scope_per_insn) + overall_scope = max(desired_scope_per_stmt) from pytools import all - if not all(iscope == overall_scope for iscope in desired_scope_per_insn): - raise LoopyError("not all instructions agree on the " + if not all(iscope == overall_scope for iscope in desired_scope_per_stmt): + raise LoopyError("not all statements agree on the " "the desired scope (private/local/global) of the " "temporary '%s'" % temp_var.name) @@ -712,59 +712,59 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): be the case by introducing temporary assignments into the kernel. 
""" - insn_id_gen = kernel.get_instruction_id_generator() + stmt_id_gen = kernel.get_statement_id_generator() var_name_gen = kernel.get_var_name_generator() - new_or_updated_instructions = {} + new_or_updated_statements = {} new_temporaries = {} dep_map = dict( - (insn.id, insn.depends_on) for insn in kernel.instructions) + (stmt.id, stmt.depends_on) for stmt in kernel.statements) - inverse_dep_map = dict((insn.id, set()) for insn in kernel.instructions) + inverse_dep_map = dict((stmt.id, set()) for stmt in kernel.statements) import six - for insn_id, deps in six.iteritems(dep_map): + for stmt_id, deps in six.iteritems(dep_map): for dep in deps: - inverse_dep_map[dep].add(insn_id) + inverse_dep_map[dep].add(stmt_id) del dep_map # {{{ utils - def _add_to_no_sync_with(insn_id, new_no_sync_with_params): - insn = kernel.id_to_insn.get(insn_id) - insn = new_or_updated_instructions.get(insn_id, insn) - new_or_updated_instructions[insn_id] = ( - insn.copy( + def _add_to_no_sync_with(stmt_id, new_no_sync_with_params): + stmt = kernel.id_to_stmt.get(stmt_id) + stmt = new_or_updated_statements.get(stmt_id, stmt) + new_or_updated_statements[stmt_id] = ( + stmt.copy( no_sync_with=( - insn.no_sync_with | frozenset(new_no_sync_with_params)))) + stmt.no_sync_with | frozenset(new_no_sync_with_params)))) - def _add_to_depends_on(insn_id, new_depends_on_params): - insn = kernel.id_to_insn.get(insn_id) - insn = new_or_updated_instructions.get(insn_id, insn) - new_or_updated_instructions[insn_id] = ( - insn.copy( - depends_on=insn.depends_on | frozenset(new_depends_on_params))) + def _add_to_depends_on(stmt_id, new_depends_on_params): + stmt = kernel.id_to_stmt.get(stmt_id) + stmt = new_or_updated_statements.get(stmt_id, stmt) + new_or_updated_statements[stmt_id] = ( + stmt.copy( + depends_on=stmt.depends_on | frozenset(new_depends_on_params))) # }}} - from loopy.kernel.instruction import CallInstruction - for insn in kernel.instructions: - if not isinstance(insn, CallInstruction): + 
from loopy.kernel.statement import CallStatement + for stmt in kernel.statements: + if not isinstance(stmt, CallStatement): continue - if len(insn.assignees) <= 1: + if len(stmt.assignees) <= 1: continue - assignees = insn.assignees - assignee_var_names = insn.assignee_var_names() + assignees = stmt.assignees + assignee_var_names = stmt.assignee_var_names() new_assignees = [assignees[0]] newly_added_assignments_ids = set() needs_replacement = False - last_added_insn_id = insn.id + last_added_stmt_id = stmt.id from loopy.kernel.data import temp_var_scope, TemporaryVariable @@ -785,15 +785,15 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): needs_replacement = True - # {{{ generate a new assignent instruction + # {{{ generate a new assignent statement new_assignee_name = var_name_gen( - "{insn_id}_retval_{assignee_nr}" - .format(insn_id=insn.id, assignee_nr=assignee_nr)) + "{stmt_id}_retval_{assignee_nr}" + .format(stmt_id=stmt.id, assignee_nr=assignee_nr)) - new_assignment_id = insn_id_gen( - "{insn_id}_assign_retval_{assignee_nr}" - .format(insn_id=insn.id, assignee_nr=assignee_nr)) + new_assignment_id = stmt_id_gen( + "{stmt_id}_assign_retval_{assignee_nr}" + .format(stmt_id=stmt.id, assignee_nr=assignee_nr)) newly_added_assignments_ids.add(new_assignment_id) @@ -808,55 +808,55 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): new_assignee = var(new_assignee_name) new_assignees.append(new_assignee) - new_or_updated_instructions[new_assignment_id] = ( + new_or_updated_statements[new_assignment_id] = ( make_assignment( assignees=(assignee,), expression=new_assignee, id=new_assignment_id, - depends_on=frozenset([last_added_insn_id]), + depends_on=frozenset([last_added_stmt_id]), depends_on_is_final=True, no_sync_with=( - insn.no_sync_with | frozenset([(insn.id, "any")])), - predicates=insn.predicates, - within_inames=insn.within_inames)) + stmt.no_sync_with | frozenset([(stmt.id, "any")])), + 
predicates=stmt.predicates, + within_inames=stmt.within_inames)) - last_added_insn_id = new_assignment_id + last_added_stmt_id = new_assignment_id # }}} if not needs_replacement: continue - # {{{ update originating instruction + # {{{ update originating statement - orig_insn = new_or_updated_instructions.get(insn.id, insn) + orig_stmt = new_or_updated_statements.get(stmt.id, stmt) - new_or_updated_instructions[insn.id] = ( - orig_insn.copy(assignees=tuple(new_assignees))) + new_or_updated_statements[stmt.id] = ( + orig_stmt.copy(assignees=tuple(new_assignees))) - _add_to_no_sync_with(insn.id, + _add_to_no_sync_with(stmt.id, [(id, "any") for id in newly_added_assignments_ids]) # }}} # {{{ squash spurious memory dependencies amongst new assignments - for new_insn_id in newly_added_assignments_ids: - _add_to_no_sync_with(new_insn_id, + for new_stmt_id in newly_added_assignments_ids: + _add_to_no_sync_with(new_stmt_id, [(id, "any") for id in newly_added_assignments_ids - if id != new_insn_id]) + if id != new_stmt_id]) # }}} - # {{{ update instructions that depend on the originating instruction + # {{{ update statements that depend on the originating statement - for inverse_dep in inverse_dep_map[insn.id]: + for inverse_dep in inverse_dep_map[stmt.id]: _add_to_depends_on(inverse_dep, newly_added_assignments_ids) - for insn_id, scope in ( - new_or_updated_instructions[inverse_dep].no_sync_with): - if insn_id == insn.id: + for stmt_id, scope in ( + new_or_updated_statements[inverse_dep].no_sync_with): + if stmt_id == stmt.id: _add_to_no_sync_with( inverse_dep, [(id, scope) for id in newly_added_assignments_ids]) @@ -866,14 +866,14 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): new_temporary_variables = kernel.temporary_variables.copy() new_temporary_variables.update(new_temporaries) - new_instructions = ( - list(new_or_updated_instructions.values()) - + list(insn - for insn in kernel.instructions - if insn.id not in 
new_or_updated_instructions)) + new_statements = ( + list(new_or_updated_statements.values()) + + list(stmt + for stmt in kernel.statements + if stmt.id not in new_or_updated_statements)) return kernel.copy(temporary_variables=new_temporary_variables, - instructions=new_instructions) + statements=new_statements) def _insert_subdomain_into_domain_tree(kernel, domains, subdomain): @@ -888,18 +888,18 @@ def _insert_subdomain_into_domain_tree(kernel, domains, subdomain): # }}} -def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, +def realize_reduction(kernel, stmt_id_filter=None, unknown_types_ok=True, automagic_scans_ok=False, force_scan=False, force_outer_iname_for_scan=None): - """Rewrites reductions into their imperative form. With *insn_id_filter* - specified, operate only on the instruction with an instruction id matching - *insn_id_filter*. + """Rewrites reductions into their imperative form. With *stmt_id_filter* + specified, operate only on the statement with an statement id matching + *stmt_id_filter*. - If *insn_id_filter* is given, only the outermost level of reductions will be + If *stmt_id_filter* is given, only the outermost level of reductions will be expanded, inner reductions will be left alone (because they end up in a new - instruction with a different ID, which doesn't match the filter). + statement with a different ID, which doesn't match the filter). - If *insn_id_filter* is not given, all reductions in all instructions will + If *stmt_id_filter* is not given, all reductions in all statements will be realized. If *automagic_scans_ok*, this function will attempt to rewrite triangular @@ -907,7 +907,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, If *force_scan* is *True*, this function will attempt to rewrite *all* candidate reductions as scans and raise an error if this is not possible - (this is most useful combined with *insn_id_filter*). 
+ (this is most useful combined with *stmt_id_filter*). If *force_outer_iname_for_scan* is not *None*, this function will attempt to realize candidate reductions as scans using the specified iname as the @@ -916,10 +916,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, logger.debug("%s: realize reduction" % kernel.name) - new_insns = [] + new_stmts = [] new_iname_tags = {} - insn_id_gen = kernel.get_instruction_id_generator() + stmt_id_gen = kernel.get_statement_id_generator() var_name_gen = kernel.get_var_name_generator() new_temporary_variables = kernel.temporary_variables.copy() @@ -935,8 +935,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, return val def preprocess_scan_arguments( - insn, expr, nresults, scan_iname, track_iname, - newly_generated_insn_id_set): + stmt, expr, nresults, scan_iname, track_iname, + newly_generated_stmt_id_set): """Does iname substitution within scan arguments and returns a set of values suitable to be passed to the binary op. Returns a tuple.""" @@ -947,20 +947,20 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # the arguments in order to pass them to the binary op - so we expand # items that are not "plain" tuples here. 
if not isinstance(inner_expr, tuple): - get_args_insn_id = insn_id_gen( - "%s_%s_get" % (insn.id, "_".join(expr.inames))) + get_args_stmt_id = stmt_id_gen( + "%s_%s_get" % (stmt.id, "_".join(expr.inames))) inner_expr = expand_inner_reduction( - id=get_args_insn_id, + id=get_args_stmt_id, expr=inner_expr, nresults=nresults, - depends_on=insn.depends_on, - within_inames=insn.within_inames | expr.inames, - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates, + depends_on=stmt.depends_on, + within_inames=stmt.within_inames | expr.inames, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates, ) - newly_generated_insn_id_set.add(get_args_insn_id) + newly_generated_stmt_id_set.add(get_args_stmt_id) updated_inner_exprs = tuple( replace_var_within_expr(sub_expr, scan_iname, track_iname) @@ -993,7 +993,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, from pymbolic import var temp_vars = tuple(var(n) for n in temp_var_names) - call_insn = make_assignment( + call_stmt = make_assignment( id=id, assignees=temp_vars, expression=expr, @@ -1002,7 +1002,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, within_inames_is_final=within_inames_is_final, predicates=predicates) - generated_insns.append(call_insn) + generated_stmts.append(call_stmt) return temp_vars @@ -1012,7 +1012,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_reduction_seq(expr, rec, nresults, arg_dtypes, reduction_dtypes): - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_stmt_inames = temp_kernel.stmt_inames(stmt) from loopy.kernel.data import temp_var_scope acc_var_names = make_temporaries( @@ -1022,75 +1022,75 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, dtypes=reduction_dtypes, scope=temp_var_scope.PRIVATE) - init_insn_depends_on = frozenset() + init_stmt_depends_on = frozenset() - global_barrier = 
lp.find_most_recent_global_barrier(temp_kernel, insn.id) + global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id) if global_barrier is not None: - init_insn_depends_on |= frozenset([global_barrier]) + init_stmt_depends_on |= frozenset([global_barrier]) from pymbolic import var acc_vars = tuple(var(n) for n in acc_var_names) - init_id = insn_id_gen( - "%s_%s_init" % (insn.id, "_".join(expr.inames))) + init_id = stmt_id_gen( + "%s_%s_init" % (stmt.id, "_".join(expr.inames))) - init_insn = make_assignment( + init_stmt = make_assignment( id=init_id, assignees=acc_vars, - within_inames=outer_insn_inames - frozenset(expr.inames), - within_inames_is_final=insn.within_inames_is_final, - depends_on=init_insn_depends_on, + within_inames=outer_stmt_inames - frozenset(expr.inames), + within_inames_is_final=stmt.within_inames_is_final, + depends_on=init_stmt_depends_on, expression=expr.operation.neutral_element(*arg_dtypes), - predicates=insn.predicates,) + predicates=stmt.predicates,) - generated_insns.append(init_insn) + generated_stmts.append(init_stmt) - update_id = insn_id_gen( - based_on="%s_%s_update" % (insn.id, "_".join(expr.inames))) + update_id = stmt_id_gen( + based_on="%s_%s_update" % (stmt.id, "_".join(expr.inames))) - update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames) - if insn.within_inames_is_final: - update_insn_iname_deps = insn.within_inames | set(expr.inames) + update_stmt_iname_deps = temp_kernel.stmt_inames(stmt) | set(expr.inames) + if stmt.within_inames_is_final: + update_stmt_iname_deps = stmt.within_inames | set(expr.inames) - reduction_insn_depends_on = set([init_id]) + reduction_stmt_depends_on = set([init_id]) # In the case of a multi-argument reduction, we need a name for each of # the arguments in order to pass them to the binary op - so we expand # items that are not "plain" tuples here. 
if nresults > 1 and not isinstance(expr.expr, tuple): - get_args_insn_id = insn_id_gen( - "%s_%s_get" % (insn.id, "_".join(expr.inames))) + get_args_stmt_id = stmt_id_gen( + "%s_%s_get" % (stmt.id, "_".join(expr.inames))) reduction_expr = expand_inner_reduction( - id=get_args_insn_id, + id=get_args_stmt_id, expr=expr.expr, nresults=nresults, - depends_on=insn.depends_on, - within_inames=update_insn_iname_deps, - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates, + depends_on=stmt.depends_on, + within_inames=update_stmt_iname_deps, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates, ) - reduction_insn_depends_on.add(get_args_insn_id) + reduction_stmt_depends_on.add(get_args_stmt_id) else: reduction_expr = expr.expr - reduction_insn = make_assignment( + reduction_stmt = make_assignment( id=update_id, assignees=acc_vars, expression=expr.operation( arg_dtypes, _strip_if_scalar(acc_vars, acc_vars), reduction_expr), - depends_on=frozenset(reduction_insn_depends_on) | insn.depends_on, - within_inames=update_insn_iname_deps, - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates,) + depends_on=frozenset(reduction_stmt_depends_on) | stmt.depends_on, + within_inames=update_stmt_iname_deps, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates,) - generated_insns.append(reduction_insn) + generated_stmts.append(reduction_stmt) - new_insn_add_depends_on.add(reduction_insn.id) + new_stmt_add_depends_on.add(reduction_stmt.id) if nresults == 1: assert len(acc_vars) == 1 @@ -1134,12 +1134,12 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, size = _get_int_iname_size(red_iname) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_stmt_inames = temp_kernel.stmt_inames(stmt) from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple( oiname - for oiname in outer_insn_inames + for oiname in outer_stmt_inames if 
isinstance( kernel.iname_to_tag.get(oiname), LocalIndexTagBase)) @@ -1181,34 +1181,34 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # }}} - base_iname_deps = outer_insn_inames - frozenset(expr.inames) + base_iname_deps = outer_stmt_inames - frozenset(expr.inames) neutral = expr.operation.neutral_element(*arg_dtypes) - init_id = insn_id_gen("%s_%s_init" % (insn.id, red_iname)) - init_insn = make_assignment( + init_id = stmt_id_gen("%s_%s_init" % (stmt.id, red_iname)) + init_stmt = make_assignment( id=init_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(base_exec_iname),)] for acc_var in acc_vars), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset(), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(init_insn) + generated_stmts.append(init_stmt) - init_neutral_id = insn_id_gen("%s_%s_init_neutral" % (insn.id, red_iname)) - init_neutral_insn = make_assignment( + init_neutral_id = stmt_id_gen("%s_%s_init_neutral" % (stmt.id, red_iname)) + init_neutral_stmt = make_assignment( id=init_neutral_id, assignees=tuple(var(nvn) for nvn in neutral_var_names), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset(), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(init_neutral_insn) + generated_stmts.append(init_neutral_stmt) transfer_depends_on = set([init_neutral_id, init_id]) @@ -1216,27 +1216,27 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # the arguments in order to pass them to the binary op - so we expand # items that are not "plain" tuples here. 
if nresults > 1 and not isinstance(expr.expr, tuple): - get_args_insn_id = insn_id_gen( - "%s_%s_get" % (insn.id, red_iname)) + get_args_stmt_id = stmt_id_gen( + "%s_%s_get" % (stmt.id, red_iname)) reduction_expr = expand_inner_reduction( - id=get_args_insn_id, + id=get_args_stmt_id, expr=expr.expr, nresults=nresults, - depends_on=insn.depends_on, + depends_on=stmt.depends_on, within_inames=( - (outer_insn_inames - frozenset(expr.inames)) + (outer_stmt_inames - frozenset(expr.inames)) | frozenset([red_iname])), - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates, ) - transfer_depends_on.add(get_args_insn_id) + transfer_depends_on.add(get_args_stmt_id) else: reduction_expr = expr.expr - transfer_id = insn_id_gen("%s_%s_transfer" % (insn.id, red_iname)) - transfer_insn = make_assignment( + transfer_id = stmt_id_gen("%s_%s_transfer" % (stmt.id, red_iname)) + transfer_stmt = make_assignment( id=transfer_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(red_iname),)] @@ -1248,14 +1248,14 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, tuple(var(nvn) for nvn in neutral_var_names)), reduction_expr), within_inames=( - (outer_insn_inames - frozenset(expr.inames)) + (outer_stmt_inames - frozenset(expr.inames)) | frozenset([red_iname])), - within_inames_is_final=insn.within_inames_is_final, - depends_on=frozenset([init_id, init_neutral_id]) | insn.depends_on, + within_inames_is_final=stmt.within_inames_is_final, + depends_on=frozenset([init_id, init_neutral_id]) | stmt.depends_on, no_sync_with=frozenset([(init_id, "any")]), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(transfer_insn) + generated_stmts.append(transfer_stmt) cur_size = 1 while cur_size < size: @@ -1274,8 +1274,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, 
domains.append(_make_slab_set(stage_exec_iname, bound-new_size)) new_iname_tags[stage_exec_iname] = kernel.iname_to_tag[red_iname] - stage_id = insn_id_gen("red_%s_stage_%d" % (red_iname, istage)) - stage_insn = make_assignment( + stage_id = stmt_id_gen("red_%s_stage_%d" % (red_iname, istage)) + stage_stmt = make_assignment( id=stage_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] @@ -1293,21 +1293,21 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, for acc_var in acc_vars))), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset([prev_id]), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(stage_insn) + generated_stmts.append(stage_stmt) prev_id = stage_id cur_size = new_size bound = cur_size istage += 1 - new_insn_add_depends_on.add(prev_id) - new_insn_add_no_sync_with.add((prev_id, "any")) - new_insn_add_within_inames.add(base_exec_iname or stage_exec_iname) + new_stmt_add_depends_on.add(prev_id) + new_stmt_add_no_sync_with.add((prev_id, "any")) + new_stmt_add_within_inames.add(base_exec_iname or stage_exec_iname) if nresults == 1: assert len(acc_vars) == 1 @@ -1375,7 +1375,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_scan_seq(expr, rec, nresults, arg_dtypes, reduction_dtypes, sweep_iname, scan_iname, sweep_min_value, scan_min_value, stride): - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_stmt_inames = temp_kernel.stmt_inames(stmt) inames_to_remove.add(scan_iname) track_iname = var_name_gen( @@ -1397,59 +1397,59 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, from pymbolic import var acc_vars = tuple(var(n) for n in acc_var_names) - init_id = insn_id_gen( - "%s_%s_init" % (insn.id, "_".join(expr.inames))) + init_id = stmt_id_gen( + "%s_%s_init" % 
(stmt.id, "_".join(expr.inames))) - init_insn_depends_on = frozenset() + init_stmt_depends_on = frozenset() - global_barrier = lp.find_most_recent_global_barrier(temp_kernel, insn.id) + global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id) if global_barrier is not None: - init_insn_depends_on |= frozenset([global_barrier]) + init_stmt_depends_on |= frozenset([global_barrier]) - init_insn = make_assignment( + init_stmt = make_assignment( id=init_id, assignees=acc_vars, - within_inames=outer_insn_inames - frozenset( + within_inames=outer_stmt_inames - frozenset( (sweep_iname,) + expr.inames), - within_inames_is_final=insn.within_inames_is_final, - depends_on=init_insn_depends_on, + within_inames_is_final=stmt.within_inames_is_final, + depends_on=init_stmt_depends_on, expression=expr.operation.neutral_element(*arg_dtypes), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(init_insn) + generated_stmts.append(init_stmt) - update_insn_depends_on = set([init_insn.id]) | insn.depends_on + update_stmt_depends_on = set([init_stmt.id]) | stmt.depends_on updated_inner_exprs = ( - preprocess_scan_arguments(insn, expr.expr, nresults, - scan_iname, track_iname, update_insn_depends_on)) + preprocess_scan_arguments(stmt, expr.expr, nresults, + scan_iname, track_iname, update_stmt_depends_on)) - update_id = insn_id_gen( - based_on="%s_%s_update" % (insn.id, "_".join(expr.inames))) + update_id = stmt_id_gen( + based_on="%s_%s_update" % (stmt.id, "_".join(expr.inames))) - update_insn_iname_deps = temp_kernel.insn_inames(insn) | set([track_iname]) - if insn.within_inames_is_final: - update_insn_iname_deps = insn.within_inames | set([track_iname]) + update_stmt_iname_deps = temp_kernel.stmt_inames(stmt) | set([track_iname]) + if stmt.within_inames_is_final: + update_stmt_iname_deps = stmt.within_inames | set([track_iname]) - scan_insn = make_assignment( + scan_stmt = make_assignment( id=update_id, assignees=acc_vars, 
expression=expr.operation( arg_dtypes, _strip_if_scalar(acc_vars, acc_vars), _strip_if_scalar(acc_vars, updated_inner_exprs)), - depends_on=frozenset(update_insn_depends_on), - within_inames=update_insn_iname_deps, - no_sync_with=insn.no_sync_with, - within_inames_is_final=insn.within_inames_is_final, - predicates=insn.predicates, + depends_on=frozenset(update_stmt_depends_on), + within_inames=update_stmt_iname_deps, + no_sync_with=stmt.no_sync_with, + within_inames_is_final=stmt.within_inames_is_final, + predicates=stmt.predicates, ) - generated_insns.append(scan_insn) + generated_stmts.append(scan_stmt) - new_insn_add_depends_on.add(scan_insn.id) + new_stmt_add_depends_on.add(scan_stmt.id) if nresults == 1: assert len(acc_vars) == 1 @@ -1473,12 +1473,12 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, return map_reduction_seq( expr, rec, nresults, arg_dtypes, reduction_dtypes) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_stmt_inames = temp_kernel.stmt_inames(stmt) from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple( oiname - for oiname in outer_insn_inames + for oiname in outer_stmt_inames if isinstance( kernel.iname_to_tag.get(oiname), LocalIndexTagBase) @@ -1530,45 +1530,45 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, acc_vars = tuple(var(n) for n in acc_var_names) read_vars = tuple(var(n) for n in read_var_names) - base_iname_deps = (outer_insn_inames + base_iname_deps = (outer_stmt_inames - frozenset(expr.inames) - frozenset([sweep_iname])) neutral = expr.operation.neutral_element(*arg_dtypes) - init_insn_depends_on = insn.depends_on + init_stmt_depends_on = stmt.depends_on - global_barrier = lp.find_most_recent_global_barrier(temp_kernel, insn.id) + global_barrier = lp.find_most_recent_global_barrier(temp_kernel, stmt.id) if global_barrier is not None: - init_insn_depends_on |= frozenset([global_barrier]) + init_stmt_depends_on |= frozenset([global_barrier]) - 
init_id = insn_id_gen("%s_%s_init" % (insn.id, scan_iname)) - init_insn = make_assignment( + init_id = stmt_id_gen("%s_%s_init" % (stmt.id, scan_iname)) + init_stmt = make_assignment( id=init_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(base_exec_iname),)] for acc_var in acc_vars), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), - within_inames_is_final=insn.within_inames_is_final, - depends_on=init_insn_depends_on, - predicates=insn.predicates, + within_inames_is_final=stmt.within_inames_is_final, + depends_on=init_stmt_depends_on, + predicates=stmt.predicates, ) - generated_insns.append(init_insn) + generated_stmts.append(init_stmt) - transfer_insn_depends_on = set([init_insn.id]) | insn.depends_on + transfer_stmt_depends_on = set([init_stmt.id]) | stmt.depends_on updated_inner_exprs = ( - preprocess_scan_arguments(insn, expr.expr, nresults, - scan_iname, track_iname, transfer_insn_depends_on)) + preprocess_scan_arguments(stmt, expr.expr, nresults, + scan_iname, track_iname, transfer_stmt_depends_on)) from loopy.symbolic import Reduction from loopy.symbolic import pw_aff_to_expr sweep_min_value_expr = pw_aff_to_expr(sweep_min_value) - transfer_id = insn_id_gen("%s_%s_transfer" % (insn.id, scan_iname)) - transfer_insn = make_assignment( + transfer_id = stmt_id_gen("%s_%s_transfer" % (stmt.id, scan_iname)) + transfer_stmt = make_assignment( id=transfer_id, assignees=tuple( acc_var[outer_local_iname_vars @@ -1580,14 +1580,14 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, expr=_strip_if_scalar(acc_vars, updated_inner_exprs), allow_simultaneous=False, ), - within_inames=outer_insn_inames - frozenset(expr.inames), - within_inames_is_final=insn.within_inames_is_final, - depends_on=frozenset(transfer_insn_depends_on), - no_sync_with=frozenset([(init_id, "any")]) | insn.no_sync_with, - predicates=insn.predicates, + within_inames=outer_stmt_inames - frozenset(expr.inames), + 
within_inames_is_final=stmt.within_inames_is_final, + depends_on=frozenset(transfer_stmt_depends_on), + no_sync_with=frozenset([(init_id, "any")]) | stmt.no_sync_with, + predicates=stmt.predicates, ) - generated_insns.append(transfer_insn) + generated_stmts.append(transfer_stmt) prev_id = transfer_id @@ -1601,10 +1601,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, new_iname_tags[stage_exec_iname] = kernel.iname_to_tag[sweep_iname] for read_var, acc_var in zip(read_vars, acc_vars): - read_stage_id = insn_id_gen( + read_stage_id = stmt_id_gen( "scan_%s_read_stage_%d" % (scan_iname, istage)) - read_stage_insn = make_assignment( + read_stage_stmt = make_assignment( id=read_stage_id, assignees=(read_var,), expression=( @@ -1613,26 +1613,26 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, + (var(stage_exec_iname) - cur_size,)]), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset([prev_id]), - predicates=insn.predicates, + predicates=stmt.predicates, ) if cur_size == 1: - # Performance hack: don't add a barrier here with transfer_insn. + # Performance hack: don't add a barrier here with transfer_stmt. # NOTE: This won't work if the way that local inames # are lowered changes. 
- read_stage_insn = read_stage_insn.copy( + read_stage_stmt = read_stage_stmt.copy( no_sync_with=( - read_stage_insn.no_sync_with + read_stage_stmt.no_sync_with | frozenset([(transfer_id, "any")]))) - generated_insns.append(read_stage_insn) + generated_stmts.append(read_stage_stmt) prev_id = read_stage_id - write_stage_id = insn_id_gen( + write_stage_id = stmt_id_gen( "scan_%s_write_stage_%d" % (scan_iname, istage)) - write_stage_insn = make_assignment( + write_stage_stmt = make_assignment( id=write_stage_id, assignees=tuple( acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] @@ -1647,19 +1647,19 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, ), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), - within_inames_is_final=insn.within_inames_is_final, + within_inames_is_final=stmt.within_inames_is_final, depends_on=frozenset([prev_id]), - predicates=insn.predicates, + predicates=stmt.predicates, ) - generated_insns.append(write_stage_insn) + generated_stmts.append(write_stage_stmt) prev_id = write_stage_id cur_size *= 2 istage += 1 - new_insn_add_depends_on.add(prev_id) - new_insn_add_within_inames.add(sweep_iname) + new_stmt_add_depends_on.add(prev_id) + new_stmt_add_within_inames.add(sweep_iname) output_idx = var(sweep_iname) - sweep_min_value_expr @@ -1676,7 +1676,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_reduction(expr, rec, nresults=1): # Only expand one level of reduction at a time, going from outermost to - # innermost. Otherwise we get the (iname + insn) dependencies wrong. + # innermost. Otherwise we get the (iname + stmt) dependencies wrong. 
from loopy.type_inference import ( infer_arg_and_reduction_dtypes_for_reduction_expression) @@ -1684,8 +1684,8 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, infer_arg_and_reduction_dtypes_for_reduction_expression( temp_kernel, expr, unknown_types_ok)) - outer_insn_inames = temp_kernel.insn_inames(insn) - bad_inames = frozenset(expr.inames) & outer_insn_inames + outer_stmt_inames = temp_kernel.stmt_inames(stmt) + bad_inames = frozenset(expr.inames) & outer_stmt_inames if bad_inames: raise LoopyError("reduction used within loop(s) that it was " "supposed to reduce over: " + ", ".join(bad_inames)) @@ -1711,7 +1711,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # Try to determine scan candidate information (sweep iname, scan # iname, etc). scan_param = _try_infer_scan_candidate_from_expr( - temp_kernel, expr, outer_insn_inames, + temp_kernel, expr, outer_stmt_inames, sweep_iname=force_outer_iname_for_scan) except ValueError as v: @@ -1778,10 +1778,10 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, parallel = sweep_iname in sweep_class.local_parallel bad_parallel = sweep_iname in sweep_class.nonlocal_parallel - if sweep_iname not in outer_insn_inames: + if sweep_iname not in outer_stmt_inames: _error_if_force_scan_on(LoopyError, "Sweep iname '%s' was detected, but is not an iname " - "for the instruction." % sweep_iname) + "for the statement." 
% sweep_iname) elif bad_parallel: _error_if_force_scan_on(LoopyError, "Sweep iname '%s' has an unsupported parallel tag '%s' " @@ -1828,48 +1828,48 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, from loopy.symbolic import ReductionCallbackMapper cb_mapper = ReductionCallbackMapper(map_reduction) - insn_queue = kernel.instructions[:] - insn_id_replacements = {} + stmt_queue = kernel.statements[:] + stmt_id_replacements = {} domains = kernel.domains[:] temp_kernel = kernel import loopy as lp - while insn_queue: - new_insn_add_depends_on = set() - new_insn_add_no_sync_with = set() - new_insn_add_within_inames = set() + while stmt_queue: + new_stmt_add_depends_on = set() + new_stmt_add_no_sync_with = set() + new_stmt_add_within_inames = set() - generated_insns = [] + generated_stmts = [] - insn = insn_queue.pop(0) + stmt = stmt_queue.pop(0) - if insn_id_filter is not None and insn.id != insn_id_filter \ - or not isinstance(insn, lp.MultiAssignmentBase): - new_insns.append(insn) + if stmt_id_filter is not None and stmt.id != stmt_id_filter \ + or not isinstance(stmt, lp.MultiAssignmentBase): + new_stmts.append(stmt) continue - nresults = len(insn.assignees) + nresults = len(stmt.assignees) # Run reduction expansion. from loopy.symbolic import Reduction - if isinstance(insn.expression, Reduction) and nresults > 1: - new_expressions = cb_mapper(insn.expression, nresults=nresults) + if isinstance(stmt.expression, Reduction) and nresults > 1: + new_expressions = cb_mapper(stmt.expression, nresults=nresults) else: - new_expressions = (cb_mapper(insn.expression),) + new_expressions = (cb_mapper(stmt.expression),) - if generated_insns: + if generated_stmts: # An expansion happened, so insert the generated stuff plus # ourselves back into the queue. 
- kwargs = insn.get_copy_kwargs( - depends_on=insn.depends_on - | frozenset(new_insn_add_depends_on), - no_sync_with=insn.no_sync_with - | frozenset(new_insn_add_no_sync_with), + kwargs = stmt.get_copy_kwargs( + depends_on=stmt.depends_on + | frozenset(new_stmt_add_depends_on), + no_sync_with=stmt.no_sync_with + | frozenset(new_stmt_add_no_sync_with), within_inames=( - temp_kernel.insn_inames(insn) - | new_insn_add_within_inames)) + temp_kernel.stmt_inames(stmt) + | new_stmt_add_within_inames)) kwargs.pop("id") kwargs.pop("expression") @@ -1878,53 +1878,53 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, kwargs.pop("temp_var_type", None) kwargs.pop("temp_var_types", None) - if isinstance(insn.expression, Reduction) and nresults > 1: - replacement_insns = [ + if isinstance(stmt.expression, Reduction) and nresults > 1: + replacement_stmts = [ lp.Assignment( - id=insn_id_gen(insn.id), + id=stmt_id_gen(stmt.id), assignee=assignee, expression=new_expr, **kwargs) for assignee, new_expr in zip( - insn.assignees, new_expressions)] + stmt.assignees, new_expressions)] else: new_expr, = new_expressions - replacement_insns = [ + replacement_stmts = [ make_assignment( - id=insn_id_gen(insn.id), - assignees=insn.assignees, + id=stmt_id_gen(stmt.id), + assignees=stmt.assignees, expression=new_expr, **kwargs) ] - insn_id_replacements[insn.id] = [ - rinsn.id for rinsn in replacement_insns] + stmt_id_replacements[stmt.id] = [ + rstmt.id for rstmt in replacement_stmts] - insn_queue = generated_insns + replacement_insns + insn_queue + stmt_queue = generated_stmts + replacement_stmts + stmt_queue # The reduction expander needs an up-to-date kernel # object to find dependencies. Keep temp_kernel up-to-date. 
temp_kernel = kernel.copy( - instructions=new_insns + insn_queue, + statements=new_stmts + stmt_queue, temporary_variables=new_temporary_variables, domains=domains) - temp_kernel = lp.replace_instruction_ids( - temp_kernel, insn_id_replacements) + temp_kernel = lp.replace_statement_ids( + temp_kernel, stmt_id_replacements) else: - # nothing happened, we're done with insn - assert not new_insn_add_depends_on + # nothing happened, we're done with stmt + assert not new_stmt_add_depends_on - new_insns.append(insn) + new_stmts.append(stmt) kernel = kernel.copy( - instructions=new_insns, + statements=new_stmts, temporary_variables=new_temporary_variables, domains=domains) - kernel = lp.replace_instruction_ids(kernel, insn_id_replacements) + kernel = lp.replace_statement_ids(kernel, stmt_id_replacements) kernel = lp.tag_inames(kernel, new_iname_tags) @@ -1939,7 +1939,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, # }}} -# {{{ find idempotence ("boostability") of instructions +# {{{ find idempotence ("boostability") of statements def find_idempotence(kernel): logger.debug("%s: idempotence" % kernel.name) @@ -1951,18 +1951,18 @@ def find_idempotence(kernel): var_names = arg_names | set(six.iterkeys(kernel.temporary_variables)) reads_map = dict( - (insn.id, insn.read_dependency_names() & var_names) - for insn in kernel.instructions) + (stmt.id, stmt.read_dependency_names() & var_names) + for stmt in kernel.statements) from collections import defaultdict dep_graph = defaultdict(set) - for insn in kernel.instructions: - dep_graph[insn.id] = set(writer_id - for var in reads_map[insn.id] + for stmt in kernel.statements: + dep_graph[stmt.id] = set(writer_id + for var in reads_map[stmt.id] for writer_id in writer_map.get(var, set())) - # Find SCCs of dep_graph. These are used for checking if the instruction is + # Find SCCs of dep_graph. These are used for checking if the statement is # in a dependency cycle. 
from loopy.tools import compute_sccs @@ -1972,29 +1972,29 @@ def find_idempotence(kernel): non_idempotently_updated_vars = set() - new_insns = [] - for insn in kernel.instructions: - boostable = len(sccs[insn.id]) == 1 and insn.id not in dep_graph[insn.id] + new_stmts = [] + for stmt in kernel.statements: + boostable = len(sccs[stmt.id]) == 1 and stmt.id not in dep_graph[stmt.id] if not boostable: non_idempotently_updated_vars.update( - insn.assignee_var_names()) + stmt.assignee_var_names()) - new_insns.append(insn.copy(boostable=boostable)) + new_stmts.append(stmt.copy(boostable=boostable)) # {{{ remove boostability from isns that access non-idempotently updated vars - new2_insns = [] - for insn in new_insns: - if insn.boostable and bool( - non_idempotently_updated_vars & insn.dependency_names()): - new2_insns.append(insn.copy(boostable=False)) + new2_stmts = [] + for stmt in new_stmts: + if stmt.boostable and bool( + non_idempotently_updated_vars & stmt.dependency_names()): + new2_stmts.append(stmt.copy(boostable=False)) else: - new2_insns.append(insn) + new2_stmts.append(stmt) # }}} - return kernel.copy(instructions=new2_insns) + return kernel.copy(statements=new2_stmts) # }}} @@ -2002,47 +2002,47 @@ def find_idempotence(kernel): # {{{ limit boostability def limit_boostability(kernel): - """Finds out which other inames an instruction's inames occur with + """Finds out which other inames an statement's inames occur with and then limits boostability to just those inames. 
""" logger.debug("%s: limit boostability" % kernel.name) iname_occurs_with = {} - for insn in kernel.instructions: - insn_inames = kernel.insn_inames(insn) - for iname in insn_inames: - iname_occurs_with.setdefault(iname, set()).update(insn_inames) + for stmt in kernel.statements: + stmt_inames = kernel.stmt_inames(stmt) + for iname in stmt_inames: + iname_occurs_with.setdefault(iname, set()).update(stmt_inames) iname_use_counts = {} - for insn in kernel.instructions: - for iname in kernel.insn_inames(insn): + for stmt in kernel.statements: + for iname in kernel.stmt_inames(stmt): iname_use_counts[iname] = iname_use_counts.get(iname, 0) + 1 single_use_inames = set(iname for iname, uc in six.iteritems(iname_use_counts) if uc == 1) - new_insns = [] - for insn in kernel.instructions: - if insn.boostable is None: - raise LoopyError("insn '%s' has undetermined boostability" % insn.id) - elif insn.boostable: + new_stmts = [] + for stmt in kernel.statements: + if stmt.boostable is None: + raise LoopyError("stmt '%s' has undetermined boostability" % stmt.id) + elif stmt.boostable: boostable_into = set() - for iname in kernel.insn_inames(insn): + for iname in kernel.stmt_inames(stmt): boostable_into.update(iname_occurs_with[iname]) - boostable_into -= kernel.insn_inames(insn) | single_use_inames + boostable_into -= kernel.stmt_inames(stmt) | single_use_inames # Even if boostable_into is empty, leave boostable flag on--it is used # for boosting into unused hw axes. 
- insn = insn.copy(boostable_into=boostable_into) + stmt = stmt.copy(boostable_into=boostable_into) else: - insn = insn.copy(boostable_into=set()) + stmt = stmt.copy(boostable_into=set()) - new_insns.append(insn) + new_stmts.append(stmt) - return kernel.copy(instructions=new_insns) + return kernel.copy(statements=new_stmts) # }}} diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index abf4d799f..e585a8a39 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -66,8 +66,8 @@ class LeaveLoop(EndBlockItem): hash_fields = __slots__ = ["iname"] -class RunInstruction(ScheduleItem): - hash_fields = __slots__ = ["insn_id"] +class RunStatement(ScheduleItem): + hash_fields = __slots__ = ["stmt_id"] class CallKernel(BeginBlockItem): @@ -88,11 +88,11 @@ class Barrier(ScheduleItem): ``"local"`` or ``"global"`` - .. attribute:: originating_insn_id + .. attribute:: originating_stmt_id """ hash_fields = ["comment", "kind"] - __slots__ = hash_fields + ["originating_insn_id"] + __slots__ = hash_fields + ["originating_stmt_id"] # }}} @@ -143,12 +143,12 @@ def generate_sub_sched_items(schedule, start_idx): assert False -def get_insn_ids_for_block_at(schedule, start_idx): +def get_stmt_ids_for_block_at(schedule, start_idx): return frozenset( - sub_sched_item.insn_id + sub_sched_item.stmt_id for i, sub_sched_item in generate_sub_sched_items( schedule, start_idx) - if isinstance(sub_sched_item, RunInstruction)) + if isinstance(sub_sched_item, RunStatement)) def find_active_inames_at(kernel, sched_index): @@ -185,17 +185,17 @@ def find_used_inames_within(kernel, sched_index): if isinstance(sched_item, BeginBlockItem): loop_contents, _ = gather_schedule_block( kernel.schedule, sched_index) - run_insns = [subsched_item + run_stmts = [subsched_item for subsched_item in loop_contents - if isinstance(subsched_item, RunInstruction)] - elif isinstance(sched_item, RunInstruction): - run_insns = [sched_item] + if isinstance(subsched_item, RunStatement)] + 
elif isinstance(sched_item, RunStatement): + run_stmts = [sched_item] else: return set() result = set() - for sched_item in run_insns: - result.update(kernel.insn_inames(sched_item.insn_id)) + for sched_item in run_stmts: + result.update(kernel.stmt_inames(sched_item.stmt_id)) return result @@ -214,13 +214,13 @@ def find_loop_nest_with_map(kernel): if not isinstance(kernel.iname_to_tag.get(iname), (ConcurrentTag, IlpBaseTag, VectorizeTag))]) - iname_to_insns = kernel.iname_to_insns() + iname_to_stmts = kernel.iname_to_stmts() for iname in all_nonpar_inames: result[iname] = set([ other_iname - for insn in iname_to_insns[iname] - for other_iname in kernel.insn_inames(insn) & all_nonpar_inames + for stmt in iname_to_stmts[iname] + for other_iname in kernel.stmt_inames(stmt) & all_nonpar_inames ]) return result @@ -234,7 +234,7 @@ def find_loop_nest_around_map(kernel): all_inames = kernel.all_inames() - iname_to_insns = kernel.iname_to_insns() + iname_to_stmts = kernel.iname_to_stmts() # examine pairs of all inames--O(n**2), I know. from loopy.kernel.data import IlpBaseTag @@ -253,7 +253,7 @@ def find_loop_nest_around_map(kernel): # slack here. continue - if iname_to_insns[inner_iname] < iname_to_insns[outer_iname]: + if iname_to_stmts[inner_iname] < iname_to_stmts[outer_iname]: result[inner_iname].add(outer_iname) for dom_idx, dom in enumerate(kernel.domains): @@ -267,54 +267,54 @@ def find_loop_nest_around_map(kernel): return result -def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): - """Returns a dictionary mapping inames to other instruction ids that need to +def find_loop_stmt_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): + """Returns a dictionary mapping inames to other statement ids that need to be scheduled before the iname should be eligible for scheduling. 
""" result = {} from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag - for insn in kernel.instructions: - for iname in kernel.insn_inames(insn): + for stmt in kernel.statements: + for iname in kernel.stmt_inames(stmt): if isinstance(kernel.iname_to_tag.get(iname), ConcurrentTag): continue iname_dep = result.setdefault(iname, set()) - for dep_insn_id in insn.depends_on: - if dep_insn_id in iname_dep: + for dep_stmt_id in stmt.depends_on: + if dep_stmt_id in iname_dep: # already depending, nothing to check continue - dep_insn = kernel.id_to_insn[dep_insn_id] - dep_insn_inames = kernel.insn_inames(dep_insn) + dep_stmt = kernel.id_to_stmt[dep_stmt_id] + dep_stmt_inames = kernel.stmt_inames(dep_stmt) - if iname in dep_insn_inames: + if iname in dep_stmt_inames: # Nothing to be learned, dependency is in loop over iname # already. continue - # To make sure dep_insn belongs outside of iname, we must prove - # that all inames that dep_insn will be executed in nest + # To make sure dep_stmt belongs outside of iname, we must prove + # that all inames that dep_stmt will be executed in nest # outside of the loop over *iname*. (i.e. nested around, or # before). may_add_to_loop_dep_map = True - for dep_insn_iname in dep_insn_inames: - if dep_insn_iname in loop_nest_around_map[iname]: - # dep_insn_iname is guaranteed to nest outside of iname + for dep_stmt_iname in dep_stmt_inames: + if dep_stmt_iname in loop_nest_around_map[iname]: + # dep_stmt_iname is guaranteed to nest outside of iname # -> safe. continue - tag = kernel.iname_to_tag.get(dep_insn_iname) + tag = kernel.iname_to_tag.get(dep_stmt_iname) if isinstance(tag, (ConcurrentTag, IlpBaseTag, VectorizeTag)): # Parallel tags don't really nest, so we'll disregard # them here. 
continue - if dep_insn_iname not in loop_nest_with_map.get(iname, []): - # dep_insn_iname does not nest with iname, so its nest + if dep_stmt_iname not in loop_nest_with_map.get(iname, []): + # dep_stmt_iname does not nest with iname, so its nest # must occur outside. continue @@ -325,38 +325,38 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue logger.debug("{knl}: loop dependency map: iname '{iname}' " - "depends on '{dep_insn}' via '{insn}'" + "depends on '{dep_stmt}' via '{stmt}'" .format( knl=kernel.name, iname=iname, - dep_insn=dep_insn_id, - insn=insn.id)) + dep_stmt=dep_stmt_id, + stmt=stmt.id)) - iname_dep.add(dep_insn_id) + iname_dep.add(dep_stmt_id) return result -def group_insn_counts(kernel): +def group_stmt_counts(kernel): result = {} - for insn in kernel.instructions: - for grp in insn.groups: + for stmt in kernel.statements: + for grp in stmt.groups: result[grp] = result.get(grp, 0) + 1 return result -def gen_dependencies_except(kernel, insn_id, except_insn_ids): - insn = kernel.id_to_insn[insn_id] - for dep_id in insn.depends_on: +def gen_dependencies_except(kernel, stmt_id, except_stmt_ids): + stmt = kernel.id_to_stmt[stmt_id] + for dep_id in stmt.depends_on: - if dep_id in except_insn_ids: + if dep_id in except_stmt_ids: continue yield dep_id - for sub_dep_id in gen_dependencies_except(kernel, dep_id, except_insn_ids): + for sub_dep_id in gen_dependencies_except(kernel, dep_id, except_stmt_ids): yield sub_dep_id @@ -403,50 +403,50 @@ def get_priority_tiers(wanted, priorities): yield tier -def sched_item_to_insn_id(sched_item): +def sched_item_to_stmt_id(sched_item): # Helper for use in generator expressions, i.e. - # (... for insn_id in sched_item_to_insn_id(item) ...) - if isinstance(sched_item, RunInstruction): - yield sched_item.insn_id + # (... for stmt_id in sched_item_to_stmt_id(item) ...) 
+ if isinstance(sched_item, RunStatement): + yield sched_item.stmt_id elif isinstance(sched_item, Barrier): - if (hasattr(sched_item, "originating_insn_id") - and sched_item.originating_insn_id is not None): - yield sched_item.originating_insn_id + if (hasattr(sched_item, "originating_stmt_id") + and sched_item.originating_stmt_id is not None): + yield sched_item.originating_stmt_id # }}} # {{{ debug help -def format_insn_id(kernel, insn_id): +def format_stmt_id(kernel, stmt_id): Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa - return Fore.GREEN + insn_id + Style.RESET_ALL + return Fore.GREEN + stmt_id + Style.RESET_ALL -def format_insn(kernel, insn_id): - insn = kernel.id_to_insn[insn_id] +def format_stmt(kernel, stmt_id): + stmt = kernel.id_to_stmt[stmt_id] Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa - from loopy.kernel.instruction import ( - MultiAssignmentBase, NoOpInstruction, BarrierInstruction) - if isinstance(insn, MultiAssignmentBase): + from loopy.kernel.statement import ( + MultiAssignmentBase, NoOpStatement, BarrierStatement) + if isinstance(stmt, MultiAssignmentBase): return "%s%s%s = %s%s%s {id=%s}" % ( - Fore.CYAN, ", ".join(str(a) for a in insn.assignees), Style.RESET_ALL, - Fore.MAGENTA, str(insn.expression), Style.RESET_ALL, - format_insn_id(kernel, insn_id)) - elif isinstance(insn, BarrierInstruction): + Fore.CYAN, ", ".join(str(a) for a in stmt.assignees), Style.RESET_ALL, + Fore.MAGENTA, str(stmt.expression), Style.RESET_ALL, + format_stmt_id(kernel, stmt_id)) + elif isinstance(stmt, BarrierStatement): return "[%s] %s... %sbarrier%s" % ( - format_insn_id(kernel, insn_id), - Fore.MAGENTA, insn.kind[0], Style.RESET_ALL) - elif isinstance(insn, NoOpInstruction): + format_stmt_id(kernel, stmt_id), + Fore.MAGENTA, stmt.kind[0], Style.RESET_ALL) + elif isinstance(stmt, NoOpStatement): return "[%s] %s... 
nop%s" % ( - format_insn_id(kernel, insn_id), + format_stmt_id(kernel, stmt_id), Fore.MAGENTA, Style.RESET_ALL) else: return "[%s] %s%s%s" % ( - format_insn_id(kernel, insn_id), - Fore.CYAN, str(insn), Style.RESET_ALL) + format_stmt_id(kernel, stmt_id), + Fore.CYAN, str(stmt), Style.RESET_ALL) def dump_schedule(kernel, schedule): @@ -471,13 +471,13 @@ def dump_schedule(kernel, schedule): elif isinstance(sched_item, ReturnFromKernel): indent = indent[:-4] lines.append(indent + "RETURN FROM KERNEL %s" % sched_item.kernel_name) - elif isinstance(sched_item, RunInstruction): - insn = kernel.id_to_insn[sched_item.insn_id] - if isinstance(insn, MultiAssignmentBase): - insn_str = format_insn(kernel, sched_item.insn_id) + elif isinstance(sched_item, RunStatement): + stmt = kernel.id_to_stmt[sched_item.stmt_id] + if isinstance(stmt, MultiAssignmentBase): + stmt_str = format_stmt(kernel, sched_item.stmt_id) else: - insn_str = sched_item.insn_id - lines.append(indent + insn_str) + stmt_str = sched_item.stmt_id + lines.append(indent + stmt_str) elif isinstance(sched_item, Barrier): lines.append(indent + "... %sbarrier" % sched_item.kind[0]) else: @@ -592,9 +592,9 @@ class SchedulerState(ImmutableRecord): .. attribute:: schedule - .. attribute:: scheduled_insn_ids + .. attribute:: scheduled_stmt_ids - .. attribute:: unscheduled_insn_ids + .. attribute:: unscheduled_stmt_ids .. attribute:: preschedule @@ -602,9 +602,9 @@ class SchedulerState(ImmutableRecord): schedule, maintaining the same relative ordering. Newly scheduled items may interleave this sequence. - .. attribute:: prescheduled_insn_ids + .. attribute:: prescheduled_stmt_ids - A :class:`frozenset` of any instruction that started prescheduled + A :class:`frozenset` of any statement that started prescheduled .. attribute:: prescheduled_inames @@ -618,14 +618,14 @@ class SchedulerState(ImmutableRecord): Whether the scheduler is inside a subkernel - .. attribute:: group_insn_counts + .. 
attribute:: group_stmt_counts - A mapping from instruction group names to the number of instructions + A mapping from statement group names to the number of statements contained in them. .. attribute:: active_group_counts - A mapping from instruction group names to the number of instructions + A mapping from statement group names to the number of statements in them that are left to schedule. If a group name occurs in this mapping, that group is considered active. @@ -645,8 +645,8 @@ class SchedulerState(ImmutableRecord): def generate_loop_schedules_internal( sched_state, allow_boost=False, debug=None): - # allow_insn is set to False initially and after entering each loop - # to give loops containing high-priority instructions a chance. + # allow_stmt is set to False initially and after entering each loop + # to give loops containing high-priority statements a chance. kernel = sched_state.kernel Fore = kernel.options._fore # noqa @@ -734,13 +734,13 @@ def generate_loop_schedules_internal( # {{{ see if there are pending barriers in the preschedule - # Barriers that do not have an originating instruction are handled here. + # Barriers that do not have an originating statement are handled here. # (These are automatically inserted by insert_barriers().) Barriers with - # originating instructions are handled as part of normal instruction + # originating statements are handled as part of normal statement # scheduling below. 
if ( isinstance(next_preschedule_item, Barrier) - and next_preschedule_item.originating_insn_id is None): + and next_preschedule_item.originating_stmt_id is None): for result in generate_loop_schedules_internal( sched_state.copy( schedule=sched_state.schedule + (next_preschedule_item,), @@ -751,134 +751,134 @@ def generate_loop_schedules_internal( # }}} - # {{{ see if any insns are ready to be scheduled now + # {{{ see if any stmts are ready to be scheduled now - # Also take note of insns that have a chance of being schedulable inside + # Also take note of stmts that have a chance of being schedulable inside # the current loop nest, in this set: - reachable_insn_ids = set() + reachable_stmt_ids = set() active_groups = frozenset(sched_state.active_group_counts) - def insn_sort_key(insn_id): - insn = kernel.id_to_insn[insn_id] + def stmt_sort_key(stmt_id): + stmt = kernel.id_to_stmt[stmt_id] - # Sort by insn.id as a last criterion to achieve deterministic + # Sort by stmt.id as a last criterion to achieve deterministic # schedule generation order. - return (insn.priority, len(active_groups & insn.groups), insn.id) + return (stmt.priority, len(active_groups & stmt.groups), stmt.id) - insn_ids_to_try = sorted( - # Non-prescheduled instructions go first. - sched_state.unscheduled_insn_ids - sched_state.prescheduled_insn_ids, - key=insn_sort_key, reverse=True) + stmt_ids_to_try = sorted( + # Non-prescheduled statements go first. 
+ sched_state.unscheduled_stmt_ids - sched_state.prescheduled_stmt_ids, + key=stmt_sort_key, reverse=True) - insn_ids_to_try.extend( - insn_id + stmt_ids_to_try.extend( + stmt_id for item in sched_state.preschedule - for insn_id in sched_item_to_insn_id(item)) + for stmt_id in sched_item_to_stmt_id(item)) - for insn_id in insn_ids_to_try: - insn = kernel.id_to_insn[insn_id] + for stmt_id in stmt_ids_to_try: + stmt = kernel.id_to_stmt[stmt_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids + is_ready = stmt.depends_on <= sched_state.scheduled_stmt_ids if not is_ready: if debug_mode: - print("instruction '%s' is missing insn depedencies '%s'" % ( - format_insn(kernel, insn.id), ",".join( - insn.depends_on - sched_state.scheduled_insn_ids))) + print("statement '%s' is missing stmt depedencies '%s'" % ( + format_stmt(kernel, stmt.id), ",".join( + stmt.depends_on - sched_state.scheduled_stmt_ids))) continue - want = kernel.insn_inames(insn) - sched_state.parallel_inames + want = kernel.stmt_inames(stmt) - sched_state.parallel_inames have = active_inames_set - sched_state.parallel_inames - # If insn is boostable, it may be placed inside a more deeply + # If stmt is boostable, it may be placed inside a more deeply # nested loop without harm. orig_have = have if allow_boost: - # Note that the inames in 'insn.boostable_into' necessarily won't + # Note that the inames in 'stmt.boostable_into' necessarily won't # be contained in 'want'. 
- have = have - insn.boostable_into + have = have - stmt.boostable_into if want != have: is_ready = False if debug_mode: if want-have: - print("instruction '%s' is missing inames '%s'" - % (format_insn(kernel, insn.id), ",".join(want-have))) + print("statement '%s' is missing inames '%s'" + % (format_stmt(kernel, stmt.id), ",".join(want-have))) if have-want: - print("instruction '%s' won't work under inames '%s'" - % (format_insn(kernel, insn.id), ",".join(have-want))) + print("statement '%s' won't work under inames '%s'" + % (format_stmt(kernel, stmt.id), ",".join(have-want))) - # {{{ check if scheduling this insn is compatible with preschedule + # {{{ check if scheduling this stmt is compatible with preschedule - if insn_id in sched_state.prescheduled_insn_ids: - if isinstance(next_preschedule_item, RunInstruction): - next_preschedule_insn_id = next_preschedule_item.insn_id + if stmt_id in sched_state.prescheduled_stmt_ids: + if isinstance(next_preschedule_item, RunStatement): + next_preschedule_stmt_id = next_preschedule_item.stmt_id elif isinstance(next_preschedule_item, Barrier): - assert next_preschedule_item.originating_insn_id is not None - next_preschedule_insn_id = next_preschedule_item.originating_insn_id + assert next_preschedule_item.originating_stmt_id is not None + next_preschedule_stmt_id = next_preschedule_item.originating_stmt_id else: - next_preschedule_insn_id = None + next_preschedule_stmt_id = None - if next_preschedule_insn_id != insn_id: + if next_preschedule_stmt_id != stmt_id: if debug_mode: print("can't schedule '%s' because another preschedule " - "instruction precedes it" % format_insn(kernel, insn.id)) + "statement precedes it" % format_stmt(kernel, stmt.id)) is_ready = False # }}} - # {{{ check if scheduler state allows insn scheduling + # {{{ check if scheduler state allows stmt scheduling - from loopy.kernel.instruction import BarrierInstruction - if isinstance(insn, BarrierInstruction) and insn.kind == "global": + from 
loopy.kernel.statement import BarrierStatement + if isinstance(stmt, BarrierStatement) and stmt.kind == "global": if not sched_state.may_schedule_global_barriers: if debug_mode: print("can't schedule '%s' because global barriers are " - "not currently allowed" % format_insn(kernel, insn.id)) + "not currently allowed" % format_stmt(kernel, stmt.id)) is_ready = False else: if not sched_state.within_subkernel: if debug_mode: print("can't schedule '%s' because not within subkernel" - % format_insn(kernel, insn.id)) + % format_stmt(kernel, stmt.id)) is_ready = False # }}} # {{{ determine group-based readiness - if insn.conflicts_with_groups & active_groups: + if stmt.conflicts_with_groups & active_groups: is_ready = False if debug_mode: - print("instruction '%s' conflicts with active group(s) '%s'" - % (insn.id, ",".join( - active_groups & insn.conflicts_with_groups))) + print("statement '%s' conflicts with active group(s) '%s'" + % (stmt.id, ",".join( + active_groups & stmt.conflicts_with_groups))) # }}} # {{{ determine reachability if (not is_ready and have <= want): - reachable_insn_ids.add(insn_id) + reachable_stmt_ids.add(stmt_id) # }}} if is_ready and debug_mode: - print("ready to schedule '%s'" % format_insn(kernel, insn.id)) + print("ready to schedule '%s'" % format_stmt(kernel, stmt.id)) if is_ready and not debug_mode: - iid_set = frozenset([insn.id]) + iid_set = frozenset([stmt.id]) - # {{{ update active group counts for added instruction + # {{{ update active group counts for added statement - if insn.groups: + if stmt.groups: new_active_group_counts = sched_state.active_group_counts.copy() - for grp in insn.groups: + for grp in stmt.groups: if grp in new_active_group_counts: new_active_group_counts[grp] -= 1 if new_active_group_counts[grp] == 0: @@ -886,7 +886,7 @@ def generate_loop_schedules_internal( else: new_active_group_counts[grp] = ( - sched_state.group_insn_counts[grp] - 1) + sched_state.group_stmt_counts[grp] - 1) else: new_active_group_counts = 
sched_state.active_group_counts @@ -895,18 +895,18 @@ def generate_loop_schedules_internal( new_uses_of_boostability = [] if allow_boost: - if orig_have & insn.boostable_into: + if orig_have & stmt.boostable_into: new_uses_of_boostability.append( - (insn.id, orig_have & insn.boostable_into)) + (stmt.id, orig_have & stmt.boostable_into)) new_sched_state = sched_state.copy( - scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, - unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, + scheduled_stmt_ids=sched_state.scheduled_stmt_ids | iid_set, + unscheduled_stmt_ids=sched_state.unscheduled_stmt_ids - iid_set, schedule=( - sched_state.schedule + (RunInstruction(insn_id=insn.id),)), + sched_state.schedule + (RunStatement(stmt_id=stmt.id),)), preschedule=( sched_state.preschedule - if insn_id not in sched_state.prescheduled_insn_ids + if stmt_id not in sched_state.prescheduled_stmt_ids else sched_state.preschedule[1:]), active_group_counts=new_active_group_counts, uses_of_boostability=( @@ -923,9 +923,9 @@ def generate_loop_schedules_internal( allow_boost=rec_allow_boost, debug=debug): yield sub_sched - if not sched_state.group_insn_counts: + if not sched_state.group_stmt_counts: # No groups: We won't need to backtrack on scheduling - # instructions. + # statements. return # }}} @@ -949,21 +949,21 @@ def generate_loop_schedules_internal( can_leave = False elif last_entered_loop not in sched_state.breakable_inames: # If the iname is not breakable, then check that we've - # scheduled all the instructions that require it. + # scheduled all the statements that require it. 
- for insn_id in sched_state.unscheduled_insn_ids: - insn = kernel.id_to_insn[insn_id] - if last_entered_loop in kernel.insn_inames(insn): + for stmt_id in sched_state.unscheduled_stmt_ids: + stmt = kernel.id_to_stmt[stmt_id] + if last_entered_loop in kernel.stmt_inames(stmt): if debug_mode: print("cannot leave '%s' because '%s' still depends on it" - % (last_entered_loop, format_insn(kernel, insn.id))) + % (last_entered_loop, format_stmt(kernel, stmt.id))) - # check if there's a dependency of insn that needs to be + # check if there's a dependency of stmt that needs to be # outside of last_entered_loop. - for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): - subdep = kernel.id_to_insn[insn_id] - want = (kernel.insn_inames(subdep_id) + for subdep_id in gen_dependencies_except(kernel, stmt_id, + sched_state.scheduled_stmt_ids): + subdep = kernel.id_to_stmt[stmt_id] + want = (kernel.stmt_inames(subdep_id) - sched_state.parallel_inames) if ( last_entered_loop not in want and @@ -982,10 +982,10 @@ def generate_loop_schedules_internal( "warn": Fore.RED + Style.BRIGHT, "reset_all": Style.RESET_ALL, "iname": last_entered_loop, - "subdep": format_insn_id(kernel, subdep_id), - "dep": format_insn_id(kernel, insn_id), - "subdep_i": format_insn(kernel, subdep_id), - "dep_i": format_insn(kernel, insn_id), + "subdep": format_stmt_id(kernel, subdep_id), + "dep": format_stmt_id(kernel, stmt_id), + "subdep_i": format_stmt(kernel, subdep_id), + "dep_i": format_stmt(kernel, stmt_id), }) can_leave = False @@ -994,14 +994,14 @@ def generate_loop_schedules_internal( if can_leave: can_leave = False - # We may only leave this loop if we've scheduled an instruction + # We may only leave this loop if we've scheduled an statement # since entering it. 
- seen_an_insn = False + seen_an_stmt = False ignore_count = 0 for sched_item in sched_state.schedule[::-1]: - if isinstance(sched_item, RunInstruction): - seen_an_insn = True + if isinstance(sched_item, RunStatement): + seen_an_stmt = True elif isinstance(sched_item, LeaveLoop): ignore_count += 1 elif isinstance(sched_item, EnterLoop): @@ -1009,7 +1009,7 @@ def generate_loop_schedules_internal( ignore_count -= 1 else: assert sched_item.iname == last_entered_loop - if seen_an_insn: + if seen_an_stmt: can_leave = True break @@ -1036,10 +1036,10 @@ def generate_loop_schedules_internal( # {{{ see if any loop can be entered now - # Find inames that are being referenced by as yet unscheduled instructions. + # Find inames that are being referenced by as yet unscheduled statements. needed_inames = set() - for insn_id in sched_state.unscheduled_insn_ids: - needed_inames.update(kernel.insn_inames(insn_id)) + for stmt_id in sched_state.unscheduled_stmt_ids: + needed_inames.update(kernel.stmt_inames(stmt_id)) needed_inames = (needed_inames # There's no notion of 'entering' a parallel loop @@ -1053,8 +1053,8 @@ def generate_loop_schedules_internal( print("inames still needed :", ",".join(needed_inames)) print("active inames :", ",".join(sched_state.active_inames)) print("inames entered so far :", ",".join(sched_state.entered_inames)) - print("reachable insns:", ",".join(reachable_insn_ids)) - print("active groups (with insn counts):", ",".join( + print("reachable stmts:", ",".join(reachable_stmt_ids)) + print("active groups (with stmt counts):", ",".join( "%s: %d" % (grp, c) for grp, c in six.iteritems(sched_state.active_group_counts))) print(75*"-") @@ -1086,18 +1086,18 @@ def generate_loop_schedules_internal( continue if ( - not sched_state.loop_insn_dep_map.get(iname, set()) - <= sched_state.scheduled_insn_ids): + not sched_state.loop_stmt_dep_map.get(iname, set()) + <= sched_state.scheduled_stmt_ids): if debug_mode: print( "scheduling {iname} prohibited by loop dependency 
map " - "(needs '{needed_insns})'" + "(needs '{needed_stmts})'" .format( iname=iname, - needed_insns=", ".join( - sched_state.loop_insn_dep_map.get(iname, set()) + needed_stmts=", ".join( + sched_state.loop_stmt_dep_map.get(iname, set()) - - sched_state.scheduled_insn_ids))) + sched_state.scheduled_stmt_ids))) continue @@ -1120,8 +1120,8 @@ def generate_loop_schedules_internal( iname_home_domain_params & set(kernel.temporary_variables)): - writer_insn, = kernel.writer_map()[domain_par] - if writer_insn not in sched_state.scheduled_insn_ids: + writer_stmt, = kernel.writer_map()[domain_par] + if writer_stmt not in sched_state.scheduled_stmt_ids: data_dep_written = False if debug_mode: print("iname '%s' not scheduled because domain " @@ -1134,21 +1134,21 @@ def generate_loop_schedules_internal( # }}} - # {{{ determine if that gets us closer to being able to schedule an insn + # {{{ determine if that gets us closer to being able to schedule an stmt - usefulness = None # highest insn priority enabled by iname + usefulness = None # highest stmt priority enabled by iname hypothetically_active_loops = active_inames_set | set([iname]) - for insn_id in reachable_insn_ids: - insn = kernel.id_to_insn[insn_id] + for stmt_id in reachable_stmt_ids: + stmt = kernel.id_to_stmt[stmt_id] - want = kernel.insn_inames(insn) | insn.boostable_into + want = kernel.stmt_inames(stmt) | stmt.boostable_into if hypothetically_active_loops <= want: if usefulness is None: - usefulness = insn.priority + usefulness = stmt.priority else: - usefulness = max(usefulness, insn.priority) + usefulness = max(usefulness, stmt.priority) if usefulness is None: if debug_mode: @@ -1262,18 +1262,18 @@ def generate_loop_schedules_internal( if ( not sched_state.active_inames - and not sched_state.unscheduled_insn_ids + and not sched_state.unscheduled_stmt_ids and not sched_state.preschedule): # if done, yield result debug.log_success(sched_state.schedule) - for boost_insn_id, boost_inames in 
sched_state.uses_of_boostability: + for boost_stmt_id, boost_inames in sched_state.uses_of_boostability: warn_with_kernel( kernel, "used_boostability", - "instruction '%s' was implicitly nested inside " + "statement '%s' was implicitly nested inside " "inames '%s' based on an idempotence heuristic. " "This is deprecated and will stop working in loopy 2017.x." - % (boost_insn_id, ", ".join(boost_inames)), + % (boost_stmt_id, ", ".join(boost_inames)), DeprecationWarning) yield sched_state.schedule @@ -1296,31 +1296,31 @@ def generate_loop_schedules_internal( # {{{ filter nops from schedule def filter_nops_from_schedule(kernel, schedule): - from loopy.kernel.instruction import NoOpInstruction + from loopy.kernel.statement import NoOpStatement return [ sched_item for sched_item in schedule - if (not isinstance(sched_item, RunInstruction) - or not isinstance(kernel.id_to_insn[sched_item.insn_id], - NoOpInstruction))] + if (not isinstance(sched_item, RunStatement) + or not isinstance(kernel.id_to_stmt[sched_item.stmt_id], + NoOpStatement))] # }}} -# {{{ convert barrier instructions to proper barriers +# {{{ convert barrier statements to proper barriers -def convert_barrier_instructions_to_barriers(kernel, schedule): - from loopy.kernel.instruction import BarrierInstruction +def convert_barrier_statements_to_barriers(kernel, schedule): + from loopy.kernel.statement import BarrierStatement result = [] for sched_item in schedule: - if isinstance(sched_item, RunInstruction): - insn = kernel.id_to_insn[sched_item.insn_id] - if isinstance(insn, BarrierInstruction): + if isinstance(sched_item, RunStatement): + stmt = kernel.id_to_stmt[sched_item.stmt_id] + if isinstance(stmt, BarrierStatement): result.append(Barrier( - kind=insn.kind, - originating_insn_id=insn.id, - comment="Barrier inserted due to %s" % insn.id)) + kind=stmt.kind, + originating_stmt_id=stmt.id, + comment="Barrier inserted due to %s" % stmt.id)) continue result.append(sched_item) @@ -1336,16 +1336,16 @@ class 
DependencyRecord(ImmutableRecord): """ .. attribute:: source - A :class:`loopy.InstructionBase` instance. + A :class:`loopy.StatementBase` instance. .. attribute:: target - A :class:`loopy.InstructionBase` instance. + A :class:`loopy.StatementBase` instance. .. attribute:: dep_descr A string containing a phrase describing the dependency. The variables - '{src}' and '{tgt}' will be replaced by their respective instruction IDs. + '{src}' and '{tgt}' will be replaced by their respective statement IDs. .. attribute:: variable @@ -1435,10 +1435,10 @@ class DependencyTracker(object): def add_source(self, source): """ - Specify that an instruction may be used as the source of a dependency edge. + Specify that a statement may be used as the source of a dependency edge. """ - # If source is an insn ID, look up the actual instruction. - source = self.kernel.id_to_insn.get(source, source) + # If source is a stmt ID, look up the actual statement. + source = self.kernel.id_to_stmt.get(source, source) for written in self.map_to_base_storage( set(source.assignee_var_names()) & self.relevant_vars): @@ -1451,13 +1451,13 @@ class DependencyTracker(object): def gen_dependencies_with_target_at(self, target): """ Generate :class:`DependencyRecord` instances for dependencies edges - whose target is the given instruction. + whose target is the given statement. - :arg target: The ID of the instruction for which dependencies + :arg target: The ID of the statement for which dependencies with conflicting var access should be found. """ - # If target is an insn ID, look up the actual instruction. - target = self.kernel.id_to_insn.get(target, target) + # If target is a stmt ID, look up the actual statement. 
+ target = self.kernel.id_to_stmt.get(target, target) tgt_write = self.map_to_base_storage( set(target.assignee_var_names()) & self.relevant_vars) @@ -1493,8 +1493,8 @@ class DependencyTracker(object): continue yield DependencyRecord( - source=self.kernel.id_to_insn[source], - target=self.kernel.id_to_insn[target], + source=self.kernel.id_to_stmt[source], + target=self.kernel.id_to_stmt[target], dep_descr=dep_descr, variable=var, var_kind=self.var_kind) @@ -1502,13 +1502,13 @@ class DependencyTracker(object): def describe_dependency(self, source, target): dep_descr = None - source = self.kernel.id_to_insn[source] - target = self.kernel.id_to_insn[target] + source = self.kernel.id_to_stmt[source] + target = self.kernel.id_to_stmt[target] if self.reverse: source, target = target, source - target_deps = self.kernel.recursive_insn_dep_map()[target.id] + target_deps = self.kernel.recursive_stmt_dep_map()[target.id] if source.id in target_deps: if self.reverse: dep_descr = "{tgt} rev-depends on {src}" @@ -1529,15 +1529,15 @@ def barrier_kind_more_or_equally_global(kind1, kind2): return (kind1 == kind2) or (kind1 == "global" and kind2 == "local") -def insn_ids_reaching_end_without_intervening_barrier(schedule, kind): - return _insn_ids_reaching_end(schedule, kind, reverse=False) +def stmt_ids_reaching_end_without_intervening_barrier(schedule, kind): + return _stmt_ids_reaching_end(schedule, kind, reverse=False) -def insn_ids_reachable_from_start_without_intervening_barrier(schedule, kind): - return _insn_ids_reaching_end(schedule, kind, reverse=True) +def stmt_ids_reachable_from_start_without_intervening_barrier(schedule, kind): + return _stmt_ids_reaching_end(schedule, kind, reverse=True) -def _insn_ids_reaching_end(schedule, kind, reverse): +def _stmt_ids_reaching_end(schedule, kind, reverse): if reverse: schedule = reversed(schedule) enter_scope_item_kind = LeaveLoop @@ -1546,45 +1546,45 @@ def _insn_ids_reaching_end(schedule, kind, reverse): enter_scope_item_kind = 
EnterLoop leave_scope_item_kind = LeaveLoop - insn_ids_alive_at_scope = [set()] + stmt_ids_alive_at_scope = [set()] for sched_item in schedule: if isinstance(sched_item, enter_scope_item_kind): - insn_ids_alive_at_scope.append(set()) + stmt_ids_alive_at_scope.append(set()) elif isinstance(sched_item, leave_scope_item_kind): - innermost_scope = insn_ids_alive_at_scope.pop() - # Instructions in deeper scopes are alive but could be killed by + innermost_scope = stmt_ids_alive_at_scope.pop() + # Statements in deeper scopes are alive but could be killed by # barriers at a shallower level, e.g.: # # for i - # insn0 + # stmt0 # end - # barrier() <= kills insn0 + # barrier() <= kills stmt0 # # Hence we merge this scope into the parent scope. - insn_ids_alive_at_scope[-1].update(innermost_scope) + stmt_ids_alive_at_scope[-1].update(innermost_scope) elif isinstance(sched_item, Barrier): - # This barrier kills only the instruction ids that are alive at + # This barrier kills only the statement ids that are alive at # the current scope (or deeper). 
Without further analysis, we - # can't assume that instructions at shallower scope can be + # can't assume that statements at shallower scope can be # killed by deeper barriers, since loops might be empty, e.g.: # - # insn0 <= isn't killed by barrier (i loop could be empty) + # stmt0 <= isn't killed by barrier (i loop could be empty) # for i - # insn1 <= is killed by barrier + # stmt1 <= is killed by barrier # for j - # insn2 <= is killed by barrier + # stmt2 <= is killed by barrier # end # barrier() # end if barrier_kind_more_or_equally_global(sched_item.kind, kind): - insn_ids_alive_at_scope[-1].clear() + stmt_ids_alive_at_scope[-1].clear() else: - insn_ids_alive_at_scope[-1] |= set( - insn_id for insn_id in sched_item_to_insn_id(sched_item)) + stmt_ids_alive_at_scope[-1] |= set( + stmt_id for stmt_id in sched_item_to_stmt_id(sched_item)) - assert len(insn_ids_alive_at_scope) == 1 - return insn_ids_alive_at_scope[-1] + assert len(stmt_ids_alive_at_scope) == 1 + return stmt_ids_alive_at_scope[-1] def append_barrier_or_raise_error(schedule, dep, verify_only): @@ -1594,7 +1594,7 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): "Dependency '%s' (for variable '%s') " "requires synchronization " "by a %s barrier (add a 'no_sync_with' " - "instruction option to state that no " + "statement option to state that no " "synchronization is needed)" % ( dep.dep_descr.format( @@ -1608,7 +1608,7 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): schedule.append(Barrier( comment=comment, kind=dep.var_kind, - originating_insn_id=None)) + originating_stmt_id=None)) def insert_barriers(kernel, schedule, kind, verify_only, level=0): @@ -1629,10 +1629,10 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): if reverse: # Populate the dependency tracker with sources from the tail end of # the schedule block. 
- for insn_id in ( - insn_ids_reaching_end_without_intervening_barrier( + for stmt_id in ( + stmt_ids_reaching_end_without_intervening_barrier( schedule, kind)): - dep_tracker.add_source(insn_id) + dep_tracker.add_source(stmt_id) result = [] @@ -1644,11 +1644,11 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): subloop, new_i = gather_schedule_block(schedule, i) loop_head = ( - insn_ids_reachable_from_start_without_intervening_barrier( + stmt_ids_reachable_from_start_without_intervening_barrier( subloop, kind)) loop_tail = ( - insn_ids_reaching_end_without_intervening_barrier( + stmt_ids_reaching_end_without_intervening_barrier( subloop, kind)) # Checks if a barrier is needed before the loop. This handles @@ -1662,8 +1662,8 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): # ... from itertools import chain for dep in chain.from_iterable( - dep_tracker.gen_dependencies_with_target_at(insn) - for insn in loop_head): + dep_tracker.gen_dependencies_with_target_at(stmt) + for stmt in loop_head): append_barrier_or_raise_error(result, dep, verify_only) # This barrier gets inserted outside the loop, hence it is # executed unconditionally and so kills all sources before @@ -1692,14 +1692,14 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): dep_tracker.discard_all_sources() i += 1 - elif isinstance(sched_item, RunInstruction): + elif isinstance(sched_item, RunStatement): for dep in dep_tracker.gen_dependencies_with_target_at( - sched_item.insn_id): + sched_item.stmt_id): append_barrier_or_raise_error(result, dep, verify_only) dep_tracker.discard_all_sources() break result.append(sched_item) - dep_tracker.add_source(sched_item.insn_id) + dep_tracker.add_source(sched_item.stmt_id) i += 1 elif isinstance(sched_item, (CallKernel, ReturnFromKernel)): @@ -1731,7 +1731,7 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): i = new_i elif isinstance(sched_item, - (Barrier, RunInstruction, CallKernel, 
ReturnFromKernel)): + (Barrier, RunStatement, CallKernel, ReturnFromKernel)): result.append(sched_item) i += 1 @@ -1756,7 +1756,7 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): def generate_loop_schedules(kernel, debug_args={}): from pytools import MinRecursionLimit - with MinRecursionLimit(max(len(kernel.instructions) * 2, + with MinRecursionLimit(max(len(kernel.statements) * 2, len(kernel.all_inames()) * 4)): for sched in generate_loop_schedules_inner(kernel, debug_args=debug_args): yield sched @@ -1778,14 +1778,14 @@ def generate_loop_schedules_inner(kernel, debug_args={}): preschedule = kernel.schedule if kernel.state == kernel_state.SCHEDULED else () prescheduled_inames = set( - insn.iname - for insn in preschedule - if isinstance(insn, EnterLoop)) + stmt.iname + for stmt in preschedule + if isinstance(stmt, EnterLoop)) - prescheduled_insn_ids = set( - insn_id + prescheduled_stmt_ids = set( + stmt_id for item in preschedule - for insn_id in sched_item_to_insn_id(item)) + for stmt_id in sched_item_to_stmt_id(item)) from loopy.kernel.data import IlpBaseTag, ConcurrentTag, VectorizeTag ilp_inames = set( @@ -1805,7 +1805,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): sched_state = SchedulerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, - loop_insn_dep_map=find_loop_insn_dep_map( + loop_stmt_dep_map=find_loop_stmt_dep_map( kernel, loop_nest_with_map=loop_nest_with_map, loop_nest_around_map=loop_nest_around_map), @@ -1814,7 +1814,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): vec_inames=vec_inames, prescheduled_inames=prescheduled_inames, - prescheduled_insn_ids=prescheduled_insn_ids, + prescheduled_stmt_ids=prescheduled_stmt_ids, # time-varying part active_inames=(), @@ -1823,8 +1823,8 @@ def generate_loop_schedules_inner(kernel, debug_args={}): schedule=(), - unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), - scheduled_insn_ids=frozenset(), + unscheduled_stmt_ids=set(stmt.id 
for stmt in kernel.statements), + scheduled_stmt_ids=frozenset(), within_subkernel=kernel.state != kernel_state.SCHEDULED, may_schedule_global_barriers=True, @@ -1833,7 +1833,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): # ilp and vec are not parallel for the purposes of the scheduler parallel_inames=parallel_inames - ilp_inames - vec_inames, - group_insn_counts=group_insn_counts(kernel), + group_stmt_counts=group_stmt_counts(kernel), active_group_counts={}, uses_of_boostability=[]) @@ -1881,7 +1881,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): debug.stop() gen_sched = filter_nops_from_schedule(kernel, gen_sched) - gen_sched = convert_barrier_instructions_to_barriers( + gen_sched = convert_barrier_statements_to_barriers( kernel, gen_sched) gsize, lsize = kernel.get_grid_size_upper_bounds() diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 1a0789c2f..9c73fdffe 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -24,7 +24,7 @@ THE SOFTWARE. 
from loopy.diagnostic import LoopyError from loopy.schedule import (Barrier, CallKernel, EnterLoop, LeaveLoop, - ReturnFromKernel, RunInstruction) + ReturnFromKernel, RunStatement) from loopy.schedule.tools import get_block_boundaries @@ -72,7 +72,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): while i <= end_idx: sched_item = schedule[i] - if isinstance(sched_item, RunInstruction): + if isinstance(sched_item, RunStatement): current_chunk.append(sched_item) i += 1 @@ -145,7 +145,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): new_schedule + [dummy_return.copy()]) - # Assign names, extra_inames to CallKernel / ReturnFromKernel instructions + # Assign names, extra_inames to CallKernel / ReturnFromKernel statements inames = [] for idx, sched_item in enumerate(new_schedule): diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index f9b08d343..1801ff8e0 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -51,20 +51,20 @@ def get_block_boundaries(schedule): # {{{ subkernel tools def temporaries_read_in_subkernel(kernel, subkernel): - from loopy.kernel.tools import get_subkernel_to_insn_id_map - insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel] + from loopy.kernel.tools import get_subkernel_to_stmt_id_map + stmt_ids = get_subkernel_to_stmt_id_map(kernel)[subkernel] return frozenset(tv - for insn_id in insn_ids - for tv in kernel.id_to_insn[insn_id].read_dependency_names() + for stmt_id in stmt_ids + for tv in kernel.id_to_stmt[stmt_id].read_dependency_names() if tv in kernel.temporary_variables) def temporaries_written_in_subkernel(kernel, subkernel): - from loopy.kernel.tools import get_subkernel_to_insn_id_map - insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel] + from loopy.kernel.tools import get_subkernel_to_stmt_id_map + stmt_ids = get_subkernel_to_stmt_id_map(kernel)[subkernel] return frozenset(tv - for insn_id in insn_ids - for tv in 
kernel.id_to_insn[insn_id].write_dependency_names() + for stmt_id in stmt_ids + for tv in kernel.id_to_stmt[stmt_id].write_dependency_names() if tv in kernel.temporary_variables) # }}} @@ -75,7 +75,7 @@ def temporaries_written_in_subkernel(kernel, subkernel): def add_extra_args_to_schedule(kernel): """ Fill the `extra_args` fields in all the :class:`loopy.schedule.CallKernel` - instructions in the schedule with global temporaries. + statements in the schedule with global temporaries. """ new_schedule = [] from loopy.schedule import CallKernel diff --git a/loopy/statistics.py b/loopy/statistics.py index 88d7ec328..69bf555bc 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1097,7 +1097,7 @@ def count(kernel, set, space=None): return add_assumptions_guard(kernel, count) -def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): +def get_unused_hw_axes_factor(knl, stmt, disregard_local_axes, space=None): # FIXME: Multi-kernel support gsize, lsize = knl.get_grid_size_upper_bounds() @@ -1105,7 +1105,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): l_used = set() from loopy.kernel.data import LocalIndexTag, GroupIndexTag - for iname in knl.insn_inames(insn): + for iname in knl.stmt_inames(stmt): tag = knl.iname_to_tag.get(iname) if isinstance(tag, LocalIndexTag): @@ -1135,17 +1135,17 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): return add_assumptions_guard(knl, result) -def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False): - insn_inames = knl.insn_inames(insn) +def count_stmt_runs(knl, stmt, count_redundant_work, disregard_local_axes=False): + stmt_inames = knl.stmt_inames(stmt) if disregard_local_axes: from loopy.kernel.data import LocalIndexTag - insn_inames = [iname for iname in insn_inames if not + stmt_inames = [iname for iname in stmt_inames if not isinstance(knl.iname_to_tag.get(iname), LocalIndexTag)] - inames_domain = 
knl.get_inames_domain(insn_inames) + inames_domain = knl.get_inames_domain(stmt_inames) domain = (inames_domain.project_out_except( - insn_inames, [dim_type.set])) + stmt_inames, [dim_type.set])) space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT, set=[], params=knl.outer_params()) @@ -1153,7 +1153,7 @@ def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False) c = count(knl, domain, space=space) if count_redundant_work: - unused_fac = get_unused_hw_axes_factor(knl, insn, + unused_fac = get_unused_hw_axes_factor(knl, stmt, disregard_local_axes=disregard_local_axes, space=space) return c * unused_fac @@ -1210,10 +1210,10 @@ def get_op_map(knl, numpy_types=True, count_redundant_work=False): op_map = ToCountMap() op_counter = ExpressionOpCounter(knl) - for insn in knl.instructions: - ops = op_counter(insn.assignee) + op_counter(insn.expression) - op_map = op_map + ops*count_insn_runs( - knl, insn, + for stmt in knl.statements: + ops = op_counter(stmt.assignee) + op_counter(stmt.expression) + op_map = op_map + ops*count_stmt_runs( + knl, stmt, count_redundant_work=count_redundant_work) if numpy_types: @@ -1296,11 +1296,11 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): cache_holder = CacheHolder() - @memoize_in(cache_holder, "insn_count") - def get_insn_count(knl, insn_id, uniform=False): - insn = knl.id_to_insn[insn_id] - return count_insn_runs( - knl, insn, disregard_local_axes=uniform, + @memoize_in(cache_holder, "stmt_count") + def get_stmt_count(knl, stmt_id, uniform=False): + stmt = knl.id_to_stmt[stmt_id] + return count_stmt_runs( + knl, stmt, disregard_local_axes=uniform, count_redundant_work=count_redundant_work) knl = infer_unknown_types(knl, expect_completion=True) @@ -1310,13 +1310,13 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): access_counter_g = GlobalMemAccessCounter(knl) access_counter_l = LocalMemAccessCounter(knl) - for insn in knl.instructions: + for stmt in 
knl.statements: access_expr = ( - access_counter_g(insn.expression) - + access_counter_l(insn.expression) + access_counter_g(stmt.expression) + + access_counter_l(stmt.expression) ).with_set_attributes(direction="load") - access_assignee_g = access_counter_g(insn.assignee).with_set_attributes( + access_assignee_g = access_counter_g(stmt.assignee).with_set_attributes( direction="store") # FIXME: (!!!!) for now, don't count writes to local mem @@ -1329,7 +1329,7 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): access_map = ( access_map + ToCountMap({key: val}) - * get_insn_count(knl, insn.id, is_uniform)) + * get_stmt_count(knl, stmt.id, is_uniform)) #currently not counting stride of local mem access for key, val in six.iteritems(access_assignee_g.count_map): @@ -1339,7 +1339,7 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): access_map = ( access_map + ToCountMap({key: val}) - * get_insn_count(knl, insn.id, is_uniform)) + * get_stmt_count(knl, stmt.id, is_uniform)) # for now, don't count writes to local mem if numpy_types: @@ -1387,7 +1387,7 @@ def get_synchronization_map(knl): from loopy.preprocess import preprocess_kernel, infer_unknown_types from loopy.schedule import (EnterLoop, LeaveLoop, Barrier, - CallKernel, ReturnFromKernel, RunInstruction) + CallKernel, ReturnFromKernel, RunStatement) from operator import mul knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl) @@ -1424,7 +1424,7 @@ def get_synchronization_map(knl): result = result + ToCountMap( {"kernel_launch": get_count_poly(iname_list)}) - elif isinstance(sched_item, (ReturnFromKernel, RunInstruction)): + elif isinstance(sched_item, (ReturnFromKernel, RunStatement)): pass else: @@ -1459,24 +1459,24 @@ def gather_access_footprints(kernel, ignore_uncountable=False): write_footprints = [] read_footprints = [] - for insn in kernel.instructions: - if not isinstance(insn, MultiAssignmentBase): + for stmt in 
kernel.statements: + if not isinstance(stmt, MultiAssignmentBase): warn_with_kernel(kernel, "count_non_assignment", - "Non-assignment instruction encountered in " + "Non-assignment statement encountered in " "gather_access_footprints, not counted") continue - insn_inames = kernel.insn_inames(insn) - inames_domain = kernel.get_inames_domain(insn_inames) - domain = (inames_domain.project_out_except(insn_inames, + stmt_inames = kernel.stmt_inames(stmt) + inames_domain = kernel.get_inames_domain(stmt_inames) + domain = (inames_domain.project_out_except(stmt_inames, [dim_type.set])) afg = AccessFootprintGatherer(kernel, domain, ignore_uncountable=ignore_uncountable) - for assignee in insn.assignees: - write_footprints.append(afg(insn.assignees)) - read_footprints.append(afg(insn.expression)) + for assignee in stmt.assignees: + write_footprints.append(afg(stmt.assignees)) + read_footprints.append(afg(stmt.expression)) write_footprints = AccessFootprintGatherer.combine(write_footprints) read_footprints = AccessFootprintGatherer.combine(read_footprints) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 543c2743b..8ac520d70 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -595,7 +595,7 @@ def parse_tagged_name(expr): class ExpansionState(ImmutableRecord): """ .. attribute:: kernel - .. attribute:: instruction + .. attribute:: statement .. 
attribute:: stack @@ -608,8 +608,8 @@ class ExpansionState(ImmutableRecord): """ @property - def insn_id(self): - return self.instruction.id + def stmt_id(self): + return self.statement.id def apply_arg_context(self, expr): from pymbolic.mapper.substitutor import make_subst_func @@ -651,12 +651,12 @@ class SubstitutionRuleRenamer(IdentityMapper): return TaggedVariable(new_name, tag) -def rename_subst_rules_in_instructions(insns, renames): +def rename_subst_rules_in_statements(stmts, renames): subst_renamer = SubstitutionRuleRenamer(renames) return [ - insn.with_transformed_expressions(subst_renamer) - for insn in insns] + stmt.with_transformed_expressions(subst_renamer) + for stmt in stmts] class SubstitutionRuleMappingContext(object): @@ -766,11 +766,11 @@ class SubstitutionRuleMappingContext(object): def finish_kernel(self, kernel): new_substs, renames = self._get_new_substitutions_and_renames() - new_insns = rename_subst_rules_in_instructions(kernel.instructions, renames) + new_stmts = rename_subst_rules_in_statements(kernel.statements, renames) return kernel.copy( substitutions=new_substs, - instructions=new_insns) + statements=new_stmts) class RuleAwareIdentityMapper(IdentityMapper): @@ -844,30 +844,30 @@ class RuleAwareIdentityMapper(IdentityMapper): else: return sym - def __call__(self, expr, kernel, insn): - from loopy.kernel.data import InstructionBase - assert insn is None or isinstance(insn, InstructionBase) + def __call__(self, expr, kernel, stmt): + from loopy.kernel.data import StatementBase + assert stmt is None or isinstance(stmt, StatementBase) return IdentityMapper.__call__(self, expr, ExpansionState( kernel=kernel, - instruction=insn, + statement=stmt, stack=(), arg_context={})) - def map_instruction(self, kernel, insn): - return insn + def map_statement(self, kernel, stmt): + return stmt def map_kernel(self, kernel): - new_insns = [ + new_stmts = [ # While subst rules are not allowed in assignees, the mapper # may perform tasks entirely 
unrelated to subst rules, so # we must map assignees, too. - self.map_instruction(kernel, - insn.with_transformed_expressions(self, kernel, insn)) - for insn in kernel.instructions] + self.map_statement(kernel, + stmt.with_transformed_expressions(self, kernel, stmt)) + for stmt in kernel.statements] - return kernel.copy(instructions=new_insns) + return kernel.copy(statements=new_stmts) class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper): @@ -880,7 +880,7 @@ class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper): def map_variable(self, expr, expn_state): if (expr.name in expn_state.arg_context or not self.within( - expn_state.kernel, expn_state.instruction, expn_state.stack)): + expn_state.kernel, expn_state.statement, expn_state.stack)): return super(RuleAwareSubstitutionMapper, self).map_variable( expr, expn_state) @@ -907,7 +907,7 @@ class RuleAwareSubstitutionRuleExpander(RuleAwareIdentityMapper): new_stack = expn_state.stack + ((name, tags),) - if self.within(expn_state.kernel, expn_state.instruction, new_stack): + if self.within(expn_state.kernel, expn_state.statement, new_stack): # expand rule = self.rules[name] diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 5800a0236..f34de9150 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -201,10 +201,10 @@ class ASTBuilderBase(object): def get_image_arg_decl(self, name, shape, num_target_axes, dtype, is_written): raise NotImplementedError() - def emit_assignment(self, codegen_state, insn): + def emit_assignment(self, codegen_state, stmt): raise NotImplementedError() - def emit_multiple_assignment(self, codegen_state, insn): + def emit_multiple_assignment(self, codegen_state, stmt): raise NotImplementedError() def emit_sequential_loop(self, codegen_state, iname, iname_dtype, diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index e54ac0f69..09d138241 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -607,18 +607,18 @@ 
class CASTBuilder(ASTBuilderBase): return arg_decl - def emit_assignment(self, codegen_state, insn): + def emit_assignment(self, codegen_state, stmt): kernel = codegen_state.kernel ecm = codegen_state.expression_to_code_mapper - assignee_var_name, = insn.assignee_var_names() + assignee_var_name, = stmt.assignee_var_names() lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name) lhs_dtype = lhs_var.dtype - if insn.atomicity is not None: + if stmt.atomicity is not None: lhs_atomicity = [ - a for a in insn.atomicity if a.var_name == assignee_var_name] + a for a in stmt.atomicity if a.var_name == assignee_var_name] assert len(lhs_atomicity) <= 1 if lhs_atomicity: lhs_atomicity, = lhs_atomicity @@ -630,13 +630,13 @@ class CASTBuilder(ASTBuilderBase): from loopy.kernel.data import AtomicInit, AtomicUpdate from loopy.expression import dtype_to_type_context - lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None) + lhs_code = ecm(stmt.assignee, prec=PREC_NONE, type_context=None) rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype) if lhs_atomicity is None: from cgen import Assign return Assign( lhs_code, - ecm(insn.expression, prec=PREC_NONE, + ecm(stmt.expression, prec=PREC_NONE, type_context=rhs_type_context, needed_dtype=lhs_dtype)) @@ -647,7 +647,7 @@ class CASTBuilder(ASTBuilderBase): codegen_state.seen_atomic_dtypes.add(lhs_dtype) return codegen_state.ast_builder.emit_atomic_update( codegen_state, lhs_atomicity, lhs_var, - insn.assignee, insn.expression, + stmt.assignee, stmt.expression, lhs_dtype, rhs_type_context) else: @@ -658,16 +658,16 @@ class CASTBuilder(ASTBuilderBase): lhs_expr, rhs_expr, lhs_dtype): raise NotImplementedError("atomic updates in %s" % type(self).__name__) - def emit_tuple_assignment(self, codegen_state, insn): + def emit_tuple_assignment(self, codegen_state, stmt): ecm = codegen_state.expression_to_code_mapper from cgen import Assign, block_if_necessary assignments = [] for i, (assignee, parameter) in 
enumerate( - zip(insn.assignees, insn.expression.parameters)): + zip(stmt.assignees, stmt.expression.parameters)): lhs_code = ecm(assignee, prec=PREC_NONE, type_context=None) - assignee_var_name = insn.assignee_var_names()[i] + assignee_var_name = stmt.assignee_var_names()[i] lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name) lhs_dtype = lhs_var.dtype @@ -681,21 +681,21 @@ class CASTBuilder(ASTBuilderBase): return block_if_necessary(assignments) - def emit_multiple_assignment(self, codegen_state, insn): + def emit_multiple_assignment(self, codegen_state, stmt): ecm = codegen_state.expression_to_code_mapper from pymbolic.primitives import Variable from pymbolic.mapper.stringifier import PREC_NONE - func_id = insn.expression.function - parameters = insn.expression.parameters + func_id = stmt.expression.function + parameters = stmt.expression.parameters if isinstance(func_id, Variable): func_id = func_id.name assignee_var_descriptors = [ codegen_state.kernel.get_var_descriptor(a) - for a in insn.assignee_var_names()] + for a in stmt.assignee_var_names()] par_dtypes = tuple(ecm.infer_type(par) for par in parameters) @@ -709,7 +709,7 @@ class CASTBuilder(ASTBuilderBase): if mangle_result.target_name == "loopy_make_tuple": # This shorcut avoids actually having to emit a 'make_tuple' function. 
- return self.emit_tuple_assignment(codegen_state, insn) + return self.emit_tuple_assignment(codegen_state, stmt) from loopy.expression import dtype_to_type_context c_parameters = [ @@ -727,10 +727,10 @@ class CASTBuilder(ASTBuilderBase): from pymbolic import var for i, (a, tgt_dtype) in enumerate( - zip(insn.assignees[1:], mangle_result.result_dtypes[1:])): + zip(stmt.assignees[1:], mangle_result.result_dtypes[1:])): if tgt_dtype != ecm.infer_type(a): raise LoopyError("type mismatch in %d'th (1-based) left-hand " - "side of instruction '%s'" % (i+1, insn.id)) + "side of statement '%s'" % (i+1, stmt.id)) c_parameters.append( # TODO Yuck: The "where-at function": &(...) var("&")( @@ -752,7 +752,7 @@ class CASTBuilder(ASTBuilderBase): assignee_var_descriptors[0].dtype, result) - lhs_code = ecm(insn.assignees[0], prec=PREC_NONE, type_context=None) + lhs_code = ecm(stmt.assignees[0], prec=PREC_NONE, type_context=None) from cgen import Assign return Assign( diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 2bdffb5aa..484973433 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -244,10 +244,10 @@ class CUDACASTBuilder(CASTBuilder): from cgen import Extern fdecl = Extern("C", fdecl) - from loopy.schedule import get_insn_ids_for_block_at + from loopy.schedule import get_stmt_ids_for_block_at _, local_grid_size = \ - codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at( + codegen_state.kernel.get_grid_sizes_for_stmt_ids_as_exprs( + get_stmt_ids_for_block_at( codegen_state.kernel.schedule, schedule_index)) from loopy.symbolic import get_dependencies @@ -294,7 +294,7 @@ class CUDACASTBuilder(CASTBuilder): def emit_barrier(self, kind, comment): """ :arg kind: ``"local"`` or ``"global"`` - :return: a :class:`loopy.codegen.GeneratedInstruction`. + :return: a :class:`loopy.codegen.GeneratedStatement`. 
""" if kind == "local": if comment: diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 35dade904..4d5a60595 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -362,31 +362,31 @@ class ISPCASTBuilder(CASTBuilder): from cgen.ispc import ISPCUniform return ISPCUniform(result) - def emit_assignment(self, codegen_state, insn): + def emit_assignment(self, codegen_state, stmt): kernel = codegen_state.kernel ecm = codegen_state.expression_to_code_mapper - assignee_var_name, = insn.assignee_var_names() + assignee_var_name, = stmt.assignee_var_names() lhs_var = codegen_state.kernel.get_var_descriptor(assignee_var_name) lhs_dtype = lhs_var.dtype - if insn.atomicity: + if stmt.atomicity: raise NotImplementedError("atomic ops in ISPC") from loopy.expression import dtype_to_type_context from pymbolic.mapper.stringifier import PREC_NONE rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype) - rhs_code = ecm(insn.expression, prec=PREC_NONE, + rhs_code = ecm(stmt.expression, prec=PREC_NONE, type_context=rhs_type_context, needed_dtype=lhs_dtype) - lhs = insn.assignee + lhs = stmt.assignee # {{{ handle streaming stores - if "!streaming_store" in insn.tags: + if "!streaming_store" in stmt.tags: ary = ecm.find_array(lhs) from loopy.kernel.array import get_access_info @@ -455,7 +455,7 @@ class ISPCASTBuilder(CASTBuilder): isinstance( kernel.iname_to_tag.get(dep), LocalIndexTag) and kernel.iname_to_tag.get(dep).axis == 0 - for dep in get_dependencies(insn.expression)) + for dep in get_dependencies(stmt.expression)) if not rhs_has_programindex: rhs_code = "broadcast(%s, 0)" % rhs_code diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index a5f7562c4..e47a7466a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -413,9 +413,9 @@ class OpenCLCASTBuilder(CASTBuilder): from cgen.opencl import CLKernel, CLRequiredWorkGroupSize fdecl = CLKernel(fdecl) - from loopy.schedule import get_insn_ids_for_block_at - _, local_sizes = 
codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs( - get_insn_ids_for_block_at( + from loopy.schedule import get_stmt_ids_for_block_at + _, local_sizes = codegen_state.kernel.get_grid_sizes_for_stmt_ids_as_exprs( + get_stmt_ids_for_block_at( codegen_state.kernel.schedule, schedule_index)) from loopy.symbolic import get_dependencies @@ -453,7 +453,7 @@ class OpenCLCASTBuilder(CASTBuilder): def emit_barrier(self, kind, comment): """ :arg kind: ``"local"`` or ``"global"`` - :return: a :class:`loopy.codegen.GeneratedInstruction`. + :return: a :class:`loopy.codegen.GeneratedStatement`. """ if kind == "local": if comment: diff --git a/loopy/target/python.py b/loopy/target/python.py index ce04986d3..4c3c33fb5 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -291,18 +291,18 @@ class PythonASTBuilderBase(ASTBuilderBase): from genpy import If return If(condition_str, ast) - def emit_assignment(self, codegen_state, insn): + def emit_assignment(self, codegen_state, stmt): ecm = codegen_state.expression_to_code_mapper - if insn.atomicity: + if stmt.atomicity: raise NotImplementedError("atomic ops in Python") from pymbolic.mapper.stringifier import PREC_NONE from genpy import Assign return Assign( - ecm(insn.assignee, prec=PREC_NONE, type_context=None), - ecm(insn.expression, prec=PREC_NONE, type_context=None)) + ecm(stmt.assignee, prec=PREC_NONE, type_context=None), + ecm(stmt.expression, prec=PREC_NONE, type_context=None)) # }}} diff --git a/loopy/tools.py b/loopy/tools.py index d6952d547..022bebabc 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -135,8 +135,8 @@ class LoopyEqKeyBuilder(object): Usage:: kb = LoopyEqKeyBuilder() - kb.update_for_class(insn.__class__) - kb.update_for_field("field", insn.field) + kb.update_for_class(stmt.__class__) + kb.update_for_field("field", stmt.field) ... 
key = kb.key() diff --git a/loopy/transform/arithmetic.py b/loopy/transform/arithmetic.py index b7f47c38a..6ec67bf5a 100644 --- a/loopy/transform/arithmetic.py +++ b/loopy/transform/arithmetic.py @@ -34,9 +34,9 @@ def fold_constants(kernel): from loopy.symbolic import ConstantFoldingMapper cfm = ConstantFoldingMapper() - new_insns = [ - insn.with_transformed_expressions(cfm) - for insn in kernel.instructions] + new_stmts = [ + stmt.with_transformed_expressions(cfm) + for stmt in kernel.statements] new_substs = dict( (sub.name, @@ -44,7 +44,7 @@ def fold_constants(kernel): for sub in six.itervalues(kernel.substitutions)) return kernel.copy( - instructions=new_insns, + statements=new_stmts, substitutions=new_substs) # }}} @@ -135,8 +135,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): else: raise ValueError("unexpected type of access_expr") - def is_assignee(insn): - return var_name in insn.assignee_var_names() + def is_assignee(stmt): + return var_name in stmt.assignee_var_names() def iterate_as(cls, expr): if isinstance(expr, cls): @@ -151,16 +151,16 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): from loopy.kernel.data import Assignment - for insn in kernel.instructions: - if not is_assignee(insn): + for stmt in kernel.statements: + if not is_assignee(stmt): continue - if not isinstance(insn, Assignment): + if not isinstance(stmt, Assignment): raise LoopyError("'%s' modified by non-single-assignment" % var_name) - lhs = insn.assignee - rhs = insn.expression + lhs = stmt.assignee + rhs = stmt.expression if is_zero(rhs): continue @@ -182,8 +182,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): for part in iterate_as(Product, term): if var_name in get_dependencies(part): raise LoopyError("unexpected dependency on '%s' " - "in RHS of instruction '%s'" - % (var_name, insn.id)) + "in RHS of statement '%s'" + % (var_name, stmt.id)) product_parts = set(iterate_as(Product, term)) 
@@ -211,8 +211,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): for part in iterate_as(Product, term): if var_name in get_dependencies(part): raise LoopyError("unexpected dependency on '%s' " - "in RHS of instruction '%s'" - % (var_name, insn.id)) + "in RHS of statement '%s'" + % (var_name, stmt.id)) product_parts = set(iterate_as(Product, term)) @@ -235,27 +235,27 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): # {{{ remove common factors - new_insns = [] + new_stmts = [] - for insn in kernel.instructions: - if not isinstance(insn, Assignment) or not is_assignee(insn): - new_insns.append(insn) + for stmt in kernel.statements: + if not isinstance(stmt, Assignment) or not is_assignee(stmt): + new_stmts.append(stmt) continue - index_key = extract_index_key(insn.assignee) + index_key = extract_index_key(stmt.assignee) - lhs = insn.assignee - rhs = insn.expression + lhs = stmt.assignee + rhs = stmt.expression if is_zero(rhs): - new_insns.append(insn) + new_stmts.append(stmt) continue index_key = extract_index_key(lhs) cf_index, unif_result = find_unifiable_cf_index(index_key) if cf_index is None: - new_insns.append(insn) + new_stmts.append(stmt) continue _, my_common_factors = common_factors[cf_index] @@ -281,8 +281,8 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): if part not in mapped_my_common_factors ])) - new_insns.append( - insn.copy(expression=flattened_sum(new_sum_terms))) + new_stmts.append( + stmt.copy(expression=flattened_sum(new_sum_terms))) # }}} @@ -314,21 +314,21 @@ def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()): else: return expr - insns = new_insns - new_insns = [] + stmts = new_stmts + new_stmts = [] subm = SubstitutionMapper(find_substitution) - for insn in insns: - if not isinstance(insn, Assignment) or is_assignee(insn): - new_insns.append(insn) + for stmt in stmts: + if not isinstance(stmt, Assignment) or is_assignee(stmt): + 
new_stmts.append(stmt) continue - new_insns.append(insn.with_transformed_expressions(subm)) + new_stmts.append(stmt.with_transformed_expressions(subm)) # }}} - return kernel.copy(instructions=new_insns) + return kernel.copy(statements=new_stmts) # }}} diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index e7a86300f..e74259cc3 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -168,9 +168,9 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch", batch_iname_set = frozenset([batch_iname]) kernel = kernel.copy( - instructions=[ - insn.copy(within_inames=insn.within_inames | batch_iname_set) - for insn in kernel.instructions]) + statements=[ + stmt.copy(within_inames=stmt.within_inames | batch_iname_set) + for stmt in kernel.statements]) return kernel diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index 1b059b6a7..c0ef05d24 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -52,7 +52,7 @@ class ArrayAccessReplacer(RuleAwareIdentityMapper): self.array_base_map = array_base_map self.var_name = var_name - self.modified_insn_ids = set() + self.modified_stmt_ids = set() self.buf_var = buf_var @@ -60,28 +60,28 @@ class ArrayAccessReplacer(RuleAwareIdentityMapper): result = None if expr.name == self.var_name and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack): result = self.map_array_access((), expn_state) if result is None: return super(ArrayAccessReplacer, self).map_variable(expr, expn_state) else: - self.modified_insn_ids.add(expn_state.insn_id) + self.modified_stmt_ids.add(expn_state.stmt_id) return result def map_subscript(self, expr, expn_state): result = None if expr.aggregate.name == self.var_name and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack): result = self.map_array_access(expr.index_tuple, expn_state) if result is None: return super(ArrayAccessReplacer, 
self).map_subscript(expr, expn_state) else: - self.modified_insn_ids.add(expn_state.insn_id) + self.modified_stmt_ids.add(expn_state.stmt_id) return result def map_array_access(self, index, expn_state): @@ -153,7 +153,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, being buffered). :arg store_expression: Either *None*, *False*, or an expression involving variables 'base' and 'buffer' (without array indices). - (*None* indicates that a default storage instruction should be used, + (*None* indicates that a default storage statement should be used, *False* indicates that no storing of the temporary should occur at all.) :arg within: If not None, limit the action of the transformation to @@ -259,14 +259,14 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, within_inames = set() access_descriptors = [] - for insn in kernel.instructions: - if not within(kernel, insn.id, ()): + for stmt in kernel.statements: + if not within(kernel, stmt.id, ()): continue from pymbolic.primitives import Variable, Subscript from loopy.symbolic import LinearSubscript - for assignee in insn.assignees: + for assignee in stmt.assignees: if isinstance(assignee, Variable): assignee_name = assignee.name index = () @@ -289,7 +289,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, - buffer_inames_set) access_descriptors.append( AccessDescriptor( - identifier=insn.id, + identifier=stmt.id, storage_axis_exprs=index)) # {{{ find fetch/store inames @@ -384,11 +384,11 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, # }}} - new_insns = [] + new_stmts = [] buf_var = var(buf_var_name) - # {{{ generate init instruction + # {{{ generate init statement buf_var_init = buf_var if non1_init_inames: @@ -419,9 +419,9 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, "base": init_base, }))(init_expression) - init_insn_id = kernel.make_unique_instruction_id(based_on="init_"+var_name) + 
init_stmt_id = kernel.make_unique_statement_id(based_on="init_"+var_name) from loopy.kernel.data import Assignment - init_instruction = Assignment(id=init_insn_id, + init_statement = Assignment(id=init_stmt_id, assignee=buf_var_init, expression=init_expression, within_inames=( @@ -439,14 +439,14 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, kernel = rule_mapping_context.finish_kernel(aar.map_kernel(kernel)) did_write = False - for insn_id in aar.modified_insn_ids: - insn = kernel.id_to_insn[insn_id] - if buf_var_name in insn.assignee_var_names(): + for stmt_id in aar.modified_stmt_ids: + stmt = kernel.id_to_stmt[stmt_id] + if buf_var_name in stmt.assignee_var_names(): did_write = True - # {{{ add init_insn_id to depends_on + # {{{ add init_stmt_id to depends_on - new_insns = [] + new_stmts = [] def none_to_empty_set(s): if s is None: @@ -454,19 +454,19 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, else: return s - for insn in kernel.instructions: - if insn.id in aar.modified_insn_ids: - new_insns.append( - insn.copy( + for stmt in kernel.statements: + if stmt.id in aar.modified_stmt_ids: + new_stmts.append( + stmt.copy( depends_on=( - none_to_empty_set(insn.depends_on) - | frozenset([init_insn_id])))) + none_to_empty_set(stmt.depends_on) + | frozenset([init_stmt_id])))) else: - new_insns.append(insn) + new_stmts.append(stmt) # }}} - # {{{ generate store instruction + # {{{ generate store statement buf_var_store = buf_var if non1_store_inames: @@ -498,10 +498,10 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, if store_expression is not False: from loopy.kernel.data import Assignment - store_instruction = Assignment( - id=kernel.make_unique_instruction_id(based_on="store_"+var_name), - depends_on=frozenset(aar.modified_insn_ids), - no_sync_with=frozenset([(init_insn_id, "any")]), + store_statement = Assignment( + id=kernel.make_unique_statement_id(based_on="store_"+var_name), + 
depends_on=frozenset(aar.modified_stmt_ids), + no_sync_with=frozenset([(init_stmt_id, "any")]), assignee=store_target, expression=store_expression, within_inames=( @@ -512,16 +512,16 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, # }}} - new_insns.append(init_instruction) + new_stmts.append(init_statement) if did_write: - new_insns.append(store_instruction) + new_stmts.append(store_statement) else: for iname in store_inames: del new_iname_to_tag[iname] kernel = kernel.copy( domains=new_kernel_domains, - instructions=new_insns, + statements=new_stmts, temporary_variables=new_temporary_variables) from loopy import tag_inames diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 575311b11..e2c1a5080 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -227,7 +227,7 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, :arg fetch_outer_inames: The inames within which the fetch - instruction is nested. If *None*, make an educated guess. + statement is nested. If *None*, make an educated guess. This function internally uses :func:`extract_subst` and :func:`precompute`. 
""" @@ -446,8 +446,8 @@ def remove_unused_arguments(knl): exp_knl = lp.expand_subst(knl) refd_vars = set(knl.all_params()) - for insn in exp_knl.instructions: - refd_vars.update(insn.dependency_names()) + for stmt in exp_knl.statements: + refd_vars.update(stmt.dependency_names()) from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag from loopy.symbolic import get_dependencies @@ -512,19 +512,19 @@ def alias_temporaries(knl, names, base_name_prefix=None, names_set = set(names) if synchronize_for_exclusive_use: - new_insns = [] - for insn in knl.instructions: - temp_deps = insn.dependency_names() & names_set + new_stmts = [] + for stmt in knl.statements: + temp_deps = stmt.dependency_names() & names_set if not temp_deps: - new_insns.append(insn) + new_stmts.append(stmt) continue if len(temp_deps) > 1: - raise LoopyError("Instruction {insn} refers to multiple of the " + raise LoopyError("Statement {stmt} refers to multiple of the " "temporaries being aliased, namely '{temps}'. Cannot alias." 
.format( - insn=insn.id, + stmt=stmt.id, temps=", ".join(temp_deps))) temp_name, = temp_deps @@ -534,13 +534,13 @@ def alias_temporaries(knl, names, base_name_prefix=None, frozenset(group_names[:temp_idx]) | frozenset(group_names[temp_idx+1:])) - new_insns.append( - insn.copy( - groups=insn.groups | frozenset([group_name]), + new_stmts.append( + stmt.copy( + groups=stmt.groups | frozenset([group_name]), conflicts_with_groups=( - insn.conflicts_with_groups | other_group_names))) + stmt.conflicts_with_groups | other_group_names))) else: - new_insns = knl.instructions + new_stmts = knl.statements new_temporary_variables = {} for tv in six.itervalues(knl.temporary_variables): @@ -556,7 +556,7 @@ def alias_temporaries(knl, names, base_name_prefix=None, new_temporary_variables[tv.name] = tv return knl.copy( - instructions=new_insns, + statements=new_stmts, temporary_variables=new_temporary_variables) # }}} @@ -624,7 +624,7 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): kernel.substitutions, var_name_gen) smap = RuleAwareSubstitutionMapper(rule_mapping_context, make_subst_func(subst_dict), - within=lambda knl, insn, stack: True) + within=lambda knl, stmt, stack: True) kernel = smap.map_kernel(kernel) @@ -683,7 +683,7 @@ def set_temporary_scope(kernel, temp_var_names, scope): # {{{ reduction_arg_to_subst_rule -def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=None): +def reduction_arg_to_subst_rule(knl, inames, stmt_match=None, subst_rule_name=None): if isinstance(inames, str): inames = [s.strip() for s in inames.split(",")] @@ -731,15 +731,15 @@ def reduction_arg_to_subst_rule(knl, inames, insn_match=None, subst_rule_name=No from loopy.kernel.data import MultiAssignmentBase - new_insns = [] - for insn in knl.instructions: - if not isinstance(insn, MultiAssignmentBase): - new_insns.append(insn) + new_stmts = [] + for stmt in knl.statements: + if not isinstance(stmt, MultiAssignmentBase): + new_stmts.append(stmt) else: - 
new_insns.append(insn.copy(expression=cb_mapper(insn.expression))) + new_stmts.append(stmt.copy(expression=cb_mapper(stmt.expression))) return knl.copy( - instructions=new_insns, + statements=new_stmts, substitutions=substs) # }}} diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index d4dcb3701..e762082b6 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -168,12 +168,12 @@ class DifferentiationContext(object): self.imported_outputs = set() self.output_to_diff_output = {} - self.generate_instruction_id = self.kernel.get_instruction_id_generator() + self.generate_statement_id = self.kernel.get_statement_id_generator() self.new_args = [] self.new_temporary_variables = {} - self.new_instructions = [] - self.imported_instructions = set() + self.new_statements = [] + self.imported_statements = set() self.new_domains = [] self.rule_mapping_context = SubstitutionRuleMappingContext( @@ -189,7 +189,7 @@ class DifferentiationContext(object): knl = knl.copy( args=new_args, temporary_variables=new_temp_vars, - instructions=self.new_instructions, + statements=self.new_statements, domains=knl.domains + self.new_domains) del new_args @@ -226,24 +226,24 @@ class DifferentiationContext(object): # }}} - def import_instruction_and_deps(self, insn_id): - if insn_id in self.imported_instructions: + def import_statement_and_deps(self, stmt_id): + if stmt_id in self.imported_statements: return - insn = self.kernel.id_to_insn[insn_id] - self.new_instructions.append(insn) - self.imported_instructions.add(insn_id) + stmt = self.kernel.id_to_stmt[stmt_id] + self.new_statements.append(stmt) + self.imported_statements.add(stmt_id) id_map = RuleAwareIdentityMapper(self.rule_mapping_context) - if isinstance(insn, lp.Assignment): - id_map(insn.expression, self.kernel, insn) + if isinstance(stmt, lp.Assignment): + id_map(stmt.expression, self.kernel, stmt) else: raise RuntimeError("do not know how to deal with " - "instruction of type %s" % type(insn)) + "statement 
of type %s" % type(stmt)) - for dep in insn.depends_on: - self.import_instruction_and_deps(dep) + for dep in stmt.depends_on: + self.import_statement_and_deps(dep) def import_output_var(self, var_name): writers = self.kernel.writer_map().get(var_name, []) @@ -255,8 +255,8 @@ class DifferentiationContext(object): if not writers: return - insn_id, = writers - self.import_instruction_and_deps(insn_id) + stmt_id, = writers + self.import_statement_and_deps(stmt_id) def get_diff_var(self, var_name): """ @@ -279,7 +279,7 @@ class DifferentiationContext(object): % var_name) orig_writer_id, = writers - orig_writer_insn = self.kernel.id_to_insn[orig_writer_id] + orig_writer_stmt = self.kernel.id_to_stmt[orig_writer_id] diff_inames = self.add_diff_inames() diff_iname_exprs = tuple(var(diname) for diname in diff_inames) @@ -289,32 +289,32 @@ class DifferentiationContext(object): diff_mapper = LoopyDiffMapper(self.rule_mapping_context, self, diff_inames) - diff_expr = diff_mapper(orig_writer_insn.expression, - self.kernel, orig_writer_insn) + diff_expr = diff_mapper(orig_writer_stmt.expression, + self.kernel, orig_writer_stmt) if not diff_expr: return None - assert isinstance(orig_writer_insn, lp.Assignment) - if isinstance(orig_writer_insn.assignee, p.Subscript): - lhs_ind = orig_writer_insn.assignee.index_tuple - elif isinstance(orig_writer_insn.assignee, p.Variable): + assert isinstance(orig_writer_stmt, lp.Assignment) + if isinstance(orig_writer_stmt.assignee, p.Subscript): + lhs_ind = orig_writer_stmt.assignee.index_tuple + elif isinstance(orig_writer_stmt.assignee, p.Variable): lhs_ind = () else: raise LoopyError( "Unrecognized LHS type in differentiation: %s" - % type(orig_writer_insn.assignee).__name__) + % type(orig_writer_stmt.assignee).__name__) - new_insn_id = self.generate_instruction_id() - insn = lp.Assignment( - id=new_insn_id, + new_stmt_id = self.generate_statement_id() + stmt = lp.Assignment( + id=new_stmt_id, assignee=var(new_var_name)[ lhs_ind + 
diff_iname_exprs], expression=diff_expr, within_inames=( - orig_writer_insn.within_inames | frozenset(diff_inames))) + orig_writer_stmt.within_inames | frozenset(diff_inames))) - self.new_instructions.append(insn) + self.new_statements.append(stmt) # }}} @@ -383,7 +383,7 @@ def diff_kernel(knl, diff_outputs, by, diff_iname_prefix="diff_i", var_name_gen = knl.get_var_name_generator() - # {{{ differentiate instructions + # {{{ differentiate statements diff_context = DifferentiationContext( knl, var_name_gen, by, diff_iname_prefix=diff_iname_prefix, diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index 77c2d3ade..b9be8dd18 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -209,10 +209,10 @@ def _fuse_two_kernels(knla, knlb): knlb = _apply_renames_in_exprs(knlb, b_var_renames) from pymbolic.imperative.transform import \ - fuse_instruction_streams_with_unique_ids - new_instructions, old_b_id_to_new_b_id = \ - fuse_instruction_streams_with_unique_ids( - knla.instructions, knlb.instructions) + fuse_statement_streams_with_unique_ids + new_statements, old_b_id_to_new_b_id = \ + fuse_statement_streams_with_unique_ids( + knla.statements, knlb.statements) # {{{ fuse assumptions @@ -238,7 +238,7 @@ def _fuse_two_kernels(knla, knlb): from loopy.kernel import LoopKernel return LoopKernel( domains=new_domains, - instructions=new_instructions, + statements=new_statements, args=new_args, name="%s_and_%s" % (knla.name, knlb.name), preambles=_ordered_merge_lists(knla.preambles, knlb.preambles), @@ -321,10 +321,10 @@ def fuse_kernels(kernels, suffixes=None, data_flow=None): succeed. * Temporaries are automatically renamed to remain uniquely associated - with each instruction stream. + with each statement stream. - * The resulting kernel will contain all instructions from each entry - of *kernels*. Clashing instruction IDs will be renamed to ensure + * The resulting kernel will contain all statements from each entry + of *kernels*. 
Clashing statement IDs will be renamed to ensure uniqueness. .. versionchanged:: 2016.2 @@ -368,44 +368,44 @@ def fuse_kernels(kernels, suffixes=None, data_flow=None): # }}} - kernel_insn_ids = [] + kernel_stmt_ids = [] result = None for knlb in kernels: if result is None: result = knlb - kernel_insn_ids.append([ - insn.id for insn in knlb.instructions]) + kernel_stmt_ids.append([ + stmt.id for stmt in knlb.statements]) else: result, old_b_id_to_new_b_id = _fuse_two_kernels( knla=result, knlb=knlb) - kernel_insn_ids.append([ - old_b_id_to_new_b_id[insn.id] - for insn in knlb.instructions]) + kernel_stmt_ids.append([ + old_b_id_to_new_b_id[stmt.id] + for stmt in knlb.statements]) # {{{ realize data_flow dependencies - id_to_insn = result.id_to_insn.copy() + id_to_stmt = result.id_to_stmt.copy() for var_name, from_kernel, to_kernel in data_flow: from_writer_ids = frozenset( - insn_id - for insn_id in kernel_insn_ids[from_kernel] - if var_name in id_to_insn[insn_id].assignee_var_names()) + stmt_id + for stmt_id in kernel_stmt_ids[from_kernel] + if var_name in id_to_stmt[stmt_id].assignee_var_names()) - for insn_id in kernel_insn_ids[to_kernel]: - insn = id_to_insn[insn_id] - if var_name in insn.read_dependency_names(): - insn = insn.copy(depends_on=insn.depends_on | from_writer_ids) + for stmt_id in kernel_stmt_ids[to_kernel]: + stmt = id_to_stmt[stmt_id] + if var_name in stmt.read_dependency_names(): + stmt = stmt.copy(depends_on=stmt.depends_on | from_writer_ids) - id_to_insn[insn_id] = insn + id_to_stmt[stmt_id] = stmt - result = result.copy(instructions=[ - id_to_insn[insn_id] - for insn_ids in kernel_insn_ids - for insn_id in insn_ids]) + result = result.copy(statements=[ + id_to_stmt[stmt_id] + for stmt_ids in kernel_stmt_ids + for stmt_id in stmt_ids]) # }}} diff --git a/loopy/transform/ilp.py b/loopy/transform/ilp.py index 0ac71d603..0c86f6dc3 100644 --- a/loopy/transform/ilp.py +++ b/loopy/transform/ilp.py @@ -77,12 +77,12 @@ def 
add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): # {{{ find variables that need extra indices for tv in six.itervalues(kernel.temporary_variables): - for writer_insn_id in wmap.get(tv.name, []): - writer_insn = kernel.id_to_insn[writer_insn_id] + for writer_stmt_id in wmap.get(tv.name, []): + writer_stmt = kernel.id_to_stmt[writer_stmt_id] if iname is None: ilp_inames = frozenset(iname - for iname in kernel.insn_inames(writer_insn) + for iname in kernel.stmt_inames(writer_stmt) if isinstance( kernel.iname_to_tag.get(iname), (IlpBaseTag, VectorizeTag))) @@ -97,7 +97,7 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): ilp_inames = frozenset([iname]) referenced_ilp_inames = (ilp_inames - & writer_insn.write_dependency_names()) + & writer_stmt.write_dependency_names()) new_ilp_inames = ilp_inames - referenced_ilp_inames @@ -106,10 +106,10 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): if tv.name in var_to_new_ilp_inames: if new_ilp_inames != set(var_to_new_ilp_inames[tv.name]): - raise LoopyError("instruction '%s' requires adding " + raise LoopyError("statement '%s' requires adding " "indices for ILP inames '%s' on var '%s', but previous " - "instructions required inames '%s'" - % (writer_insn_id, ", ".join(new_ilp_inames), + "statements required inames '%s'" + % (writer_stmt_id, ", ".join(new_ilp_inames), ", ".join(var_to_new_ilp_inames[tv.name]))) continue @@ -167,30 +167,30 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): (var_name, tuple(var(iname) for iname in inames)) for var_name, inames in six.iteritems(var_to_new_ilp_inames)) - new_insns = [] + new_stmts = [] - for insn in kernel.instructions: + for stmt in kernel.statements: eiii = ExtraInameIndexInserter(var_to_extra_iname) - new_insn = insn.with_transformed_expressions(eiii) - if not eiii.seen_ilp_inames <= insn.within_inames: + new_stmt = stmt.with_transformed_expressions(eiii) + if not eiii.seen_ilp_inames <= stmt.within_inames: from 
loopy.diagnostic import warn_with_kernel warn_with_kernel( kernel, "implicit_ilp_iname", - "Instruction '%s': touched variable that (for ILP) " - "required iname(s) '%s', but that the instruction was not " + "Statement '%s': touched variable that (for ILP) " + "required iname(s) '%s', but that the statement was not " "previously within the iname(s). Previously, this would " - "implicitly promote the instruction, but that behavior is " + "implicitly promote the statement, but that behavior is " "deprecated and will stop working in 2018.1." - % (insn.id, ", ".join( - eiii.seen_ilp_inames - insn.within_inames))) + % (stmt.id, ", ".join( + eiii.seen_ilp_inames - stmt.within_inames))) - new_insns.append(new_insn) + new_stmts.append(new_stmt) return kernel.copy( temporary_variables=new_temp_vars, - instructions=new_insns) + statements=new_stmts) # }}} @@ -198,14 +198,14 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None): # {{{ realize_ilp def realize_ilp(kernel, iname): - """Instruction-level parallelism (as realized by the loopy iname + """Statement-level parallelism (as realized by the loopy iname tag ``"ilp"``) provides the illusion that multiple concurrent - program instances execute in lockstep within a single instruction + program instances execute in lockstep within a single statement stream. - To do so, storage that is private to each instruction stream needs to be + To do so, storage that is private to each statement stream needs to be duplicated so that each program instance receives its own copy. Storage - that is written to in an instruction using an ILP iname but whose left-hand + that is written to in an statement using an ILP iname but whose left-hand side indices do not contain said ILP iname is marked for duplication. 
This storage duplication is carried out automatically at code generation diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 22fd7b3bb..105ba7bd8 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -70,7 +70,7 @@ __doc__ = """ .. autofunction:: make_reduction_inames_unique -.. autofunction:: add_inames_to_insn +.. autofunction:: add_inames_to_stmt """ @@ -137,7 +137,7 @@ class _InameSplitter(RuleAwareIdentityMapper): and self.split_iname not in expn_state.arg_context and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): new_inames = list(expr.inames) new_inames.remove(self.split_iname) @@ -155,7 +155,7 @@ class _InameSplitter(RuleAwareIdentityMapper): and self.split_iname not in expn_state.arg_context and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): return self.replacement_index else: @@ -246,20 +246,20 @@ def _split_iname_backend(kernel, split_iname, # {{{ update within_inames - new_insns = [] - for insn in kernel.instructions: - if split_iname in insn.within_inames: + new_stmts = [] + for stmt in kernel.statements: + if split_iname in stmt.within_inames: new_within_inames = ( - (insn.within_inames.copy() + (stmt.within_inames.copy() - frozenset([split_iname])) | frozenset([outer_iname, inner_iname])) else: - new_within_inames = insn.within_inames + new_within_inames = stmt.within_inames - insn = insn.copy( + stmt = stmt.copy( within_inames=new_within_inames) - new_insns.append(insn) + new_stmts.append(stmt) # }}} @@ -279,7 +279,7 @@ def _split_iname_backend(kernel, split_iname, kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, - instructions=new_insns, + statements=new_stmts, applied_iname_rewrites=applied_iname_rewrites, loop_priority=frozenset(new_priorities)) @@ -458,7 +458,7 @@ class _InameJoiner(RuleAwareSubstitutionMapper): - set(expn_state.arg_context)) if overlap and 
self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack): if overlap != expr_inames: raise LoopyError( @@ -563,14 +563,14 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): return frozenset(result) - new_insns = [ - insn.copy( - within_inames=subst_within_inames(insn.within_inames)) - for insn in kernel.instructions] + new_stmts = [ + stmt.copy( + within_inames=subst_within_inames(stmt.within_inames)) + for stmt in kernel.statements] kernel = (kernel .copy( - instructions=new_insns, + statements=new_stmts, domains=domch.get_domains_with(new_domain), applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] )) @@ -725,7 +725,7 @@ class _InameDuplicator(RuleAwareIdentityMapper): if (set(expr.inames) & self.old_inames_set and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): new_inames = tuple( self.old_to_new.get(iname, iname) @@ -747,21 +747,21 @@ class _InameDuplicator(RuleAwareIdentityMapper): or expr.name in expn_state.arg_context or not self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): return super(_InameDuplicator, self).map_variable(expr, expn_state) else: from pymbolic import var return var(new_name) - def map_instruction(self, kernel, insn): - if not self.within(kernel, insn, ()): - return insn + def map_statement(self, kernel, stmt): + if not self.within(kernel, stmt, ()): + return stmt new_fid = frozenset( self.old_to_new.get(iname, iname) - for iname in insn.within_inames) - return insn.copy(within_inames=new_fid) + for iname in stmt.within_inames) + return stmt.copy(within_inames=new_fid) def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, @@ -854,28 +854,28 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # {{{ iname duplication for schedulability -def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): - # 
Remove common inames of the current insn_deps, as they are not relevant +def _get_iname_duplication_options(stmt_deps, old_common_inames=frozenset([])): + # Remove common inames of the current stmt_deps, as they are not relevant # for splitting. - common = frozenset([]).union(*insn_deps).intersection(*insn_deps) + common = frozenset([]).union(*stmt_deps).intersection(*stmt_deps) # If common inames were found, we reduce the problem and go into recursion if common: - # Remove the common inames from the instruction dependencies - insn_deps = ( - frozenset(dep - common for dep in insn_deps) + # Remove the common inames from the statement dependencies + stmt_deps = ( + frozenset(dep - common for dep in stmt_deps) - frozenset([frozenset([])])) # Join the common inames with those previously found common = common.union(old_common_inames) # Go into recursion - for option in _get_iname_duplication_options(insn_deps, common): + for option in _get_iname_duplication_options(stmt_deps, common): yield option # Do not yield anything beyond here! return - # Try finding a partitioning of the remaining inames, such that all instructions + # Try finding a partitioning of the remaining inames, such that all statements # use only inames from one of the disjoint sets from the partitioning. 
def join_sets_if_not_disjoint(sets): for s1 in sets: @@ -888,7 +888,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): return sets, True - partitioning = insn_deps + partitioning = stmt_deps stop = False while not stop: partitioning, stop = join_sets_if_not_disjoint(partitioning) @@ -897,7 +897,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): # subproblems if len(partitioning) > 1: for part in partitioning: - working_set = frozenset(s for s in insn_deps if s.issubset(part)) + working_set = frozenset(s for s in stmt_deps if s.issubset(part)) for option in _get_iname_duplication_options(working_set, old_common_inames): yield option @@ -907,19 +907,19 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): # There are splitting options for all inames for iname in inames: - iname_insns = frozenset( - insn for insn in insn_deps if frozenset([iname]).issubset(insn)) + iname_stmts = frozenset( + stmt for stmt in stmt_deps if frozenset([iname]).issubset(stmt)) import itertools as it - # For a given iname, the set of instructions containing this iname + # For a given iname, the set of statements containing this iname # is inspected. For each element of the power set without the # empty and the full set, one duplication option is generated. 
- for insns_to_dup in it.chain.from_iterable( - it.combinations(iname_insns, l) - for l in range(1, len(iname_insns))): + for stmts_to_dup in it.chain.from_iterable( + it.combinations(iname_stmts, l) + for l in range(1, len(iname_stmts))): yield ( iname, - tuple(insn.union(old_common_inames) for insn in insns_to_dup)) + tuple(stmt.union(old_common_inames) for stmt in stmts_to_dup)) # If partitioning was empty, we have recursed successfully and yield nothing @@ -946,31 +946,31 @@ def get_iname_duplication_options(knl, use_boostable_into=False): \"\"\") In the example, there are four possibilities to resolve the problem: - * duplicating i in instruction i3 - * duplicating i in instruction i1 and i3 - * duplicating j in instruction i2 - * duplicating i in instruction i2 and i3 + * duplicating i in statement i3 + * duplicating i in statement i1 and i3 + * duplicating j in statement i2 + * duplicating i in statement i2 and i3 Use :func:`has_schedulable_iname_nesting` to decide, whether an iname needs to be duplicated in a given kernel. 
""" # First we extract the minimal necessary information from the kernel if use_boostable_into: - insn_deps = ( - frozenset(insn.within_inames.union( - insn.boostable_into if insn.boostable_into is not None + stmt_deps = ( + frozenset(stmt.within_inames.union( + stmt.boostable_into if stmt.boostable_into is not None else frozenset([])) - for insn in knl.instructions) + for stmt in knl.statements) - frozenset([frozenset([])])) else: - insn_deps = ( - frozenset(insn.within_inames for insn in knl.instructions) + stmt_deps = ( + frozenset(stmt.within_inames for stmt in knl.statements) - frozenset([frozenset([])])) # Get the duplication options as a tuple of iname and a set - for iname, insns in _get_iname_duplication_options(insn_deps): + for iname, stmts in _get_iname_duplication_options(stmt_deps): # Check whether this iname has a parallel tag and discard it if so from loopy.kernel.data import ConcurrentTag if (iname in knl.iname_to_tag @@ -987,7 +987,7 @@ def get_iname_duplication_options(knl, use_boostable_into=False): from warnings import warn from loopy.diagnostic import LoopyWarning warn("Kernel '%s' required the deprecated 'boostable_into' " - "instruction attribute in order to be schedulable!" % knl.name, + "statement attribute in order to be schedulable!" % knl.name, LoopyWarning) # Return to avoid yielding the duplication @@ -998,10 +998,10 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # loopy.duplicate_inames from loopy.match import Id, Or within = Or(tuple( - Id(insn.id) for insn in knl.instructions - if insn.within_inames in insns)) + Id(stmt.id) for stmt in knl.statements + if stmt.within_inames in stmts)) - # Only yield the result if an instruction matched. With + # Only yield the result if an statement matched. With # use_boostable_into=True this is not always true. 
if within.children: @@ -1085,18 +1085,18 @@ def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): knl = rule_mapping_context.finish_kernel( smap.map_kernel(knl)) - new_instructions = [] - for insn in knl.instructions: - if (old_iname in insn.within_inames - and within(knl, insn, ())): - insn = insn.copy( + new_statements = [] + for stmt in knl.statements: + if (old_iname in stmt.within_inames + and within(knl, stmt, ())): + stmt = stmt.copy( within_inames=( - (insn.within_inames - frozenset([old_iname])) + (stmt.within_inames - frozenset([old_iname])) | frozenset([new_iname]))) - new_instructions.append(insn) + new_statements.append(stmt) - knl = knl.copy(instructions=new_instructions) + knl = knl.copy(statements=new_statements) else: knl = duplicate_inames( @@ -1136,10 +1136,10 @@ def remove_unused_inames(knl, inames=None): inames = set(inames) used_inames = set() - for insn in exp_knl.instructions: + for stmt in exp_knl.statements: used_inames.update( - exp_knl.insn_inames(insn.id) - | insn.reduction_inames()) + exp_knl.stmt_inames(stmt.id) + | stmt.reduction_inames()) unused_inames = inames - used_inames @@ -1184,7 +1184,7 @@ class _ReductionSplitter(RuleAwareIdentityMapper): if (self.inames <= set(expr.inames) and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack)): leftover_inames = set(expr.inames) - self.inames @@ -1451,43 +1451,43 @@ def affine_map_inames(kernel, old_inames, new_inames, equations): # }}} - # {{{ switch iname refs in instructions + # {{{ switch iname refs in statements - def fix_iname_set(insn_id, inames): + def fix_iname_set(stmt_id, inames): if old_inames_set <= inames: return (inames - old_inames_set) | new_inames_set elif old_inames_set & inames: - raise LoopyError("instruction '%s' uses only a part (%s), not all, " + raise LoopyError("statement '%s' uses only a part (%s), not all, " "of the old inames" - % (insn_id, ", ".join(old_inames_set & inames))) + % (stmt_id, 
", ".join(old_inames_set & inames))) else: return inames - new_instructions = [ - insn.copy(within_inames=fix_iname_set( - insn.id, insn.within_inames)) - for insn in kernel.instructions] + new_statements = [ + stmt.copy(within_inames=fix_iname_set( + stmt.id, stmt.within_inames)) + for stmt in kernel.statements] # }}} - return kernel.copy(domains=new_domains, instructions=new_instructions) + return kernel.copy(domains=new_domains, statements=new_statements) # }}} # {{{ find unused axes -def find_unused_axis_tag(kernel, kind, insn_match=None): +def find_unused_axis_tag(kernel, kind, stmt_match=None): """For one of the hardware-parallel execution tags, find an unused axis. - :arg insn_match: An instruction match as understood by + :arg stmt_match: An statement match as understood by :func:`loopy.match.parse_match`. :arg kind: may be "l" or "g", or the corresponding tag class name :returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag` - that is not being used within the instructions matched by - *insn_match*. + that is not being used within the statements matched by + *stmt_match*. 
""" used_axes = set() @@ -1505,11 +1505,11 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): raise LoopyError("invlaid tag kind: %s" % kind) from loopy.match import parse_match - match = parse_match(insn_match) - insns = [insn for insn in kernel.instructions if match(kernel, insn)] + match = parse_match(stmt_match) + stmts = [stmt for stmt in kernel.statements if match(kernel, stmt)] - for insn in insns: - for iname in kernel.insn_inames(insn): + for stmt in stmts: + for iname in kernel.stmt_inames(stmt): dim_tag = kernel.iname_to_tag.get(iname) if isinstance(dim_tag, kind): @@ -1557,7 +1557,7 @@ class _ReductionInameUniquifier(RuleAwareIdentityMapper): def map_reduction(self, expr, expn_state): within = self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack) for iname in expr.inames: @@ -1645,19 +1645,19 @@ def make_reduction_inames_unique(kernel, inames=None, within=None): # }}} -# {{{ add_inames_to_insn +# {{{ add_inames_to_stmt -def add_inames_to_insn(knl, inames, insn_match): +def add_inames_to_stmt(knl, inames, stmt_match): """ :arg inames: a frozenset of inames that will be added to the - instructions matched by *insn_match*, or a comma-separated + statements matched by *stmt_match*, or a comma-separated string that parses to such a tuple. - :arg insn_match: An instruction match as understood by + :arg stmt_match: An statement match as understood by :func:`loopy.match.parse_match`. :returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag` - that is not being used within the instructions matched by - *insn_match*. + that is not being used within the statements matched by + *stmt_match*. .. 
versionadded:: 2016.3 """ @@ -1669,18 +1669,18 @@ def add_inames_to_insn(knl, inames, insn_match): raise TypeError("'inames' must be a frozenset") from loopy.match import parse_match - match = parse_match(insn_match) + match = parse_match(stmt_match) - new_instructions = [] + new_statements = [] - for insn in knl.instructions: - if match(knl, insn): - new_instructions.append( - insn.copy(within_inames=insn.within_inames | inames)) + for stmt in knl.statements: + if match(knl, stmt): + new_statements.append( + stmt.copy(within_inames=stmt.within_inames | inames)) else: - new_instructions.append(insn) + new_statements.append(stmt) - return knl.copy(instructions=new_instructions) + return knl.copy(statements=new_statements) # }}} diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py deleted file mode 100644 index 37c5d85a1..000000000 --- a/loopy/transform/instruction.py +++ /dev/null @@ -1,339 +0,0 @@ -from __future__ import division, absolute_import - -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -import six # noqa - -from loopy.diagnostic import LoopyError - - -# {{{ find_instructions - -def find_instructions(kernel, insn_match): - from loopy.match import parse_match - match = parse_match(insn_match) - return [insn for insn in kernel.instructions if match(kernel, insn)] - -# }}} - - -# {{{ map_instructions - -def map_instructions(kernel, insn_match, f): - from loopy.match import parse_match - match = parse_match(insn_match) - - new_insns = [] - - for insn in kernel.instructions: - if match(kernel, insn): - new_insns.append(f(insn)) - else: - new_insns.append(insn) - - return kernel.copy(instructions=new_insns) - -# }}} - - -# {{{ set_instruction_priority - -def set_instruction_priority(kernel, insn_match, priority): - """Set the priority of instructions matching *insn_match* to *priority*. - - *insn_match* may be any instruction id match understood by - :func:`loopy.match.parse_match`. - """ - - def set_prio(insn): - return insn.copy(priority=priority) - - return map_instructions(kernel, insn_match, set_prio) - -# }}} - - -# {{{ add_dependency - -def add_dependency(kernel, insn_match, depends_on): - """Add the instruction dependency *dependency* to the instructions matched - by *insn_match*. - - *insn_match* and *depends_on* may be any instruction id match understood by - :func:`loopy.match.parse_match`. - - .. versionchanged:: 2016.3 - - Third argument renamed to *depends_on* for clarity, allowed to - be not just ID but also match expression. 
- """ - - if isinstance(depends_on, str) and depends_on in kernel.id_to_insn: - added_deps = frozenset([depends_on]) - else: - added_deps = frozenset( - dep.id for dep in find_instructions(kernel, depends_on)) - - if not added_deps: - raise LoopyError("no instructions found matching '%s' " - "(to add as dependencies)" % depends_on) - - matched = [False] - - def add_dep(insn): - new_deps = insn.depends_on - matched[0] = True - if new_deps is None: - new_deps = added_deps - else: - new_deps = new_deps | added_deps - - return insn.copy(depends_on=new_deps) - - result = map_instructions(kernel, insn_match, add_dep) - - if not matched[0]: - raise LoopyError("no instructions found matching '%s' " - "(to which dependencies would be added)" % insn_match) - - return result - -# }}} - - -# {{{ remove_instructions - -def remove_instructions(kernel, insn_ids): - """Return a new kernel with instructions in *insn_ids* removed. - - Dependencies across (one, for now) deleted isntructions are propagated. - Behavior is undefined for now for chains of dependencies within the - set of deleted instructions. - - This also updates *no_sync_with* for all instructions. 
- """ - - if not insn_ids: - return kernel - - assert isinstance(insn_ids, set) - id_to_insn = kernel.id_to_insn - - new_insns = [] - for insn in kernel.instructions: - if insn.id in insn_ids: - continue - - # transitively propagate dependencies - # (only one level for now) - if insn.depends_on is None: - depends_on = frozenset() - else: - depends_on = insn.depends_on - - new_deps = depends_on - insn_ids - - for dep_id in depends_on & insn_ids: - new_deps = new_deps | id_to_insn[dep_id].depends_on - - # update no_sync_with - - new_no_sync_with = frozenset((insn_id, scope) - for insn_id, scope in insn.no_sync_with - if insn_id not in insn_ids) - - new_insns.append( - insn.copy(depends_on=new_deps, no_sync_with=new_no_sync_with)) - - return kernel.copy( - instructions=new_insns) - -# }}} - - -# {{{ replace_instruction_ids - -def replace_instruction_ids(kernel, replacements): - new_insns = [] - - for insn in kernel.instructions: - changed = False - new_depends_on = [] - new_no_sync_with = [] - - for dep in insn.depends_on: - if dep in replacements: - new_depends_on.extend(replacements[dep]) - changed = True - else: - new_depends_on.append(dep) - - for insn_id, scope in insn.no_sync_with: - if insn_id in replacements: - new_no_sync_with.extend( - (repl, scope) for repl in replacements[insn_id]) - changed = True - else: - new_no_sync_with.append((insn_id, scope)) - - new_insns.append( - insn.copy( - depends_on=frozenset(new_depends_on), - no_sync_with=frozenset(new_no_sync_with)) - if changed else insn) - - return kernel.copy(instructions=new_insns) - -# }}} - - -# {{{ tag_instructions - -def tag_instructions(kernel, new_tag, within=None): - from loopy.match import parse_match - within = parse_match(within) - - new_insns = [] - for insn in kernel.instructions: - if within(kernel, insn): - new_insns.append( - insn.copy(tags=insn.tags | frozenset([new_tag]))) - else: - new_insns.append(insn) - - return kernel.copy(instructions=new_insns) - -# }}} - - -# {{{ add nosync - 
-def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): - """Add a *no_sync_with* directive between *source* and *sink*. - *no_sync_with* is only added if *sink* depends on *source* or - if the instruction pair is in a conflicting group. - - This function does not check for the presence of a memory dependency. - - :arg kernel: The kernel - :arg source: Either a single instruction id, or any instruction id - match understood by :func:`loopy.match.parse_match`. - :arg sink: Either a single instruction id, or any instruction id - match understood by :func:`loopy.match.parse_match`. - :arg scope: A valid *no_sync_with* scope. See - :attr:`loopy.InstructionBase.no_sync_with` for allowable scopes. - :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with* - to both the source and sink instructions, otherwise the directive - is only added to the sink instructions. - :arg force: A :class:`bool`. If *True*, add a *no_sync_with* directive - even without the presence of a dependency edge or conflicting - instruction group. 
- - :return: The updated kernel - """ - - if isinstance(source, str) and source in kernel.id_to_insn: - sources = frozenset([source]) - else: - sources = frozenset( - source.id for source in find_instructions(kernel, source)) - - if isinstance(sink, str) and sink in kernel.id_to_insn: - sinks = frozenset([sink]) - else: - sinks = frozenset( - sink.id for sink in find_instructions(kernel, sink)) - - def insns_in_conflicting_groups(insn1_id, insn2_id): - insn1 = kernel.id_to_insn[insn1_id] - insn2 = kernel.id_to_insn[insn2_id] - return ( - bool(insn1.groups & insn2.conflicts_with_groups) - or - bool(insn2.groups & insn1.conflicts_with_groups)) - - from collections import defaultdict - nosync_to_add = defaultdict(set) - - for sink in sinks: - for source in sources: - - needs_nosync = force or ( - source in kernel.recursive_insn_dep_map()[sink] - or insns_in_conflicting_groups(source, sink)) - - if not needs_nosync: - continue - - nosync_to_add[sink].add((source, scope)) - if bidirectional: - nosync_to_add[source].add((sink, scope)) - - new_instructions = list(kernel.instructions) - - for i, insn in enumerate(new_instructions): - if insn.id in nosync_to_add: - new_instructions[i] = insn.copy(no_sync_with=insn.no_sync_with - | frozenset(nosync_to_add[insn.id])) - - return kernel.copy(instructions=new_instructions) - -# }}} - - -# {{{ uniquify_instruction_ids - -def uniquify_instruction_ids(kernel): - """Converts any ids that are :class:`loopy.UniqueName` or *None* into unique - strings. - - This function does *not* deduplicate existing instruction ids. 
- """ - - from loopy.kernel.creation import UniqueName - - insn_ids = set( - insn.id for insn in kernel.instructions - if insn.id is not None and not isinstance(insn.id, UniqueName)) - - from pytools import UniqueNameGenerator - insn_id_gen = UniqueNameGenerator(insn_ids) - - new_instructions = [] - - for insn in kernel.instructions: - if insn.id is None: - new_instructions.append( - insn.copy(id=insn_id_gen("insn"))) - elif isinstance(insn.id, UniqueName): - new_instructions.append( - insn.copy(id=insn_id_gen(insn.id.name))) - else: - new_instructions.append(insn) - - return kernel.copy(instructions=new_instructions) - -# }}} - - -# vim: foldmethod=marker diff --git a/loopy/transform/padding.py b/loopy/transform/padding.py index d695e3595..a22b1db93 100644 --- a/loopy/transform/padding.py +++ b/loopy/transform/padding.py @@ -83,7 +83,7 @@ def split_array_dim(kernel, arrays_and_axes, count, auto_split_inames=True, elif len(rest) == 2: return rest else: - raise RuntimeError("split instruction '%s' not understood" % rest) + raise RuntimeError("split statement '%s' not understood" % rest) if isinstance(arrays_and_axes, tuple): arrays_and_axes = [arrays_and_axes] diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 6077332c4..85fc34840 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -84,7 +84,7 @@ class RuleInvocationGatherer(RuleAwareIdentityMapper): process_me = process_me and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack) if not process_me: @@ -136,7 +136,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): access_descriptors, array_base_map, storage_axis_names, storage_axis_sources, non1_storage_axis_names, - temporary_name, compute_insn_id, compute_dep_id, + temporary_name, compute_stmt_id, compute_dep_id, compute_read_variables): super(RuleInvocationReplacer, self).__init__(rule_mapping_context) @@ -152,18 +152,18 @@ class 
RuleInvocationReplacer(RuleAwareIdentityMapper): self.non1_storage_axis_names = non1_storage_axis_names self.temporary_name = temporary_name - self.compute_insn_id = compute_insn_id + self.compute_stmt_id = compute_stmt_id self.compute_dep_id = compute_dep_id self.compute_read_variables = compute_read_variables - self.compute_insn_depends_on = set() + self.compute_stmt_depends_on = set() def map_substitution(self, name, tag, arguments, expn_state): if not ( name == self.subst_name and self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack) and (self.subst_tag is None or self.subst_tag == tag)): return super(RuleInvocationReplacer, self).map_substitution( @@ -222,34 +222,34 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): return new_outer_expr def map_kernel(self, kernel): - new_insns = [] + new_stmts = [] - excluded_insn_ids = set([self.compute_insn_id, self.compute_dep_id]) + excluded_stmt_ids = set([self.compute_stmt_id, self.compute_dep_id]) - for insn in kernel.instructions: + for stmt in kernel.statements: self.replaced_something = False - insn = insn.with_transformed_expressions(self, kernel, insn) + stmt = stmt.with_transformed_expressions(self, kernel, stmt) if self.replaced_something: - insn = insn.copy( + stmt = stmt.copy( depends_on=( - insn.depends_on + stmt.depends_on | frozenset([self.compute_dep_id]))) - for dep in insn.depends_on: - if dep in excluded_insn_ids: + for dep in stmt.depends_on: + if dep in excluded_stmt_ids: continue - dep_insn = kernel.id_to_insn[dep] - if (frozenset(dep_insn.assignee_var_names()) + dep_stmt = kernel.id_to_stmt[dep] + if (frozenset(dep_stmt.assignee_var_names()) & self.compute_read_variables): - self.compute_insn_depends_on.update( - insn.depends_on - excluded_insn_ids) + self.compute_stmt_depends_on.update( + stmt.depends_on - excluded_stmt_ids) - new_insns.append(insn) + new_stmts.append(stmt) - return kernel.copy(instructions=new_insns) + return 
kernel.copy(statements=new_stmts) # }}} @@ -260,7 +260,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, storage_axis_to_tag={}, default_tag="l.auto", dtype=None, fetch_bounding_box=False, temporary_scope=None, temporary_is_local=None, - compute_insn_id=None): + compute_stmt_id=None): """Precompute the expression described in the substitution rule determined by *subst_use* and store it in a temporary array. A precomputation needs two things to operate, a list of *sweep_inames* (order irrelevant) and an @@ -325,10 +325,10 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, May also equivalently be a comma-separated string. :arg precompute_outer_inames: A :class:`frozenset` of inames within which - the compute instruction is nested. If *None*, make an educated guess. + the compute statement is nested. If *None*, make an educated guess. May also be specified as a comma-separated string. - :arg compute_insn_id: The ID of the instruction generated to perform the + :arg compute_stmt_id: The ID of the statement generated to perform the precomputation. 
If `storage_axes` is not specified, it defaults to the arrangement @@ -473,11 +473,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, del rule_mapping_context import loopy as lp - for insn in kernel.instructions: - if isinstance(insn, lp.MultiAssignmentBase): - for assignee in insn.assignees: - invg(assignee, kernel, insn) - invg(insn.expression, kernel, insn) + for stmt in kernel.statements: + if isinstance(stmt, lp.MultiAssignmentBase): + for assignee in stmt.assignees: + invg(assignee, kernel, stmt) + invg(stmt.expression, kernel, stmt) access_descriptors = invg.access_descriptors if not access_descriptors: @@ -754,7 +754,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, kernel = kernel.copy(domains=new_kernel_domains) - # {{{ set up compute insn + # {{{ set up compute stmt if temporary_name is None: temporary_name = var_name_gen(based_on=c_subst_name) @@ -765,7 +765,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, assignee = assignee[ tuple(var(iname) for iname in non1_storage_axis_names)] - # {{{ process substitutions on compute instruction + # {{{ process substitutions on compute statement storage_axis_subst_dict = {} @@ -792,29 +792,29 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} from loopy.kernel.data import Assignment - if compute_insn_id is None: - compute_insn_id = kernel.make_unique_instruction_id(based_on=c_subst_name) + if compute_stmt_id is None: + compute_stmt_id = kernel.make_unique_statement_id(based_on=c_subst_name) - compute_insn = Assignment( - id=compute_insn_id, + compute_stmt = Assignment( + id=compute_stmt_id, assignee=assignee, expression=compute_expression, # within_inames determined below ) - compute_dep_id = compute_insn_id - added_compute_insns = [compute_insn] + compute_dep_id = compute_stmt_id + added_compute_stmts = [compute_stmt] if temporary_scope == temp_var_scope.GLOBAL: - barrier_insn_id = kernel.make_unique_instruction_id( + barrier_stmt_id = 
kernel.make_unique_statement_id( based_on=c_subst_name+"_barrier") - from loopy.kernel.instruction import BarrierInstruction - barrier_insn = BarrierInstruction( - id=barrier_insn_id, - depends_on=frozenset([compute_insn_id]), + from loopy.kernel.statement import BarrierStatement + barrier_stmt = BarrierStatement( + id=barrier_stmt_id, + depends_on=frozenset([compute_stmt_id]), kind="global") - compute_dep_id = barrier_insn_id + compute_dep_id = barrier_stmt_id - added_compute_insns.append(barrier_insn) + added_compute_stmts.append(barrier_stmt) # }}} @@ -828,58 +828,58 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, access_descriptors, abm, storage_axis_names, storage_axis_sources, non1_storage_axis_names, - temporary_name, compute_insn_id, compute_dep_id, + temporary_name, compute_stmt_id, compute_dep_id, compute_read_variables=get_dependencies(expander(compute_expression))) kernel = invr.map_kernel(kernel) kernel = kernel.copy( - instructions=added_compute_insns + kernel.instructions) + statements=added_compute_stmts + kernel.statements) kernel = rule_mapping_context.finish_kernel(kernel) # }}} - # {{{ add dependencies to compute insn + # {{{ add dependencies to compute stmt kernel = kernel.copy( - instructions=[ - insn.copy(depends_on=frozenset(invr.compute_insn_depends_on)) - if insn.id == compute_insn_id - else insn - for insn in kernel.instructions]) + statements=[ + stmt.copy(depends_on=frozenset(invr.compute_stmt_depends_on)) + if stmt.id == compute_stmt_id + else stmt + for stmt in kernel.statements]) # }}} - # {{{ propagate storage iname subst to dependencies of compute instructions + # {{{ propagate storage iname subst to dependencies of compute statements from loopy.kernel.tools import find_recursive_dependencies compute_deps = find_recursive_dependencies( - kernel, frozenset([compute_insn_id])) + kernel, frozenset([compute_stmt_id])) # FIXME: Need to verify that there are no outside dependencies # on compute_deps 
prior_storage_axis_names = frozenset(storage_axis_subst_dict) - new_insns = [] - for insn in kernel.instructions: - if (insn.id in compute_deps - and insn.within_inames & prior_storage_axis_names): - insn = (insn + new_stmts = [] + for stmt in kernel.statements: + if (stmt.id in compute_deps + and stmt.within_inames & prior_storage_axis_names): + stmt = (stmt .with_transformed_expressions( - lambda expr: expr_subst_map(expr, kernel, insn)) + lambda expr: expr_subst_map(expr, kernel, stmt)) .copy(within_inames=frozenset( storage_axis_subst_dict.get(iname, var(iname)).name - for iname in insn.within_inames))) + for iname in stmt.within_inames))) - new_insns.append(insn) + new_stmts.append(stmt) else: - new_insns.append(insn) + new_stmts.append(stmt) - kernel = kernel.copy(instructions=new_insns) + kernel = kernel.copy(statements=new_stmts) # }}} - # {{{ determine inames for compute insn + # {{{ determine inames for compute stmt if precompute_outer_inames is None: from loopy.kernel.tools import guess_iname_deps_based_on_var_use @@ -888,7 +888,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, | frozenset( (expanding_usage_arg_deps | value_inames) - sweep_inames_set) - | guess_iname_deps_based_on_var_use(kernel, compute_insn)) + | guess_iname_deps_based_on_var_use(kernel, compute_stmt)) else: if not isinstance(precompute_outer_inames, frozenset): raise TypeError("precompute_outer_inames must be a frozenset") @@ -897,11 +897,11 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, | frozenset(non1_storage_axis_names) kernel = kernel.copy( - instructions=[ - insn.copy(within_inames=precompute_outer_inames) - if insn.id == compute_insn_id - else insn - for insn in kernel.instructions]) + statements=[ + stmt.copy(within_inames=precompute_outer_inames) + if stmt.id == compute_stmt_id + else stmt + for stmt in kernel.statements]) # }}} diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 2ba2338b0..39824fbd4 100644 --- 
a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -30,7 +30,7 @@ import six from loopy.kernel.data import auto, temp_var_scope from pytools import memoize_method, Record from loopy.schedule import ( - EnterLoop, LeaveLoop, RunInstruction, + EnterLoop, LeaveLoop, RunStatement, CallKernel, ReturnFromKernel, Barrier) from loopy.schedule.tools import get_block_boundaries @@ -51,12 +51,12 @@ __doc__ = """ class LivenessResult(dict): - class InstructionResult(Record): + class StatementResult(Record): __slots__ = ["live_in", "live_out"] @classmethod def make_empty(cls, nscheditems): - return cls((idx, cls.InstructionResult(live_in=set(), live_out=set())) + return cls((idx, cls.StatementResult(live_in=set(), live_out=set())) for idx in range(nscheditems)) @@ -83,7 +83,7 @@ class LivenessAnalysis(object): # Account for empty loop loop_end = block_bounds[sched_idx + 1] after = successors[loop_end] | set([sched_idx + 1]) - elif isinstance(next_item, (LeaveLoop, RunInstruction, + elif isinstance(next_item, (LeaveLoop, RunStatement, CallKernel, ReturnFromKernel, Barrier)): after = set([sched_idx + 1]) else: @@ -95,7 +95,7 @@ class LivenessAnalysis(object): # Account for loop loop_begin = block_bounds[sched_idx] after |= set([loop_begin]) - elif not isinstance(item, (EnterLoop, RunInstruction, + elif not isinstance(item, (EnterLoop, RunStatement, CallKernel, ReturnFromKernel, Barrier)): raise LoopyError("unexpected type of schedule item: {ty}" .format(ty=type(item).__name__)) @@ -109,13 +109,13 @@ class LivenessAnalysis(object): kill = dict((idx, set()) for idx in range(len(self.schedule))) for sched_idx, sched_item in enumerate(self.schedule): - if not isinstance(sched_item, RunInstruction): + if not isinstance(sched_item, RunStatement): continue - insn = self.kernel.id_to_insn[sched_item.insn_id] - for var in insn.assignee_var_names(): + stmt = self.kernel.id_to_stmt[sched_item.stmt_id] + for var in stmt.assignee_var_names(): if var not in 
self.kernel.temporary_variables: continue - if not insn.predicates: + if not stmt.predicates: # Fully kills the liveness only when unconditional. kill[sched_idx].add(var) if len(self.kernel.temporary_variables[var].shape) > 0: @@ -127,7 +127,7 @@ class LivenessAnalysis(object): # or a full write. Instead, we analyze the access # footprint later on to determine how much to reload/save. gen[sched_idx].add(var) - for var in insn.read_dependency_names(): + for var in stmt.read_dependency_names(): if var not in self.kernel.temporary_variables: continue gen[sched_idx].add(var) @@ -174,10 +174,10 @@ class LivenessAnalysis(object): def __getitem__(self, sched_idx): """ - :arg insn: An instruction name or instance of - :class:`loopy.instruction.InstructionBase` + :arg stmt: An statement name or instance of + :class:`loopy.statement.StatementBase` - :returns: A :class:`LivenessResult` associated with `insn` + :returns: A :class:`LivenessResult` associated with `stmt` """ return self.liveness()[sched_idx] @@ -238,20 +238,20 @@ class TemporarySaver(object): def __init__(self, kernel): self.kernel = kernel self.var_name_gen = kernel.get_var_name_generator() - self.insn_name_gen = kernel.get_instruction_id_generator() + self.stmt_name_gen = kernel.get_statement_id_generator() # These fields keep track of updates to the kernel. 
- self.insns_to_insert = [] - self.insns_to_update = {} + self.stmts_to_insert = [] + self.stmts_to_update = {} self.extra_args_to_add = {} self.updated_iname_to_tag = {} self.updated_temporary_variables = {} - # temporary name -> save or reload insn ids + # temporary name -> save or reload stmt ids from collections import defaultdict self.temporary_to_save_ids = defaultdict(set) self.temporary_to_reload_ids = defaultdict(set) - self.subkernel_to_newly_added_insn_ids = defaultdict(set) + self.subkernel_to_newly_added_stmt_ids = defaultdict(set) # Maps names of base_storage to the name of the temporary # representative chosen for saves/reloads @@ -268,9 +268,9 @@ class TemporarySaver(object): arg.name for arg in kernel.args if isinstance(arg, ValueArg))))) - def find_accessing_instructions_in_subkernel(self, temporary, subkernel): - # Find all accessing instructions in the subkernel. If base_storage is - # present, this includes instructions that access aliasing memory. + def find_accessing_statements_in_subkernel(self, temporary, subkernel): + # Find all accessing statements in the subkernel. If base_storage is + # present, this includes statements that access aliasing memory. 
aliasing_names = set([temporary]) base_storage = self.kernel.temporary_variables[temporary].base_storage @@ -278,24 +278,24 @@ class TemporarySaver(object): if base_storage is not None: aliasing_names |= self.base_storage_to_temporary_map[base_storage] - from loopy.kernel.tools import get_subkernel_to_insn_id_map - accessing_insns_in_subkernel = set() - subkernel_insns = get_subkernel_to_insn_id_map(self.kernel)[subkernel] + from loopy.kernel.tools import get_subkernel_to_stmt_id_map + accessing_stmts_in_subkernel = set() + subkernel_stmts = get_subkernel_to_stmt_id_map(self.kernel)[subkernel] for name in aliasing_names: try: - accessing_insns_in_subkernel |= ( - self.kernel.reader_map()[name] & subkernel_insns) + accessing_stmts_in_subkernel |= ( + self.kernel.reader_map()[name] & subkernel_stmts) except KeyError: pass try: - accessing_insns_in_subkernel |= ( - self.kernel.writer_map()[name] & subkernel_insns) + accessing_stmts_in_subkernel |= ( + self.kernel.writer_map()[name] & subkernel_stmts) except KeyError: pass - return frozenset(accessing_insns_in_subkernel) + return frozenset(accessing_stmts_in_subkernel) @property @memoize_method @@ -356,14 +356,14 @@ class TemporarySaver(object): try: pre_barrier = next(item for item in self.kernel.schedule[subkernel_start::-1] - if is_global_barrier(item)).originating_insn_id + if is_global_barrier(item)).originating_stmt_id except StopIteration: pre_barrier = None try: post_barrier = next(item for item in self.kernel.schedule[subkernel_end:] - if is_global_barrier(item)).originating_insn_id + if is_global_barrier(item)).originating_stmt_id except StopIteration: post_barrier = None @@ -379,7 +379,7 @@ class TemporarySaver(object): In the case of local temporaries, inames that are tagged hw-local do not contribute to the global storage shape. 
""" - accessor_insn_ids = frozenset( + accessor_stmt_ids = frozenset( self.kernel.reader_map()[temporary.name] | self.kernel.writer_map()[temporary.name]) @@ -389,13 +389,13 @@ class TemporarySaver(object): def _sortedtags(tags): return sorted(tags, key=lambda tag: tag.axis) - for insn_id in accessor_insn_ids: - insn = self.kernel.id_to_insn[insn_id] + for stmt_id in accessor_stmt_ids: + stmt = self.kernel.id_to_stmt[stmt_id] my_group_tags = [] my_local_tags = [] - for iname in insn.within_inames: + for iname in stmt.within_inames: tag = self.kernel.iname_to_tag.get(iname) if tag is None: @@ -418,25 +418,25 @@ class TemporarySaver(object): if group_tags is None: group_tags = _sortedtags(my_group_tags) local_tags = _sortedtags(my_local_tags) - group_tags_originating_insn_id = insn_id + group_tags_originating_stmt_id = stmt_id if ( group_tags != _sortedtags(my_group_tags) or local_tags != _sortedtags(my_local_tags)): raise LoopyError( - "inconsistent parallel tags across instructions that access " - "'%s' (specifically, instruction '%s' has tags '%s' but " - "instruction '%s' has tags '%s')" + "inconsistent parallel tags across statements that access " + "'%s' (specifically, statement '%s' has tags '%s' but " + "statement '%s' has tags '%s')" % (temporary.name, - group_tags_originating_insn_id, group_tags + local_tags, - insn_id, my_group_tags + my_local_tags)) + group_tags_originating_stmt_id, group_tags + local_tags, + stmt_id, my_group_tags + my_local_tags)) if group_tags is None: assert local_tags is None return (), () group_sizes, local_sizes = ( - self.kernel.get_grid_sizes_for_insn_ids_as_exprs(accessor_insn_ids)) + self.kernel.get_grid_sizes_for_stmt_ids_as_exprs(accessor_stmt_ids)) if temporary.scope == lp.temp_var_scope.LOCAL: # Elide local axes in the save slot for local temporaries. 
@@ -506,7 +506,7 @@ class TemporarySaver(object): self.new_subdomain = new_subdomain - save_or_load_insn_id = self.insn_name_gen( + save_or_load_stmt_id = self.stmt_name_gen( "{name}.{mode}".format(name=temporary, mode=mode)) def add_subscript_if_subscript_nonempty(agg, subscript=()): @@ -532,15 +532,15 @@ class TemporarySaver(object): if mode == "save": args = reversed(args) - accessing_insns_in_subkernel = self.find_accessing_instructions_in_subkernel( + accessing_stmts_in_subkernel = self.find_accessing_statements_in_subkernel( temporary, subkernel) if mode == "save": - depends_on = accessing_insns_in_subkernel + depends_on = accessing_stmts_in_subkernel update_deps = frozenset() elif mode == "reload": depends_on = frozenset() - update_deps = accessing_insns_in_subkernel + update_deps = accessing_stmts_in_subkernel pre_barrier, post_barrier = self.get_enclosing_global_barrier_pair(subkernel) @@ -550,11 +550,11 @@ class TemporarySaver(object): if post_barrier is not None: update_deps |= set([post_barrier]) - # Create the load / store instruction. + # Create the load / store statement. 
from loopy.kernel.data import Assignment - save_or_load_insn = Assignment( + save_or_load_stmt = Assignment( *args, - id=save_or_load_insn_id, + id=save_or_load_stmt_id, within_inames=( self.subkernel_to_surrounding_inames[subkernel] | frozenset(hw_inames + dim_inames)), @@ -564,18 +564,18 @@ class TemporarySaver(object): boostable_into=frozenset()) if mode == "save": - self.temporary_to_save_ids[temporary].add(save_or_load_insn_id) + self.temporary_to_save_ids[temporary].add(save_or_load_stmt_id) else: - self.temporary_to_reload_ids[temporary].add(save_or_load_insn_id) + self.temporary_to_reload_ids[temporary].add(save_or_load_stmt_id) - self.subkernel_to_newly_added_insn_ids[subkernel].add(save_or_load_insn_id) + self.subkernel_to_newly_added_stmt_ids[subkernel].add(save_or_load_stmt_id) - self.insns_to_insert.append(save_or_load_insn) + self.stmts_to_insert.append(save_or_load_stmt) - for insn_id in update_deps: - insn = self.insns_to_update.get(insn_id, self.kernel.id_to_insn[insn_id]) - self.insns_to_update[insn_id] = insn.copy( - depends_on=insn.depends_on | frozenset([save_or_load_insn_id])) + for stmt_id in update_deps: + stmt = self.stmts_to_update.get(stmt_id, self.kernel.id_to_stmt[stmt_id]) + self.stmts_to_update[stmt_id] = stmt.copy( + depends_on=stmt.depends_on | frozenset([save_or_load_stmt_id])) self.updated_temporary_variables[promoted_temporary.name] = ( promoted_temporary.as_kernel_temporary(self.kernel)) @@ -584,17 +584,17 @@ class TemporarySaver(object): @memoize_method def finish(self): - new_instructions = [] + new_statements = [] - insns_to_insert = dict((insn.id, insn) for insn in self.insns_to_insert) + stmts_to_insert = dict((stmt.id, stmt) for stmt in self.stmts_to_insert) - for orig_insn in self.kernel.instructions: - if orig_insn.id in self.insns_to_update: - new_instructions.append(self.insns_to_update[orig_insn.id]) + for orig_stmt in self.kernel.statements: + if orig_stmt.id in self.stmts_to_update: + 
new_statements.append(self.stmts_to_update[orig_stmt.id]) else: - new_instructions.append(orig_insn) - new_instructions.extend( - sorted(insns_to_insert.values(), key=lambda insn: insn.id)) + new_statements.append(orig_stmt) + new_statements.extend( + sorted(stmts_to_insert.values(), key=lambda stmt: stmt.id)) self.updated_iname_to_tag.update(self.kernel.iname_to_tag) self.updated_temporary_variables.update(self.kernel.temporary_variables) @@ -606,22 +606,22 @@ class TemporarySaver(object): kernel = self.kernel.copy( domains=new_domains, - instructions=new_instructions, + statements=new_statements, iname_to_tag=self.updated_iname_to_tag, temporary_variables=self.updated_temporary_variables, - overridden_get_grid_sizes_for_insn_ids=None) + overridden_get_grid_sizes_for_stmt_ids=None) # Add nosync directives to any saves or reloads that were added with a # potential dependency chain. from loopy.kernel.tools import get_subkernels for subkernel in get_subkernels(kernel): - relevant_insns = self.subkernel_to_newly_added_insn_ids[subkernel] + relevant_stmts = self.subkernel_to_newly_added_stmt_ids[subkernel] from itertools import product for temporary in self.temporary_to_reload_ids: for source, sink in product( - relevant_insns & self.temporary_to_reload_ids[temporary], - relevant_insns & self.temporary_to_save_ids[temporary]): + relevant_stmts & self.temporary_to_reload_ids[temporary], + relevant_stmts & self.temporary_to_save_ids[temporary]): kernel = lp.add_nosync(kernel, "global", source, sink) from loopy.kernel.tools import assign_automatic_axes @@ -662,7 +662,7 @@ class TemporarySaver(object): + len(promoted_temporary.hw_dims)) for dim_idx, dim_size in enumerate(promoted_temporary.non_hw_dims): - new_iname = self.insn_name_gen("{name}_{mode}_axis_{dim}_{sk}". + new_iname = self.stmt_name_gen("{name}_{mode}_axis_{dim}_{sk}". format(name=orig_temporary.name, mode=mode, dim=dim_idx, @@ -689,7 +689,7 @@ class TemporarySaver(object): # Add hardware dims. 
for hw_iname_idx, (hw_tag, dim) in enumerate( zip(promoted_temporary.hw_tags, promoted_temporary.hw_dims)): - new_iname = self.insn_name_gen("{name}_{mode}_hw_dim_{dim}_{sk}". + new_iname = self.stmt_name_gen("{name}_{mode}_hw_dim_{dim}_{sk}". format(name=orig_temporary.name, mode=mode, dim=hw_iname_idx, @@ -721,7 +721,7 @@ class TemporarySaver(object): def save_and_reload_temporaries(knl): """ - Add instructions to save and reload temporary variables that are live + Add statements to save and reload temporary variables that are live across kernel calls. The basic code transformation turns schedule segments:: diff --git a/loopy/transform/statement.py b/loopy/transform/statement.py new file mode 100644 index 000000000..afea0430e --- /dev/null +++ b/loopy/transform/statement.py @@ -0,0 +1,339 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import six # noqa + +from loopy.diagnostic import LoopyError + + +# {{{ find_statements + +def find_statements(kernel, stmt_match): + from loopy.match import parse_match + match = parse_match(stmt_match) + return [stmt for stmt in kernel.statements if match(kernel, stmt)] + +# }}} + + +# {{{ map_statements + +def map_statements(kernel, stmt_match, f): + from loopy.match import parse_match + match = parse_match(stmt_match) + + new_stmts = [] + + for stmt in kernel.statements: + if match(kernel, stmt): + new_stmts.append(f(stmt)) + else: + new_stmts.append(stmt) + + return kernel.copy(statements=new_stmts) + +# }}} + + +# {{{ set_statement_priority + +def set_statement_priority(kernel, stmt_match, priority): + """Set the priority of statements matching *stmt_match* to *priority*. + + *stmt_match* may be any statement id match understood by + :func:`loopy.match.parse_match`. + """ + + def set_prio(stmt): + return stmt.copy(priority=priority) + + return map_statements(kernel, stmt_match, set_prio) + +# }}} + + +# {{{ add_dependency + +def add_dependency(kernel, stmt_match, depends_on): + """Add the statement dependency *dependency* to the statements matched + by *stmt_match*. + + *stmt_match* and *depends_on* may be any statement id match understood by + :func:`loopy.match.parse_match`. + + .. versionchanged:: 2016.3 + + Third argument renamed to *depends_on* for clarity, allowed to + be not just ID but also match expression. 
+ """ + + if isinstance(depends_on, str) and depends_on in kernel.id_to_stmt: + added_deps = frozenset([depends_on]) + else: + added_deps = frozenset( + dep.id for dep in find_statements(kernel, depends_on)) + + if not added_deps: + raise LoopyError("no statements found matching '%s' " + "(to add as dependencies)" % depends_on) + + matched = [False] + + def add_dep(stmt): + new_deps = stmt.depends_on + matched[0] = True + if new_deps is None: + new_deps = added_deps + else: + new_deps = new_deps | added_deps + + return stmt.copy(depends_on=new_deps) + + result = map_statements(kernel, stmt_match, add_dep) + + if not matched[0]: + raise LoopyError("no statements found matching '%s' " + "(to which dependencies would be added)" % stmt_match) + + return result + +# }}} + + +# {{{ remove_statements + +def remove_statements(kernel, stmt_ids): + """Return a new kernel with statements in *stmt_ids* removed. + + Dependencies across (one, for now) deleted isntructions are propagated. + Behavior is undefined for now for chains of dependencies within the + set of deleted statements. + + This also updates *no_sync_with* for all statements. 
+ """ + + if not stmt_ids: + return kernel + + assert isinstance(stmt_ids, set) + id_to_stmt = kernel.id_to_stmt + + new_stmts = [] + for stmt in kernel.statements: + if stmt.id in stmt_ids: + continue + + # transitively propagate dependencies + # (only one level for now) + if stmt.depends_on is None: + depends_on = frozenset() + else: + depends_on = stmt.depends_on + + new_deps = depends_on - stmt_ids + + for dep_id in depends_on & stmt_ids: + new_deps = new_deps | id_to_stmt[dep_id].depends_on + + # update no_sync_with + + new_no_sync_with = frozenset((stmt_id, scope) + for stmt_id, scope in stmt.no_sync_with + if stmt_id not in stmt_ids) + + new_stmts.append( + stmt.copy(depends_on=new_deps, no_sync_with=new_no_sync_with)) + + return kernel.copy( + statements=new_stmts) + +# }}} + + +# {{{ replace_statement_ids + +def replace_statement_ids(kernel, replacements): + new_stmts = [] + + for stmt in kernel.statements: + changed = False + new_depends_on = [] + new_no_sync_with = [] + + for dep in stmt.depends_on: + if dep in replacements: + new_depends_on.extend(replacements[dep]) + changed = True + else: + new_depends_on.append(dep) + + for stmt_id, scope in stmt.no_sync_with: + if stmt_id in replacements: + new_no_sync_with.extend( + (repl, scope) for repl in replacements[stmt_id]) + changed = True + else: + new_no_sync_with.append((stmt_id, scope)) + + new_stmts.append( + stmt.copy( + depends_on=frozenset(new_depends_on), + no_sync_with=frozenset(new_no_sync_with)) + if changed else stmt) + + return kernel.copy(statements=new_stmts) + +# }}} + + +# {{{ tag_statements + +def tag_statements(kernel, new_tag, within=None): + from loopy.match import parse_match + within = parse_match(within) + + new_stmts = [] + for stmt in kernel.statements: + if within(kernel, stmt): + new_stmts.append( + stmt.copy(tags=stmt.tags | frozenset([new_tag]))) + else: + new_stmts.append(stmt) + + return kernel.copy(statements=new_stmts) + +# }}} + + +# {{{ add nosync + +def 
add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): + """Add a *no_sync_with* directive between *source* and *sink*. + *no_sync_with* is only added if *sink* depends on *source* or + if the statement pair is in a conflicting group. + + This function does not check for the presence of a memory dependency. + + :arg kernel: The kernel + :arg source: Either a single statement id, or any statement id + match understood by :func:`loopy.match.parse_match`. + :arg sink: Either a single statement id, or any statement id + match understood by :func:`loopy.match.parse_match`. + :arg scope: A valid *no_sync_with* scope. See + :attr:`loopy.StatementBase.no_sync_with` for allowable scopes. + :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with* + to both the source and sink statements, otherwise the directive + is only added to the sink statements. + :arg force: A :class:`bool`. If *True*, add a *no_sync_with* directive + even without the presence of a dependency edge or conflicting + statement group. 
+ + :return: The updated kernel + """ + + if isinstance(source, str) and source in kernel.id_to_stmt: + sources = frozenset([source]) + else: + sources = frozenset( + source.id for source in find_statements(kernel, source)) + + if isinstance(sink, str) and sink in kernel.id_to_stmt: + sinks = frozenset([sink]) + else: + sinks = frozenset( + sink.id for sink in find_statements(kernel, sink)) + + def stmts_in_conflicting_groups(stmt1_id, stmt2_id): + stmt1 = kernel.id_to_stmt[stmt1_id] + stmt2 = kernel.id_to_stmt[stmt2_id] + return ( + bool(stmt1.groups & stmt2.conflicts_with_groups) + or + bool(stmt2.groups & stmt1.conflicts_with_groups)) + + from collections import defaultdict + nosync_to_add = defaultdict(set) + + for sink in sinks: + for source in sources: + + needs_nosync = force or ( + source in kernel.recursive_stmt_dep_map()[sink] + or stmts_in_conflicting_groups(source, sink)) + + if not needs_nosync: + continue + + nosync_to_add[sink].add((source, scope)) + if bidirectional: + nosync_to_add[source].add((sink, scope)) + + new_statements = list(kernel.statements) + + for i, stmt in enumerate(new_statements): + if stmt.id in nosync_to_add: + new_statements[i] = stmt.copy(no_sync_with=stmt.no_sync_with + | frozenset(nosync_to_add[stmt.id])) + + return kernel.copy(statements=new_statements) + +# }}} + + +# {{{ uniquify_statement_ids + +def uniquify_statement_ids(kernel): + """Converts any ids that are :class:`loopy.UniqueName` or *None* into unique + strings. + + This function does *not* deduplicate existing statement ids. 
+ """ + + from loopy.kernel.creation import UniqueName + + stmt_ids = set( + stmt.id for stmt in kernel.statements + if stmt.id is not None and not isinstance(stmt.id, UniqueName)) + + from pytools import UniqueNameGenerator + stmt_id_gen = UniqueNameGenerator(stmt_ids) + + new_statements = [] + + for stmt in kernel.statements: + if stmt.id is None: + new_statements.append( + stmt.copy(id=stmt_id_gen("stmt"))) + elif isinstance(stmt.id, UniqueName): + new_statements.append( + stmt.copy(id=stmt_id_gen(stmt.id.name))) + else: + new_statements.append(stmt) + + return kernel.copy(statements=new_statements) + +# }}} + + +# vim: foldmethod=marker diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 79ceff9fd..0fb706e2d 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) class ExprDescriptor(ImmutableRecord): - __slots__ = ["insn", "expr", "unif_var_dict"] + __slots__ = ["stmt", "expr", "unif_var_dict"] # {{{ extract_subst @@ -128,7 +128,7 @@ def extract_subst(kernel, subst_name, template, parameters=()): expr_descriptors.append( ExprDescriptor( - insn=insn, + stmt=stmt, expr=expr, unif_var_dict=dict((lhs.name, rhs) for lhs, rhs in urec.equations))) @@ -140,8 +140,8 @@ def extract_subst(kernel, subst_name, template, parameters=()): CallbackMapper, WalkMapper, IdentityMapper) dfmapper = CallbackMapper(gather_exprs, WalkMapper()) - for insn in kernel.instructions: - dfmapper(insn.expression) + for stmt in kernel.statements: + dfmapper(stmt.expression) for sr in six.itervalues(kernel.substitutions): dfmapper(sr.expression) @@ -151,7 +151,7 @@ def extract_subst(kernel, subst_name, template, parameters=()): if not expr_descriptors: raise RuntimeError("no expressions matching '%s'" % template) - # {{{ substitute rule into instructions + # {{{ substitute rule into statements def replace_exprs(expr, mapper): found = False @@ -175,11 +175,11 @@ def extract_subst(kernel, subst_name, template, 
parameters=()): cbmapper = CallbackMapper(replace_exprs, IdentityMapper()) - new_insns = [] + new_stmts = [] - for insn in kernel.instructions: - new_expr = cbmapper(insn.expression) - new_insns.append(insn.copy(expression=new_expr)) + for stmt in kernel.statements: + new_expr = cbmapper(stmt.expression) + new_stmts.append(stmt.copy(expression=new_expr)) from loopy.kernel.data import SubstitutionRule new_substs = { @@ -196,7 +196,7 @@ def extract_subst(kernel, subst_name, template, parameters=()): # }}} return kernel.copy( - instructions=new_insns, + statements=new_stmts, substitutions=new_substs) # }}} @@ -205,14 +205,14 @@ def extract_subst(kernel, subst_name, template, parameters=()): # {{{ assignment_to_subst class AssignmentToSubstChanger(RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, lhs_name, definition_insn_ids, + def __init__(self, rule_mapping_context, lhs_name, definition_stmt_ids, usage_to_definition, extra_arguments, within): self.var_name_gen = rule_mapping_context.make_unique_var_name super(AssignmentToSubstChanger, self).__init__(rule_mapping_context) self.lhs_name = lhs_name - self.definition_insn_ids = definition_insn_ids + self.definition_stmt_ids = definition_stmt_ids self.usage_to_definition = usage_to_definition from pymbolic import var @@ -220,18 +220,18 @@ class AssignmentToSubstChanger(RuleAwareIdentityMapper): self.within = within - self.definition_insn_id_to_subst_name = {} + self.definition_stmt_id_to_subst_name = {} self.saw_unmatched_usage_sites = {} - for def_id in self.definition_insn_ids: + for def_id in self.definition_stmt_ids: self.saw_unmatched_usage_sites[def_id] = False - def get_subst_name(self, def_insn_id): + def get_subst_name(self, def_stmt_id): try: - return self.definition_insn_id_to_subst_name[def_insn_id] + return self.definition_stmt_id_to_subst_name[def_stmt_id] except KeyError: subst_name = self.var_name_gen(self.lhs_name+"_subst") - self.definition_insn_id_to_subst_name[def_insn_id] = 
subst_name + self.definition_stmt_id_to_subst_name[def_stmt_id] = subst_name return subst_name def map_variable(self, expr, expn_state): @@ -255,16 +255,16 @@ class AssignmentToSubstChanger(RuleAwareIdentityMapper): expr, expn_state) def transform_access(self, index, expn_state): - my_insn_id = expn_state.insn_id + my_stmt_id = expn_state.stmt_id - if my_insn_id in self.definition_insn_ids: + if my_stmt_id in self.definition_stmt_ids: return None - my_def_id = self.usage_to_definition[my_insn_id] + my_def_id = self.usage_to_definition[my_stmt_id] if not self.within( expn_state.kernel, - expn_state.instruction, + expn_state.statement, expn_state.stack): self.saw_unmatched_usage_sites[my_def_id] = True return None @@ -314,31 +314,31 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, from loopy.kernel.creation import apply_single_writer_depencency_heuristic dep_kernel = apply_single_writer_depencency_heuristic(dep_kernel) - id_to_insn = dep_kernel.id_to_insn + id_to_stmt = dep_kernel.id_to_stmt - def get_relevant_definition_insn_id(usage_insn_id): - insn = id_to_insn[usage_insn_id] + def get_relevant_definition_stmt_id(usage_stmt_id): + stmt = id_to_stmt[usage_stmt_id] def_id = set() - for dep_id in insn.depends_on: - dep_insn = id_to_insn[dep_id] - if lhs_name in dep_insn.write_dependency_names(): - if lhs_name in dep_insn.read_dependency_names(): - raise LoopyError("instruction '%s' both reads *and* " + for dep_id in stmt.depends_on: + dep_stmt = id_to_stmt[dep_id] + if lhs_name in dep_stmt.write_dependency_names(): + if lhs_name in dep_stmt.read_dependency_names(): + raise LoopyError("statement '%s' both reads *and* " "writes '%s'--cannot transcribe to substitution " "rule" % (dep_id, lhs_name)) def_id.add(dep_id) else: - rec_result = get_relevant_definition_insn_id(dep_id) + rec_result = get_relevant_definition_stmt_id(dep_id) if rec_result is not None: def_id.add(rec_result) if len(def_id) > 1: raise LoopyError("more than one write to 
'%s' found in " "depdendencies of '%s'--definition cannot be resolved " - "(writer instructions ids: %s)" - % (lhs_name, usage_insn_id, ", ".join(def_id))) + "(writer statements ids: %s)" + % (lhs_name, usage_stmt_id, ", ".join(def_id))) if not def_id: return None @@ -349,26 +349,26 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, usage_to_definition = {} - for insn in dep_kernel.instructions: - if lhs_name not in insn.read_dependency_names(): + for stmt in dep_kernel.statements: + if lhs_name not in stmt.read_dependency_names(): continue - def_id = get_relevant_definition_insn_id(insn.id) + def_id = get_relevant_definition_stmt_id(stmt.id) if def_id is None: raise LoopyError("no write to '%s' found in dependency tree " "of '%s'--definition cannot be resolved" - % (lhs_name, insn.id)) + % (lhs_name, stmt.id)) - usage_to_definition[insn.id] = def_id + usage_to_definition[stmt.id] = def_id - definition_insn_ids = set() - for insn in kernel.instructions: - if lhs_name in insn.write_dependency_names(): - definition_insn_ids.add(insn.id) + definition_stmt_ids = set() + for stmt in kernel.statements: + if lhs_name in stmt.write_dependency_names(): + definition_stmt_ids.add(stmt.id) # }}} - if not definition_insn_ids: + if not definition_stmt_ids: raise LoopyError("no assignments to variable '%s' found" % lhs_name) @@ -378,7 +378,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) tts = AssignmentToSubstChanger(rule_mapping_context, - lhs_name, definition_insn_ids, + lhs_name, definition_stmt_ids, usage_to_definition, extra_arguments, within) kernel = rule_mapping_context.finish_kernel(tts.map_kernel(kernel)) @@ -388,27 +388,27 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, # {{{ create new substitution rules new_substs = kernel.substitutions.copy() - for def_id, subst_name in 
six.iteritems(tts.definition_insn_id_to_subst_name): - def_insn = kernel.id_to_insn[def_id] + for def_id, subst_name in six.iteritems(tts.definition_stmt_id_to_subst_name): + def_stmt = kernel.id_to_stmt[def_id] from loopy.kernel.data import Assignment - assert isinstance(def_insn, Assignment) + assert isinstance(def_stmt, Assignment) from pymbolic.primitives import Variable, Subscript - if isinstance(def_insn.assignee, Subscript): - indices = def_insn.assignee.index_tuple - elif isinstance(def_insn.assignee, Variable): + if isinstance(def_stmt.assignee, Subscript): + indices = def_stmt.assignee.index_tuple + elif isinstance(def_stmt.assignee, Variable): indices = () else: raise LoopyError( "Unrecognized LHS type: %s" - % type(def_insn.assignee).__name__) + % type(def_stmt.assignee).__name__) arguments = [] for i in indices: if not isinstance(i, Variable): - raise LoopyError("In defining instruction '%s': " + raise LoopyError("In defining statement '%s': " "asignee index '%s' is not a plain variable. " "Perhaps use loopy.affine_map_inames() " "to perform substitution." 
% (def_id, i)) @@ -418,7 +418,7 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, new_substs[subst_name] = SubstitutionRule( name=subst_name, arguments=tuple(arguments) + extra_arguments, - expression=def_insn.expression) + expression=def_stmt.expression) # }}} @@ -450,11 +450,11 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, # }}} import loopy as lp - kernel = lp.remove_instructions( + kernel = lp.remove_statements( kernel, set( - insn_id - for insn_id, still_used in six.iteritems( + stmt_id + for stmt_id, still_used in six.iteritems( tts.saw_unmatched_usage_sites) if not still_used)) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 409cbbc5e..b38ab7fa5 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -418,17 +418,17 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): type_inf_mapper = type_inf_mapper.copy() - for writer_insn_id in kernel.writer_map().get(var_name, []): - writer_insn = kernel.id_to_insn[writer_insn_id] - if not isinstance(writer_insn, lp.MultiAssignmentBase): + for writer_stmt_id in kernel.writer_map().get(var_name, []): + writer_stmt = kernel.id_to_stmt[writer_stmt_id] + if not isinstance(writer_stmt, lp.MultiAssignmentBase): continue - expr = subst_expander(writer_insn.expression) + expr = subst_expander(writer_stmt.expression) debug(" via expr %s", expr) - if isinstance(writer_insn, lp.Assignment): + if isinstance(writer_stmt, lp.Assignment): result = type_inf_mapper(expr, return_dtype_set=True) - elif isinstance(writer_insn, lp.CallInstruction): + elif isinstance(writer_stmt, lp.CallStatement): return_dtype_set = type_inf_mapper(expr, return_tuple=True, return_dtype_set=True) @@ -437,7 +437,7 @@ def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander): result_i = None found = False for assignee, comp_dtype_set in zip( - writer_insn.assignee_var_names(), return_dtype_set): + writer_stmt.assignee_var_names(), 
return_dtype_set): if assignee == var_name: found = True result_i = comp_dtype_set @@ -526,8 +526,8 @@ def infer_unknown_types(kernel, expect_completion=False): dep_graph = dict( (written_var, set( read_var - for insn_id in writer_map.get(written_var, []) - for read_var in kernel.id_to_insn[insn_id].read_dependency_names() + for stmt_id in writer_map.get(written_var, []) + for read_var in kernel.id_to_stmt[stmt_id].read_dependency_names() if read_var in names_for_type_inference)) for written_var in names_for_type_inference) diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index a2cba7c57..ba5c7ecaa 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -51,7 +51,7 @@ def test_laplacian_stiffness(ctx_factory): # This (mostly) reproduces Figure 3.1. knl = lp.tag_inames(knl, {"dx_axis": "unr"}) - return knl, ["K", "i", "j", "q", "ax_b_insn"] + return knl, ["K", "i", "j", "q", "ax_b_stmt"] def variant_pg4(knl): # This (mostly) reproduces the unlabeled code snippet on pg. 4. @@ -60,7 +60,7 @@ def test_laplacian_stiffness(ctx_factory): Ncloc = 16 knl = lp.split_iname(knl, "K", Ncloc, outer_iname="Ko", inner_iname="Kloc") - return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_insn"] + return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_stmt"] def variant_fig32(knl): # This (mostly) reproduces Figure 3.2. @@ -71,7 +71,7 @@ def test_laplacian_stiffness(ctx_factory): knl = lp.precompute(knl, "dPsi", np.float32, ["i", "q", "dx_axis"], default_tag=None) knl = lp.tag_inames(knl, {"dx_axis": "unr", "dxi": "unr"}) - return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_insn"] + return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_stmt"] def variant_fig33(knl): # This is meant to (mostly) reproduce Figure 3.3. 
@@ -97,7 +97,7 @@ def test_laplacian_stiffness(ctx_factory): outer_iname="Ko", inner_iname="Kloc", outer_tag="g.0") knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"}) - return knl, ["K", "i", "j", "q", "ax_b_insn"] + return knl, ["K", "i", "j", "q", "ax_b_stmt"] def variant_simple_gpu_prefetch(knl): # This adds prefetching to the GPU variant above. @@ -116,7 +116,7 @@ def test_laplacian_stiffness(ctx_factory): knl = lp.add_prefetch(knl, "DPsi", [0, 1, 2]) knl = lp.add_prefetch(knl, "jacInv", [0, 1, 3]) knl = lp.add_prefetch(knl, "jacDet", [1]) - return knl, ["K", "i", "j", "q", "ax_b_insn"] + return knl, ["K", "i", "j", "q", "ax_b_stmt"] # Plug in variant name here # | diff --git a/stmt-compat-fixes.patch b/stmt-compat-fixes.patch index 7c7c9322e..7cd61941e 100644 --- a/stmt-compat-fixes.patch +++ b/stmt-compat-fixes.patch @@ -19,8 +19,8 @@ index 68fcca1..6d788df 100644 + overridden_get_grid_sizes_for_stmt_ids=None, + + # compat -+ instructions=None, -+ overridden_get_grid_sizes_for_insn_ids=None, ++ statements=None, ++ overridden_get_grid_sizes_for_stmt_ids=None, + ): """ :arg overridden_get_grid_sizes_for_stmt_ids: A callable. 
When kernels get @@ -29,22 +29,22 @@ index 68fcca1..6d788df 100644 from loopy.kernel.tools import SetOperationCacheManager cache_manager = SetOperationCacheManager() -+ if statements is not None and instructions is not None: -+ raise TypeError("may not specify both instructions and statements") -+ elif statements is None and instructions is None: ++ if statements is not None and instructions is not None: ++ raise TypeError("may not specify both instructions and statements") ++ elif statements is None and instructions is None: + raise TypeError( -+ "must specify exactly one of instructions and statements") -+ elif instructions is not None: -+ statements = instructions ++ "must specify exactly one of instructions and statements") ++ elif instructions is not None: ++ statements = instructions + + if (overridden_get_grid_sizes_for_stmt_ids is not None + and overridden_get_grid_sizes_for_stmt_ids is not None): + raise TypeError("may not specify both " + "overridden_get_grid_sizes_for_stmt_ids " -+ "and overridden_get_grid_sizes_for_insn_ids{") ++ "and overridden_get_grid_sizes_for_insn_ids") -+ elif overridden_get_grid_sizes_for_insn_ids is not None: ++ elif overridden_get_grid_sizes_for_insn_ids is not None: + overridden_get_grid_sizes_for_stmt_ids = \ -+ overridden_get_grid_sizes_for_insn_ids ++ overridden_get_grid_sizes_for_insn_ids + # {{{ process assumptions @@ -55,7 +55,7 @@ index 68fcca1..6d788df 100644 "rules", - "statements", + "Statements", -+ "instructions", ++ "instructions", "Dependencies", "schedule", ]) @@ -64,7 +64,7 @@ index 68fcca1..6d788df 100644 lines.append(str(kernel.substitutions[rule_name])) - if "statements" in what: -+ if "Statements" in what or "instructions" in what: ++ if "Statements" in what or "instructions" in what: lines.extend(sep) if show_labels: lines.append("STATEMENTS:") @@ -72,17 +72,17 @@ index 68fcca1..6d788df 100644 # }}} -+ # {{{ "instruction" compat goop ++ # {{{ "instruction" compat goop + + @property -+ def id_to_insn(self): ++ def 
id_to_insn(self): + return self.id_to_stmt + + @property -+ def instructions(self): ++ def instructions(self): + return self.statements + -+ def get_instruction_id_generator(self, based_on="insn"): ++ def get_instruction_id_generator(self, based_on="insn"): + return self.get_statement_id_generator(based_on) + + # }}} diff --git a/test/test_diff.py b/test/test_diff.py index 95471f9b1..c4d752349 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -65,7 +65,7 @@ def test_diff(ctx_factory): dknl, diff_map = diff_kernel(knl, "z", "x") dknl = lp.remove_unused_arguments(dknl) - dknl = lp.add_inames_to_insn(dknl, "diff_i0", "writes:a_dx or writes:a") + dknl = lp.add_inames_to_stmt(dknl, "diff_i0", "writes:a_dx or writes:a") print(dknl) diff --git a/test/test_fortran.py b/test/test_fortran.py index 6e05aa6ad..4fe79c2bf 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -267,7 +267,7 @@ def test_tagged(ctx_factory): knl, = lp.parse_fortran(fortran_src) - assert sum(1 for insn in lp.find_instructions(knl, "tag:input")) == 2 + assert sum(1 for stmt in lp.find_statements(knl, "tag:input")) == 2 @pytest.mark.parametrize("buffer_inames", [ diff --git a/test/test_linalg.py b/test/test_linalg.py index 772d536d1..e7c689757 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -290,7 +290,7 @@ def test_rank_one(ctx_factory): knl = lp.add_prefetch(knl, "a") knl = lp.add_prefetch(knl, "b") knl = lp.prioritize_loops(knl, ["i", "j"]) - knl = lp.add_inames_to_insn(knl, "i", "writes:b_fetch") + knl = lp.add_inames_to_stmt(knl, "i", "writes:b_fetch") return knl def variant_2(knl): diff --git a/test/test_loopy.py b/test/test_loopy.py index 704fd391f..8fcba1e21 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -628,14 +628,14 @@ def test_vector_ilp_with_prefetch(ctx_factory): assert len(list(re.finditer("barrier", code))) == 1 -def test_c_instruction(ctx_factory): +def test_c_statement(ctx_factory): #logging.basicConfig(level=logging.DEBUG) ctx = ctx_factory() 
knl = lp.make_kernel( "{[i,j]: 0<=i,jt = 1 {id=insn1,nosync=insn1} - t = 2 {id=insn2,nosync=insn1:insn2} - t = 3 {id=insn3,nosync=insn1@local:insn2@global:insn3@any} - t = 4 {id=insn4,nosync_query=id:insn*@local} - t = 5 {id=insn5,nosync_query=id:insn1} + <>t = 1 {id=stmt1,nosync=stmt1} + t = 2 {id=stmt2,nosync=stmt1:stmt2} + t = 3 {id=stmt3,nosync=stmt1@local:stmt2@global:stmt3@any} + t = 4 {id=stmt4,nosync_query=id:stmt*@local} + t = 5 {id=stmt5,nosync_query=id:stmt1} """, options=lp.Options(allow_terminal_colors=False)) kernel_str = str(knl) print(kernel_str) - assert "id=insn1, no_sync_with=insn1@any" in kernel_str - assert "id=insn2, no_sync_with=insn1@any:insn2@any" in kernel_str - assert "id=insn3, no_sync_with=insn1@local:insn2@global:insn3@any" in kernel_str - assert "id=insn4, no_sync_with=insn1@local:insn2@local:insn3@local:insn5@local" in kernel_str # noqa - assert "id=insn5, no_sync_with=insn1@any" in kernel_str + assert "id=stmt1, no_sync_with=stmt1@any" in kernel_str + assert "id=stmt2, no_sync_with=stmt1@any:stmt2@any" in kernel_str + assert "id=stmt3, no_sync_with=stmt1@local:stmt2@global:stmt3@any" in kernel_str + assert "id=stmt4, no_sync_with=stmt1@local:stmt2@local:stmt3@local:stmt5@local" in kernel_str # noqa + assert "id=stmt5, no_sync_with=stmt1@any" in kernel_str def assert_barrier_between(knl, id1, id2, ignore_barriers_in_levels=()): - from loopy.schedule import (RunInstruction, Barrier, EnterLoop, LeaveLoop) + from loopy.schedule import (RunStatement, Barrier, EnterLoop, LeaveLoop) watch_for_barrier = False seen_barrier = False loop_level = 0 for sched_item in knl.schedule: - if isinstance(sched_item, RunInstruction): - if sched_item.insn_id == id1: + if isinstance(sched_item, RunStatement): + if sched_item.stmt_id == id1: watch_for_barrier = True - elif sched_item.insn_id == id2: + elif sched_item.stmt_id == id2: assert watch_for_barrier assert seen_barrier return @@ -2313,17 +2313,17 @@ def 
test_barrier_in_overridden_get_grid_size_expanded_kernel(): vecsize = 16 knl = lp.split_iname(knl, 'i', vecsize, inner_tag='l.0') - # artifically expand via overridden_get_grid_sizes_for_insn_ids + # artifically expand via overridden_get_grid_sizes_for_stmt_ids class GridOverride(object): def __init__(self, clean, vecsize=vecsize): self.clean = clean self.vecsize = vecsize - def __call__(self, insn_ids, ignore_auto=True): - gsize, _ = self.clean.get_grid_sizes_for_insn_ids(insn_ids, ignore_auto) + def __call__(self, stmt_ids, ignore_auto=True): + gsize, _ = self.clean.get_grid_sizes_for_stmt_ids(stmt_ids, ignore_auto) return gsize, (self.vecsize,) - knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=GridOverride( + knl = knl.copy(overridden_get_grid_sizes_for_stmt_ids=GridOverride( knl.copy(), vecsize)) # make sure we can generate the code lp.generate_code_v2(knl) @@ -2384,7 +2384,7 @@ def test_global_barrier_order_finding(): assert lp.get_global_barrier_order(knl) == ("top", "yoink", "postloop") - for insn, barrier in ( + for stmt, barrier in ( ("nop", None), ("top", None), ("wr_z", "top"), @@ -2392,7 +2392,7 @@ def test_global_barrier_order_finding(): ("yoink", "top"), ("postloop", "yoink"), ("zzzv", "postloop")): - assert lp.find_most_recent_global_barrier(knl, insn) == barrier + assert lp.find_most_recent_global_barrier(knl, stmt) == barrier def test_global_barrier_error_if_unordered(): diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 0de08f5f6..5f0d03e72 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -59,8 +59,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa knl for knl in lp.parse_fortran(source, filename, auto_dependencies=False) if "KernelR" in knl.name or "KernelS" in knl.name ] - hsv_r = lp.tag_instructions(hsv_r, "rknl") - hsv_s = lp.tag_instructions(hsv_s, "sknl") + hsv_r = lp.tag_statements(hsv_r, "rknl") + hsv_s = lp.tag_statements(hsv_s, "sknl") hsv = lp.fuse_kernels([hsv_r, 
hsv_s], ["_r", "_s"]) #hsv = hsv_s @@ -92,8 +92,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa # turn the first reads into subst rules local_prep_var_names = set() - for insn in lp.find_instructions(hsv, "tag:local_prep"): - assignee, = insn.assignee_var_names() + for stmt in lp.find_statements(hsv, "tag:local_prep"): + assignee, = stmt.assignee_var_names() local_prep_var_names.add(assignee) hsv = lp.assignment_to_subst(hsv, assignee) @@ -101,8 +101,8 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa hsv = lp.assignment_to_subst(hsv, "JinvD_r") hsv = lp.assignment_to_subst(hsv, "JinvD_s") - r_fluxes = lp.find_instructions(hsv, "tag:compute_fluxes and tag:rknl") - s_fluxes = lp.find_instructions(hsv, "tag:compute_fluxes and tag:sknl") + r_fluxes = lp.find_statements(hsv, "tag:compute_fluxes and tag:rknl") + s_fluxes = lp.find_statements(hsv, "tag:compute_fluxes and tag:sknl") if ilp_multiple > 1: hsv = lp.split_iname(hsv, "k", 2, inner_tag="ilp") @@ -117,15 +117,15 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa flux_store_idx = 0 - for rflux_insn, sflux_insn in zip(r_fluxes, s_fluxes): - for knl_tag, insn, flux_inames, tmps, flux_precomp_inames in [ - ("rknl", rflux_insn, ("j", "n",), rtmps, ("jj", "ii",)), - ("sknl", sflux_insn, ("i", "n",), stmps, ("ii", "jj",)), + for rflux_stmt, sflux_stmt in zip(r_fluxes, s_fluxes): + for knl_tag, stmt, flux_inames, tmps, flux_precomp_inames in [ + ("rknl", rflux_stmt, ("j", "n",), rtmps, ("jj", "ii",)), + ("sknl", sflux_stmt, ("i", "n",), stmps, ("ii", "jj",)), ]: - flux_var, = insn.assignee_var_names() - print(insn) + flux_var, = stmt.assignee_var_names() + print(stmt) - reader, = lp.find_instructions(hsv, + reader, = lp.find_statements(hsv, "tag:{knl_tag} and reads:{flux_var}" .format(knl_tag=knl_tag, flux_var=flux_var)) diff --git a/test/test_reduction.py b/test/test_reduction.py index 555b8c0cc..86b917a42 100644 --- 
a/test/test_reduction.py +++ b/test/test_reduction.py @@ -415,7 +415,7 @@ def test_parallel_multi_output_reduction(ctx_factory): def test_reduction_with_conditional(): # Test whether realization of a reduction inherits predicates - # of the original instruction. Tested with the CTarget, because + # of the original statement. Tested with the CTarget, because # the PyOpenCL target will hoist the conditional into the host # code in this minimal example. knl = lp.make_kernel( diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index 0571e4191..7056c25a3 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -94,7 +94,7 @@ def test_tim2d(ctx_factory): knl = lp.tag_inames(knl, dict(o="unr")) knl = lp.tag_inames(knl, dict(m="unr")) - knl = lp.set_instruction_priority(knl, "id:D_fetch", 5) + knl = lp.set_statement_priority(knl, "id:D_fetch", 5) print(knl) return knl diff --git a/test/test_transform.py b/test/test_transform.py index d17f6c707..5bd140e0d 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -210,8 +210,8 @@ def test_extract_subst(ctx_factory): from loopy.symbolic import parse - insn, = knl.instructions - assert insn.expression == parse("bsquare(23) + bsquare(25)") + stmt, = knl.statements + assert stmt.expression == parse("bsquare(23) + bsquare(25)") def test_join_inames(ctx_factory): @@ -405,14 +405,14 @@ def test_precompute_with_preexisting_inames_fail(): def test_add_nosync(): orig_knl = lp.make_kernel("{[i]: 0<=i<10}", """ - <>tmp[i] = 10 {id=insn1} - <>tmp2[i] = 10 {id=insn2} + <>tmp[i] = 10 {id=stmt1} + <>tmp2[i] = 10 {id=stmt2} - <>tmp3[2*i] = 0 {id=insn3} - <>tmp4 = 1 + tmp3[2*i] {id=insn4} + <>tmp3[2*i] = 0 {id=stmt3} + <>tmp4 = 1 + tmp3[2*i] {id=stmt4} - <>tmp5[i] = 0 {id=insn5,groups=g1} - tmp5[i] = 1 {id=insn6,conflicts=g1} + <>tmp5[i] = 0 {id=stmt5,groups=g1} + tmp5[i] = 1 {id=stmt6,conflicts=g1} """) orig_knl = lp.set_temporary_scope(orig_knl, "tmp3", "local") @@ -420,39 +420,39 @@ def test_add_nosync(): 
# No dependency present - don't add nosync knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2") - assert frozenset() == knl.id_to_insn["insn2"].no_sync_with + assert frozenset() == knl.id_to_stmt["stmt2"].no_sync_with # Dependency present knl = lp.add_nosync(orig_knl, "local", "writes:tmp3", "reads:tmp3") - assert frozenset() == knl.id_to_insn["insn3"].no_sync_with - assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with + assert frozenset() == knl.id_to_stmt["stmt3"].no_sync_with + assert frozenset([("stmt3", "local")]) == knl.id_to_stmt["stmt4"].no_sync_with # Bidirectional knl = lp.add_nosync( orig_knl, "local", "writes:tmp3", "reads:tmp3", bidirectional=True) - assert frozenset([("insn4", "local")]) == knl.id_to_insn["insn3"].no_sync_with - assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with + assert frozenset([("stmt4", "local")]) == knl.id_to_stmt["stmt3"].no_sync_with + assert frozenset([("stmt3", "local")]) == knl.id_to_stmt["stmt4"].no_sync_with # Groups - knl = lp.add_nosync(orig_knl, "local", "insn5", "insn6") - assert frozenset([("insn5", "local")]) == knl.id_to_insn["insn6"].no_sync_with + knl = lp.add_nosync(orig_knl, "local", "stmt5", "stmt6") + assert frozenset([("stmt5", "local")]) == knl.id_to_stmt["stmt6"].no_sync_with -def test_uniquify_instruction_ids(): +def test_uniquify_statement_ids(): i1 = lp.Assignment("b", 1, id=None) i2 = lp.Assignment("b", 1, id=None) i3 = lp.Assignment("b", 1, id=lp.UniqueName("b")) i4 = lp.Assignment("b", 1, id=lp.UniqueName("b")) - knl = lp.make_kernel("{[i]: i = 1}", []).copy(instructions=[i1, i2, i3, i4]) + knl = lp.make_kernel("{[i]: i = 1}", []).copy(statements=[i1, i2, i3, i4]) - from loopy.transform.instruction import uniquify_instruction_ids - knl = uniquify_instruction_ids(knl) + from loopy.transform.statement import uniquify_statement_ids + knl = uniquify_statement_ids(knl) - insn_ids = set(insn.id for insn in knl.instructions) + stmt_ids = 
set(stmt.id for stmt in knl.statements) - assert len(insn_ids) == 4 - assert all(isinstance(id, str) for id in insn_ids) + assert len(stmt_ids) == 4 + assert all(isinstance(id, str) for id in stmt_ids) if __name__ == "__main__": -- GitLab