From 0cb709f1fd50518468443742652f047744db286b Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Fri, 25 Sep 2020 02:23:17 -0500 Subject: [PATCH 1/7] Add caching to batched diff --- grudge/execution.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/grudge/execution.py b/grudge/execution.py index 54f93a63..83f98f81 100644 --- a/grudge/execution.py +++ b/grudge/execution.py @@ -528,14 +528,18 @@ class ExecutionMapper(mappers.Evaluator, if in_grp.nelements == 0: continue - matrices = repr_op.matrices(out_grp, in_grp) - - # FIXME: Should transfer matrices to device and cache them - matrices_ary = np.empty(( - noperators, out_grp.nunit_dofs, in_grp.nunit_dofs)) - for i, op in enumerate(insn.operators): - matrices_ary[i] = matrices[op.rst_axis] - matrices_ary_dev = self.array_context.from_numpy(matrices_ary) + # Cache operator + cache_key = "diff_batch", in_grp, out_grp, repr_op, field.dtype + try: + matrices_ary_dev = self.bound_op.operator_data_cache[cache_key] + except KeyError: + matrices = repr_op.matrices(out_grp, in_grp) + matrices_ary = np.empty(( + noperators, out_grp.nunit_dofs, in_grp.nunit_dofs)) + for i, op in enumerate(insn.operators): + matrices_ary[i] = matrices[op.rst_axis] + matrices_ary_dev = self.array_context.from_numpy(matrices_ary) + self.bound_op.operator_data_cache[cache_key] = matrices_ary_dev self.array_context.call_loopy( prg(noperators), -- GitLab From eaf829292b5dcb916bbb3e169584a097a72e057e Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Fri, 25 Sep 2020 15:05:52 -0500 Subject: [PATCH 2/7] try slicing --- grudge/execution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grudge/execution.py b/grudge/execution.py index 83f98f81..989d1ccc 100644 --- a/grudge/execution.py +++ b/grudge/execution.py @@ -537,7 +537,7 @@ class ExecutionMapper(mappers.Evaluator, matrices_ary = np.empty(( noperators, out_grp.nunit_dofs, in_grp.nunit_dofs)) for i, op in 
enumerate(insn.operators): - matrices_ary[i] = matrices[op.rst_axis] + matrices_ary[i,:,:] = matrices[op.rst_axis][:,:] matrices_ary_dev = self.array_context.from_numpy(matrices_ary) self.bound_op.operator_data_cache[cache_key] = matrices_ary_dev -- GitLab From 7819362faf778363e5b59c461b4a906adaf3bdfd Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Sat, 26 Sep 2020 17:49:44 -0500 Subject: [PATCH 3/7] Use all operators in cache key --- grudge/execution.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/grudge/execution.py b/grudge/execution.py index 989d1ccc..f8420e78 100644 --- a/grudge/execution.py +++ b/grudge/execution.py @@ -529,7 +529,7 @@ class ExecutionMapper(mappers.Evaluator, continue # Cache operator - cache_key = "diff_batch", in_grp, out_grp, repr_op, field.dtype + cache_key = "diff_batch", in_grp, out_grp, tuple(insn.operators), field.dtype try: matrices_ary_dev = self.bound_op.operator_data_cache[cache_key] except KeyError: @@ -537,10 +537,10 @@ class ExecutionMapper(mappers.Evaluator, matrices_ary = np.empty(( noperators, out_grp.nunit_dofs, in_grp.nunit_dofs)) for i, op in enumerate(insn.operators): - matrices_ary[i,:,:] = matrices[op.rst_axis][:,:] + matrices_ary[i] = matrices[op.rst_axis] matrices_ary_dev = self.array_context.from_numpy(matrices_ary) self.bound_op.operator_data_cache[cache_key] = matrices_ary_dev - + self.array_context.call_loopy( prg(noperators), diff_mat=matrices_ary_dev, -- GitLab From 90d98074a41cebdb0aa339cb828eebc823a36747 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Sat, 26 Sep 2020 18:11:07 -0500 Subject: [PATCH 4/7] Appease Flake8 --- grudge/execution.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/grudge/execution.py b/grudge/execution.py index f8420e78..a28e0b26 100644 --- a/grudge/execution.py +++ b/grudge/execution.py @@ -529,7 +529,8 @@ class ExecutionMapper(mappers.Evaluator, continue # Cache operator - cache_key = "diff_batch", in_grp, 
out_grp, tuple(insn.operators), field.dtype + cache_key = "diff_batch", in_grp, out_grp, tuple(insn.operators),\ + field.dtype try: matrices_ary_dev = self.bound_op.operator_data_cache[cache_key] except KeyError: @@ -540,7 +541,7 @@ class ExecutionMapper(mappers.Evaluator, matrices_ary[i] = matrices[op.rst_axis] matrices_ary_dev = self.array_context.from_numpy(matrices_ary) self.bound_op.operator_data_cache[cache_key] = matrices_ary_dev - + self.array_context.call_loopy( prg(noperators), diff_mat=matrices_ary_dev, -- GitLab From c3aaac67e74dc22c051b0e7fb404cb3d0bcb9181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Kl=C3=B6ckner?= Date: Tue, 29 Sep 2020 17:45:07 -0500 Subject: [PATCH 5/7] Do not run tests in parallel on GitHub CI to try and avoid OOM situation --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25e262d0..bcf5fc62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,10 @@ jobs: sudo apt-get update sudo apt-get install openmpi-bin libopenmpi-dev CONDA_ENVIRONMENT=.test-conda-env-py3.yml + + # https://github.com/inducer/grudge/pull/19 + CISUPPORT_PARALLEL_PYTEST=no + curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh . ./build-and-test-py-project-within-miniconda.sh -- GitLab From 5956e2ae3580acef3473b8880b4ce5a35009f36f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 29 Sep 2020 18:05:28 -0500 Subject: [PATCH 6/7] Revert "Do not run tests in parallel on GitHub CI to try and avoid OOM situation" This reverts commit c3aaac67e74dc22c051b0e7fb404cb3d0bcb9181.
--- .github/workflows/ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bcf5fc62..25e262d0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,10 +34,6 @@ jobs: sudo apt-get update sudo apt-get install openmpi-bin libopenmpi-dev CONDA_ENVIRONMENT=.test-conda-env-py3.yml - - # https://github.com/inducer/grudge/pull/19 - CISUPPORT_PARALLEL_PYTEST=no - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh . ./build-and-test-py-project-within-miniconda.sh -- GitLab From 636182032f5f12f859e4f092ad0947d0dd488be8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 29 Sep 2020 18:17:59 -0500 Subject: [PATCH 7/7] Save time and memory on Github CI --- examples/dagrt-fusion.py | 112 +++++++++++++++++++++++---------------- test/test_grudge.py | 4 ++ 2 files changed, 69 insertions(+), 47 deletions(-) diff --git a/examples/dagrt-fusion.py b/examples/dagrt-fusion.py index 2a285ad5..85ddda7d 100755 --- a/examples/dagrt-fusion.py +++ b/examples/dagrt-fusion.py @@ -448,6 +448,7 @@ def _get_source_term(dims): def get_wave_op_with_discr(actx, dims=2, order=4): from meshmode.mesh.generation import generate_regular_rect_mesh + mesh = generate_regular_rect_mesh( a=(-0.5,)*dims, b=(0.5,)*dims, @@ -948,7 +949,10 @@ def test_stepper_timing(ctx_factory, use_fusion): properties=cl.command_queue_properties.PROFILING_ENABLE) actx = PyOpenCLArrayContext(queue) - dims = 3 + if os.environ.get("GITHUB_ACTIONS") == "true": + dims = 2 + else: + dims = 3 sym_operator, discr = get_wave_op_with_discr( actx, dims=dims, order=3) @@ -1188,27 +1192,26 @@ def scalar_assignment_percent_of_total_mem_ops_table(): actx = PyOpenCLArrayContext(queue) result2d = mem_ops_results(actx, 2) - result3d = mem_ops_results(actx, 3) + do3d = os.environ.get("GITHUB_ACTIONS") != "true" + if do3d: + result3d = mem_ops_results(actx, 3) with 
open_output_file("scalar-assignments-mem-op-percentage.tex") as outf: if not PAPER_OUTPUT: print("==== Scalar Assigment % of Total Mem Ops ====", file=outf) - print( - table( - "lr", - ("Operator", - r"\parbox{1in}{\centering \% Memory Ops. " - r"Due to Scalar Assignments}"), - ( - ("2D: Baseline", - "%.1f" % ( - 100 * result2d["nonfused_bytes_total_by_scalar_assignments"] - / result2d["nonfused_bytes_total"])), - ("2D: Inlined", - "%.1f" % ( - 100 * result2d["fused_bytes_total_by_scalar_assignments"] - / result2d["fused_bytes_total"])), + rows = ( + ("2D: Baseline", + "%.1f" % ( + 100 * result2d["nonfused_bytes_total_by_scalar_assignments"] + / result2d["nonfused_bytes_total"])), + ("2D: Inlined", + "%.1f" % ( + 100 * result2d["fused_bytes_total_by_scalar_assignments"] + / result2d["fused_bytes_total"])), + ) + if do3d: + rows = rows + ( ("3D: Baseline", "%.1f" % ( 100 * result3d["nonfused_bytes_total_by_scalar_assignments"] @@ -1217,7 +1220,15 @@ def scalar_assignment_percent_of_total_mem_ops_table(): "%.1f" % ( 100 * result3d["fused_bytes_total_by_scalar_assignments"] / result3d["fused_bytes_total"])), - )), + ) + print( + table( + "lr", + ("Operator", + r"\parbox{1in}{\centering \% Memory Ops. 
" + r"Due to Scalar Assignments}"), + rows + ), file=outf) logger.info("Wrote '%s'", outf.name) @@ -1228,39 +1239,37 @@ def scalar_assignment_effect_of_fusion_mem_ops_table(): queue = cl.CommandQueue(cl_ctx) result2d = mem_ops_results(queue, 2) - result3d = mem_ops_results(queue, 3) + do3d = os.environ.get("GITHUB_ACTIONS") != "true" + if do3d: + result3d = mem_ops_results(queue, 3) with open_output_file("scalar-assignments-fusion-impact.tex") as outf: if not PAPER_OUTPUT: print("==== Scalar Assigment Inlining Impact ====", file=outf) - print( - table( - "lrrrr", - ("Operator", - r"Bytes Read", - r"Bytes Written", - r"Total", - r"\% of Baseline"), - ( - ("2D: Baseline", - r"\num{%d}" % ( - result2d["nonfused_bytes_read_by_scalar_assignments"]), - r"\num{%d}" % ( - result2d["nonfused_bytes_written_by_scalar_assignments"]), - r"\num{%d}" % ( - result2d["nonfused_bytes_total_by_scalar_assignments"]), - "100"), - ("2D: Inlined", - r"\num{%d}" % ( - result2d["fused_bytes_read_by_scalar_assignments"]), - r"\num{%d}" % ( - result2d["fused_bytes_written_by_scalar_assignments"]), - r"\num{%d}" % ( - result2d["fused_bytes_total_by_scalar_assignments"]), - r"%.1f" % ( - 100 * result2d["fused_bytes_total_by_scalar_assignments"] - / result2d["nonfused_bytes_total_by_scalar_assignments"])), + rows = ( + ("2D: Baseline", + r"\num{%d}" % ( + result2d["nonfused_bytes_read_by_scalar_assignments"]), + r"\num{%d}" % ( + result2d["nonfused_bytes_written_by_scalar_assignments"]), + r"\num{%d}" % ( + result2d["nonfused_bytes_total_by_scalar_assignments"]), + "100"), + ("2D: Inlined", + r"\num{%d}" % ( + result2d["fused_bytes_read_by_scalar_assignments"]), + r"\num{%d}" % ( + result2d["fused_bytes_written_by_scalar_assignments"]), + r"\num{%d}" % ( + result2d["fused_bytes_total_by_scalar_assignments"]), + r"%.1f" % ( + 100 * result2d["fused_bytes_total_by_scalar_assignments"] + / result2d["nonfused_bytes_total_by_scalar_assignments"])), + ) + + if do3d: + rows = rows + ( ("3D: Baseline", 
r"\num{%d}" % ( result3d["nonfused_bytes_read_by_scalar_assignments"]), @@ -1279,7 +1288,16 @@ def scalar_assignment_effect_of_fusion_mem_ops_table(): r"%.1f" % ( 100 * result3d["fused_bytes_total_by_scalar_assignments"] / result3d["nonfused_bytes_total_by_scalar_assignments"])), - )), + ) + print( + table( + "lrrrr", + ("Operator", + r"Bytes Read", + r"Bytes Written", + r"Total", + r"\% of Baseline"), + rows), file=outf) logger.info("Wrote '%s'", outf.name) diff --git a/test/test_grudge.py b/test/test_grudge.py index 9fd6ca3d..755ae304 100644 --- a/test/test_grudge.py +++ b/test/test_grudge.py @@ -805,6 +805,10 @@ def test_convergence_advec(actx_factory, mesh_name, mesh_pars, op_type, flux_typ def test_convergence_maxwell(actx_factory, order): """Test whether 3D Maxwell's actually converges""" + import os + if os.environ.get("GITHUB_ACTIONS") == "true" and order >= 4: + pytest.skip("not enough memory on Github CI") + actx = actx_factory() from pytools.convergence import EOCRecorder -- GitLab