diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fcee7997ab48e1ac9c51782aae6347c73e10a3bf..373083d53b04a006fb7087d4387497184fbcc1ea 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -68,6 +68,23 @@ Python 3 POCL MPI: reports: junit: test/pytest.xml +Python 3 POCL Examples: + script: + - export PY_EXE=python3 + - export PYOPENCL_TEST=portable + - export EXTRA_INSTALL="pybind11 numpy mako mpi4py pyvisfile pymetis" + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-py-project-and-run-examples.sh + - ". ./build-py-project-and-run-examples.sh" + tags: + - python3 + - pocl + - large-node + except: + - tags + artifacts: + reports: + junit: test/pytest.xml + Documentation: script: - EXTRA_INSTALL="pybind11 numpy" diff --git a/examples/advection/var-velocity.py b/examples/advection/var-velocity.py index 00e4f0c8496dced20a21e808e68986ecec1d5be2..bc720731a820fd063fda36657218315174f3c5f3 100644 --- a/examples/advection/var-velocity.py +++ b/examples/advection/var-velocity.py @@ -40,6 +40,9 @@ from grudge import sym, bind, DGDiscretizationWithBoundaries from pytools.obj_array import join_fields +FINAL_TIME = 5 + + def main(write_output=True, order=4): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) @@ -105,10 +108,9 @@ def main(write_output=True, order=4): def rhs(t, u): return bound_op(queue, t=t, u=u) - final_time = 50 dt = dt_factor * h/order**2 - nsteps = (final_time // dt) + 1 - dt = final_time/nsteps + 1e-15 + nsteps = (FINAL_TIME // dt) + 1 + dt = FINAL_TIME/nsteps + 1e-15 from grudge.shortcuts import set_up_rk4 dt_stepper = set_up_rk4("u", dt, u, rhs) @@ -118,14 +120,14 @@ def main(write_output=True, order=4): step = 0 - for event in dt_stepper.run(t_end=final_time): + for event in dt_stepper.run(t_end=FINAL_TIME): if isinstance(event, dt_stepper.StateComputed): step += 1 if step % 30 == 0: print(step) - vis.write_vtk_file("fld-%04d.vtu" % step, + vis.write_vtk_file("fld-var-velocity-%04d.vtu" % step, [("u", event.state_component)]) diff --git a/examples/advection/weak.py b/examples/advection/weak.py index cdadb49e79e85c08a611f856c54970bf3c417b12..bd9d61ca3e2bbfd416a271ffbbabca22f366e3fd 100644 --- a/examples/advection/weak.py +++ b/examples/advection/weak.py @@ -103,7 +103,7 @@ def main(write_output=True, order=4): step += 1 #print(step, event.t, norm(queue, u=event.state_component[0])) - vis.write_vtk_file("fld-%04d.vtu" % step, + vis.write_vtk_file("fld-weak-%04d.vtu" % step, [ ("u", event.state_component) ]) diff --git a/examples/conftest.py b/examples/conftest.py deleted file mode 100644 index 273b1c4a6bd099f96831d406678c910fc9371722..0000000000000000000000000000000000000000 --- a/examples/conftest.py +++ /dev/null @@ -1,7 +0,0 @@ -# This file tells py.test to scan for test_xxx() functions in -# any file below here, not just those named test_xxxx.py. - - -def pytest_collect_file(path, parent): - if "grudge/examples" in str(path.dirpath()) and path.ext == ".py": - return parent.Module(path, parent=parent) diff --git a/examples/dagrt-fusion.py b/examples/dagrt-fusion.py new file mode 100755 index 0000000000000000000000000000000000000000..a69f936cb3da95c0c6d251997a2c7e7d7e01aafa --- /dev/null +++ b/examples/dagrt-fusion.py @@ -0,0 +1,1212 @@ +#!/usr/bin/env python3 +"""Study of operator fusion (inlining) for time integration operators in Grudge. +""" + +from __future__ import division, print_function + +__copyright__ = """ +Copyright (C) 2015 Andreas Kloeckner +Copyright (C) 2019 Matt Wala +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import logging +import numpy as np +import six +import sys +import pyopencl as cl +import pyopencl.array # noqa +import pytest + +import dagrt.language as lang +import pymbolic.primitives as p +import grudge.symbolic.mappers as gmap +import grudge.symbolic.operators as op +from grudge.execution import ExecutionMapper +from grudge.function_registry import base_function_registry +from pymbolic.mapper import Mapper +from pymbolic.mapper.evaluator import EvaluationMapper \ + as PymbolicEvaluationMapper +from pytools import memoize + +from grudge import sym, bind, DGDiscretizationWithBoundaries +from leap.rk import LSRK4Method + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +logging.basicConfig(level=logging.INFO) + +logger = logging.getLogger(__name__) + + +PRINT_RESULTS_TO_STDOUT = True + + +# {{{ topological sort + +def topological_sort(stmts, root_deps): + id_to_stmt = {stmt.id: stmt for stmt in stmts} + + ordered_stmts = [] + satisfied = set() + + def satisfy_dep(name): + if name in satisfied: + return + + stmt = id_to_stmt[name] + for dep in sorted(stmt.depends_on): + satisfy_dep(dep) + ordered_stmts.append(stmt) + satisfied.add(name) + + for d in root_deps: + satisfy_dep(d) + + return ordered_stmts + +# }}} + + +# {{{ leap to grudge translation + +# Use evaluation, not identity mappers to propagate symbolic vectors to +# outermost level. + +class DagrtToGrudgeRewriter(PymbolicEvaluationMapper): + def __init__(self, context): + self.context = context + + def map_variable(self, expr): + return self.context[expr.name] + + def map_call(self, expr): + raise ValueError("function call not expected") + + +class GrudgeArgSubstitutor(gmap.SymbolicEvaluator): + def __init__(self, args): + super().__init__(context={}) + self.args = args + + def map_grudge_variable(self, expr): + if expr.name in self.args: + return self.args[expr.name] + return super().map_variable(expr) + + +def transcribe_phase(dag, field_var_name, field_components, phase_name, + sym_operator): + """Generate a Grudge operator for a Dagrt time integrator phase. + + Arguments: + + dag: The Dagrt code object for the time integrator + + field_var_name: The name of the simulation variable + + field_components: The number of components (fields) in the variable + + phase_name: The name of the phase to transcribe + + sym_operator: The Grudge symbolic expression to substitue for the + right-hand side evaluation in the Dagrt code + """ + sym_operator = gmap.OperatorBinder()(sym_operator) + phase = dag.phases[phase_name] + + ctx = { + "": sym.var("input_t", sym.DD_SCALAR), + "
": sym.var("input_dt", sym.DD_SCALAR), + f"{field_var_name}": sym.make_sym_array( + f"input_{field_var_name}", field_components), + f"

residual": sym.make_sym_array( + "input_residual", field_components), + } + + rhs_name = f"{field_var_name}" + output_vars = [v for v in ctx] + yielded_states = [] + + from dagrt.codegen.transform import isolate_function_calls_in_phase + ordered_stmts = topological_sort( + isolate_function_calls_in_phase( + phase, + dag.get_stmt_id_generator(), + dag.get_var_name_generator()).statements, + phase.depends_on) + + for stmt in ordered_stmts: + if stmt.condition is not True: + raise NotImplementedError( + "non-True condition (in statement '%s') not supported" + % stmt.id) + + if isinstance(stmt, lang.Nop): + pass + + elif isinstance(stmt, lang.Assign): + if not isinstance(stmt.lhs, p.Variable): + raise NotImplementedError("lhs of statement %s is not a variable: %s" + % (stmt.id, stmt.lhs)) + ctx[stmt.lhs.name] = sym.cse( + DagrtToGrudgeRewriter(ctx)(stmt.rhs), + ( + stmt.lhs.name + .replace("<", "") + .replace(">", ""))) + + elif isinstance(stmt, lang.AssignFunctionCall): + if stmt.function_id != rhs_name: + raise NotImplementedError( + "statement '%s' calls unsupported function '%s'" + % (stmt.id, stmt.function_id)) + + if stmt.parameters: + raise NotImplementedError( + "statement '%s' calls function '%s' with positional arguments" + % (stmt.id, stmt.function_id)) + + kwargs = {name: sym.cse(DagrtToGrudgeRewriter(ctx)(arg)) + for name, arg in stmt.kw_parameters.items()} + + if len(stmt.assignees) != 1: + raise NotImplementedError( + "statement '%s' calls function '%s' " + "with more than one LHS" + % (stmt.id, stmt.function_id)) + + assignee, = stmt.assignees + ctx[assignee] = GrudgeArgSubstitutor(kwargs)(sym_operator) + + elif isinstance(stmt, lang.YieldState): + d2g = DagrtToGrudgeRewriter(ctx) + yielded_states.append( + ( + stmt.time_id, + d2g(stmt.time), + stmt.component_id, + d2g(stmt.expression))) + + else: + raise NotImplementedError("statement %s is of unsupported type ''%s'" + % (stmt.id, type(stmt).__name__)) + + return output_vars, [ctx[ov] for ov in output_vars], yielded_states + +# }}} + + +# {{{ time integrator implementations + +class RK4TimeStepperBase(object): + + def __init__(self, queue, component_getter): + self.queue = queue + self.component_getter = component_getter + + def get_initial_context(self, fields, t_start, dt): + from pytools.obj_array import join_fields + + # Flatten fields. + flattened_fields = [] + for field in fields: + if isinstance(field, list): + flattened_fields.extend(field) + else: + flattened_fields.append(field) + flattened_fields = join_fields(*flattened_fields) + del fields + + return { + "input_t": t_start, + "input_dt": dt, + self.state_name: flattened_fields, + "input_residual": flattened_fields, + } + + def set_up_stepper(self, discr, field_var_name, sym_rhs, num_fields, + function_registry=base_function_registry, + exec_mapper_factory=ExecutionMapper): + dt_method = LSRK4Method(component_id=field_var_name) + dt_code = dt_method.generate() + self.field_var_name = field_var_name + self.state_name = f"input_{field_var_name}" + + # Transcribe the phase. + output_vars, results, yielded_states = transcribe_phase( + dt_code, field_var_name, num_fields, + "primary", sym_rhs) + + # Build the bound operator for the time integrator. + output_t = results[0] + output_dt = results[1] + output_states = results[2] + output_residuals = results[3] + + assert len(output_states) == num_fields + assert len(output_states) == len(output_residuals) + + from pytools.obj_array import join_fields + flattened_results = join_fields(output_t, output_dt, *output_states) + + self.bound_op = bind( + discr, flattened_results, + function_registry=function_registry, + exec_mapper_factory=exec_mapper_factory) + + def run(self, fields, t_start, dt, t_end, return_profile_data=False): + context = self.get_initial_context(fields, t_start, dt) + + t = t_start + + while t <= t_end: + if return_profile_data: + profile_data = dict() + else: + profile_data = None + + results = self.bound_op( + self.queue, + profile_data=profile_data, + **context) + + if return_profile_data: + results = results[0] + + t = results[0] + context["input_t"] = t + context["input_dt"] = results[1] + output_states = results[2:] + context[self.state_name] = output_states + + result = (t, self.component_getter(output_states)) + if return_profile_data: + result += (profile_data,) + + yield result + + +class RK4TimeStepper(RK4TimeStepperBase): + + def __init__(self, queue, discr, field_var_name, grudge_bound_op, + num_fields, component_getter, exec_mapper_factory=ExecutionMapper): + """Arguments: + + field_var_name: The name of the simulation variable + + grudge_bound_op: The BoundExpression for the right-hand side + + num_fields: The number of components in the simulation variable + + component_getter: A function, which, given an object array + representing the simulation variable, splits the array into + its components + + """ + super().__init__(queue, component_getter) + + # Construct sym_rhs to have the effect of replacing the RHS calls in the + # dagrt code with calls of the grudge operator. + from grudge.symbolic.primitives import FunctionSymbol, Variable + call = sym.cse( + FunctionSymbol("grudge_op")(*( + (Variable("t", dd=sym.DD_SCALAR),) + + tuple( + Variable(field_var_name, dd=sym.DD_VOLUME)[i] + for i in range(num_fields))))) + + from pytools.obj_array import join_fields + sym_rhs = join_fields(*(call[i] for i in range(num_fields))) + + self.queue = queue + self.grudge_bound_op = grudge_bound_op + + from grudge.function_registry import register_external_function + + freg = register_external_function( + base_function_registry, + "grudge_op", + implementation=self._bound_op, + dd=sym.DD_VOLUME) + + self.set_up_stepper( + discr, field_var_name, sym_rhs, num_fields, + freg, + exec_mapper_factory) + + self.component_getter = component_getter + + def _bound_op(self, queue, t, *args, profile_data=None): + from pytools.obj_array import join_fields + context = { + "t": t, + self.field_var_name: join_fields(*args)} + result = self.grudge_bound_op( + queue, profile_data=profile_data, **context) + if profile_data is not None: + result = result[0] + return result + + def get_initial_context(self, fields, t_start, dt): + context = super().get_initial_context(fields, t_start, dt) + context["grudge_op"] = self._bound_op + return context + + +class FusedRK4TimeStepper(RK4TimeStepperBase): + + def __init__(self, queue, discr, field_var_name, sym_rhs, num_fields, + component_getter, exec_mapper_factory=ExecutionMapper): + super().__init__(queue, component_getter) + self.set_up_stepper( + discr, field_var_name, sym_rhs, num_fields, + base_function_registry, + exec_mapper_factory) + +# }}} + + +# {{{ problem setup code + +def get_strong_wave_op_with_discr(cl_ctx, dims=2, order=4): + from meshmode.mesh.generation import generate_regular_rect_mesh + mesh = generate_regular_rect_mesh( + a=(-0.5,)*dims, + b=(0.5,)*dims, + n=(16,)*dims) + + logger.debug("%d elements", mesh.nelements) + + discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + + source_center = np.array([0.1, 0.22, 0.33])[:dims] + source_width = 0.05 + source_omega = 3 + + sym_x = sym.nodes(mesh.dim) + sym_source_center_dist = sym_x - source_center + sym_t = sym.ScalarVariable("t") + + from grudge.models.wave import StrongWaveOperator + from meshmode.mesh import BTAG_ALL, BTAG_NONE + op = StrongWaveOperator(-0.1, dims, + source_f=( + sym.sin(source_omega*sym_t) + * sym.exp( + -np.dot(sym_source_center_dist, sym_source_center_dist) + / source_width**2)), + dirichlet_tag=BTAG_NONE, + neumann_tag=BTAG_NONE, + radiation_tag=BTAG_ALL, + flux_type="upwind") + + op.check_bc_coverage(mesh) + + return (op, discr) + + +def get_strong_wave_component(state_component): + return (state_component[0], state_component[1:]) + +# }}} + + +# {{{ equivalence check between fused and non-fused versions + +def test_stepper_equivalence(ctx_factory, order=4): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + dims = 2 + + op, discr = get_strong_wave_op_with_discr(cl_ctx, dims=dims, order=order) + + if dims == 2: + dt = 0.04 + elif dims == 3: + dt = 0.02 + + from pytools.obj_array import join_fields + ic = join_fields(discr.zeros(queue), + [discr.zeros(queue) for i in range(discr.dim)]) + + bound_op = bind(discr, op.sym_operator()) + + stepper = RK4TimeStepper( + queue, discr, "w", bound_op, 1 + discr.dim, get_strong_wave_component) + + fused_stepper = FusedRK4TimeStepper( + queue, discr, "w", op.sym_operator(), 1 + discr.dim, + get_strong_wave_component) + + t_start = 0 + t_end = 0.5 + nsteps = int(np.ceil((t_end + 1e-9) / dt)) + print("dt=%g nsteps=%d" % (dt, nsteps)) + + step = 0 + + norm = bind(discr, sym.norm(2, sym.var("u_ref") - sym.var("u"))) + + fused_steps = fused_stepper.run(ic, t_start, dt, t_end) + + for t_ref, (u_ref, v_ref) in stepper.run(ic, t_start, dt, t_end): + step += 1 + logger.debug("step %d/%d", step, nsteps) + t, (u, v) = next(fused_steps) + assert t == t_ref, step + assert norm(queue, u=u, u_ref=u_ref) <= 1e-13, step + +# }}} + + +# {{{ execution mapper wrapper + +class ExecutionMapperWrapper(Mapper): + + def __init__(self, queue, context, bound_op): + self.inner_mapper = ExecutionMapper(queue, context, bound_op) + self.queue = queue + self.context = context + self.bound_op = bound_op + + def map_variable(self, expr): + # Needed, because bound op execution can ask for variable values. + return self.inner_mapper.map_variable(expr) + + def map_grudge_variable(self, expr): + # See map_variable() + return self.inner_mapper.map_grudge_variable(expr) + +# }}} + + +# {{{ mem op counter implementation + +class ExecutionMapperWithMemOpCounting(ExecutionMapperWrapper): + # This is a skeleton implementation that only has just enough functionality + # for the wave-min example to work. + + # {{{ expressions + + def map_profiled_call(self, expr, profile_data): + args = [self.inner_mapper.rec(p) for p in expr.parameters] + return self.inner_mapper.function_registry[expr.function.name]( + self.queue, *args, profile_data=profile_data) + + def map_profiled_essentially_elementwise_linear(self, op, field_expr, + profile_data): + result = getattr(self.inner_mapper, op.mapper_method)(op, field_expr) + + if profile_data is not None: + # We model the cost to load the input and write the output. In + # particular, we assume the elementwise matrices are negligible in + # size and thus ignorable. + + field = self.inner_mapper.rec(field_expr) + profile_data["bytes_read"] = ( + profile_data.get("bytes_read", 0) + field.nbytes) + profile_data["bytes_written"] = ( + profile_data.get("bytes_written", 0) + result.nbytes) + + if op.mapper_method == "map_interpolation": + profile_data["interp_bytes_read"] = ( + profile_data.get("interp_bytes_read", 0) + field.nbytes) + profile_data["interp_bytes_written"] = ( + profile_data.get("interp_bytes_written", 0) + result.nbytes) + + return result + + # }}} + + # {{{ instruction mappings + + def process_assignment_expr(self, expr, profile_data): + if isinstance(expr, p.Call): + assert expr.mapper_method == "map_call" + val = self.map_profiled_call(expr, profile_data) + + elif isinstance(expr, sym.OperatorBinding): + if isinstance( + expr.op, + ( + # TODO: Not comprehensive. + op.InterpolationOperator, + op.RefFaceMassOperator, + op.RefInverseMassOperator, + op.OppositeInteriorFaceSwap)): + val = self.map_profiled_essentially_elementwise_linear( + expr.op, expr.field, profile_data) + + else: + assert False, ("unknown operator: %s" % expr.op) + + else: + logger.debug("assignment not profiled: %s", expr) + val = self.inner_mapper.rec(expr) + + return val + + def map_insn_assign(self, insn, profile_data): + result = [] + for name, expr in zip(insn.names, insn.exprs): + result.append((name, self.process_assignment_expr(expr, profile_data))) + return result, [] + + def map_insn_loopy_kernel(self, insn, profile_data): + kwargs = {} + kdescr = insn.kernel_descriptor + for name, expr in six.iteritems(kdescr.input_mappings): + val = self.inner_mapper.rec(expr) + kwargs[name] = val + assert not isinstance(val, np.ndarray) + if profile_data is not None and isinstance(val, pyopencl.array.Array): + profile_data["bytes_read"] = ( + profile_data.get("bytes_read", 0) + val.nbytes) + profile_data["bytes_read_by_scalar_assignments"] = ( + profile_data.get("bytes_read_by_scalar_assignments", 0) + + val.nbytes) + + discr = self.inner_mapper.discrwb.discr_from_dd(kdescr.governing_dd) + for name in kdescr.scalar_args(): + v = kwargs[name] + if isinstance(v, (int, float)): + kwargs[name] = discr.real_dtype.type(v) + elif isinstance(v, complex): + kwargs[name] = discr.complex_dtype.type(v) + elif isinstance(v, np.number): + pass + else: + raise ValueError("unrecognized scalar type for variable '%s': %s" + % (name, type(v))) + + kwargs["grdg_n"] = discr.nnodes + evt, result_dict = kdescr.loopy_kernel(self.queue, **kwargs) + + for val in result_dict.values(): + assert not isinstance(val, np.ndarray) + if profile_data is not None and isinstance(val, pyopencl.array.Array): + profile_data["bytes_written"] = ( + profile_data.get("bytes_written", 0) + val.nbytes) + profile_data["bytes_written_by_scalar_assignments"] = ( + profile_data.get("bytes_written_by_scalar_assignments", 0) + + val.nbytes) + + return list(result_dict.items()), [] + + def map_insn_assign_to_discr_scoped(self, insn, profile_data=None): + assignments = [] + + for name, expr in zip(insn.names, insn.exprs): + logger.debug("assignment not profiled: %s <- %s", name, expr) + inner_mapper = self.inner_mapper + value = inner_mapper.rec(expr) + inner_mapper.discrwb._discr_scoped_subexpr_name_to_value[name] = value + assignments.append((name, value)) + + return assignments, [] + + def map_insn_assign_from_discr_scoped(self, insn, profile_data=None): + return [( + insn.name, + self.inner_mapper. + discrwb._discr_scoped_subexpr_name_to_value[insn.name])], [] + + def map_insn_rank_data_swap(self, insn, profile_data): + raise NotImplementedError("no profiling for instruction: %s" % insn) + + def map_insn_diff_batch_assign(self, insn, profile_data): + assignments, futures = self.inner_mapper.map_insn_diff_batch_assign(insn) + + if profile_data is not None: + # We model the cost to load the input and write the output. In + # particular, we assume the elementwise matrices are negligible in + # size and thus ignorable. + + field = self.inner_mapper.rec(insn.field) + profile_data["bytes_read"] = ( + profile_data.get("bytes_read", 0) + field.nbytes) + + for _, value in assignments: + profile_data["bytes_written"] = ( + profile_data.get("bytes_written", 0) + value.nbytes) + + return assignments, futures + + # }}} + +# }}} + + +# {{{ mem op counter check + +def test_assignment_memory_model(ctx_factory): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + _, discr = get_strong_wave_op_with_discr(cl_ctx, dims=2, order=3) + + # Assignment instruction + bound_op = bind( + discr, + sym.Variable("input0", sym.DD_VOLUME) + + sym.Variable("input1", sym.DD_VOLUME), + exec_mapper_factory=ExecutionMapperWithMemOpCounting) + + input0 = discr.zeros(queue) + input1 = discr.zeros(queue) + + result, profile_data = bound_op( + queue, + profile_data={}, + input0=input0, + input1=input1) + + assert profile_data["bytes_read"] == input0.nbytes + input1.nbytes + assert profile_data["bytes_written"] == result.nbytes + + +@pytest.mark.parametrize("use_fusion", (True, False)) +def test_stepper_mem_ops(ctx_factory, use_fusion): + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + + dims = 2 + + op, discr = get_strong_wave_op_with_discr(cl_ctx, dims=dims, order=3) + + t_start = 0 + dt = 0.04 + t_end = 0.2 + + from pytools.obj_array import join_fields + ic = join_fields(discr.zeros(queue), + [discr.zeros(queue) for i in range(discr.dim)]) + + if not use_fusion: + bound_op = bind( + discr, op.sym_operator(), + exec_mapper_factory=ExecutionMapperWithMemOpCounting) + + stepper = RK4TimeStepper( + queue, discr, "w", bound_op, 1 + discr.dim, + get_strong_wave_component, + exec_mapper_factory=ExecutionMapperWithMemOpCounting) + + else: + stepper = FusedRK4TimeStepper( + queue, discr, "w", op.sym_operator(), 1 + discr.dim, + get_strong_wave_component, + exec_mapper_factory=ExecutionMapperWithMemOpCounting) + + step = 0 + + nsteps = int(np.ceil((t_end + 1e-9) / dt)) + for (_, _, profile_data) in stepper.run( + ic, t_start, dt, t_end, return_profile_data=True): + step += 1 + logger.info("step %d/%d", step, nsteps) + + logger.info("using fusion? %s", use_fusion) + logger.info("bytes read: %d", profile_data["bytes_read"]) + logger.info("bytes written: %d", profile_data["bytes_written"]) + logger.info("bytes total: %d", + profile_data["bytes_read"] + profile_data["bytes_written"]) + +# }}} + + +# {{{ execution mapper with timing + +SECONDS_PER_NANOSECOND = 10**9 + + +class TimingFuture(object): + + def __init__(self, start_event, stop_event): + self.start_event = start_event + self.stop_event = stop_event + + def elapsed(self): + cl.wait_for_events([self.start_event, self.stop_event]) + return ( + self.stop_event.profile.end + - self.start_event.profile.end) / SECONDS_PER_NANOSECOND + + +from collections.abc import MutableSequence + + +class TimingFutureList(MutableSequence): + + def __init__(self, *args, **kwargs): + self._list = list(*args, **kwargs) + + def __len__(self): + return len(self._list) + + def __getitem__(self, idx): + return self._list[idx] + + def __setitem__(self, idx, val): + self._list[idx] = val + + def __delitem__(self, idx): + del self._list[idx] + + def insert(self, idx, val): + self._list.insert(idx, val) + + def elapsed(self): + return sum(future.elapsed() for future in self._list) + + +def time_insn(f): + time_field_name = "time_%s" % f.__name__ + + def wrapper(self, insn, profile_data): + if profile_data is None: + return f(self, insn, profile_data) + + start = cl.enqueue_marker(self.queue) + retval = f(self, insn, profile_data) + end = cl.enqueue_marker(self.queue) + profile_data\ + .setdefault(time_field_name, TimingFutureList())\ + .append(TimingFuture(start, end)) + + return retval + + return wrapper + + +class ExecutionMapperWithTiming(ExecutionMapperWrapper): + + def map_profiled_call(self, expr, profile_data): + args = [self.inner_mapper.rec(p) for p in expr.parameters] + return self.inner_mapper.function_registry[expr.function.name]( + self.queue, *args, profile_data=profile_data) + + def map_profiled_operator_binding(self, expr, profile_data): + if profile_data is None: + return self.inner_mapper.map_operator_binding(expr) + + start = cl.enqueue_marker(self.queue) + retval = self.inner_mapper.map_operator_binding(expr) + end = cl.enqueue_marker(self.queue) + time_field_name = "time_op_%s" % expr.op.mapper_method + profile_data\ + .setdefault(time_field_name, TimingFutureList())\ + .append(TimingFuture(start, end)) + + return retval + + def map_insn_assign_to_discr_scoped(self, insn, profile_data): + return self.inner_mapper.map_insn_assign_to_discr_scoped(insn, profile_data) + + def map_insn_assign_from_discr_scoped(self, insn, profile_data): + return self.\ + inner_mapper.map_insn_assign_from_discr_scoped(insn, profile_data) + + @time_insn + def map_insn_loopy_kernel(self, *args, **kwargs): + return self.inner_mapper.map_insn_loopy_kernel(*args, **kwargs) + + def map_insn_assign(self, insn, profile_data): + if len(insn.exprs) == 1: + if isinstance(insn.exprs[0], p.Call): + assert insn.exprs[0].mapper_method == "map_call" + val = self.map_profiled_call(insn.exprs[0], profile_data) + return [(insn.names[0], val)], [] + elif isinstance(insn.exprs[0], sym.OperatorBinding): + assert insn.exprs[0].mapper_method == "map_operator_binding" + val = self.map_profiled_operator_binding(insn.exprs[0], profile_data) + return [(insn.names[0], val)], [] + + return self.inner_mapper.map_insn_assign(insn, profile_data) + + @time_insn + def map_insn_diff_batch_assign(self, insn, profile_data): + return self.inner_mapper.map_insn_diff_batch_assign(insn, profile_data) + +# }}} + + +# {{{ timing check + +@pytest.mark.parametrize("use_fusion", (True, False)) +def test_stepper_timing(ctx_factory, use_fusion): + cl_ctx = ctx_factory() + queue = cl.CommandQueue( + cl_ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + dims = 3 + + op, discr = get_strong_wave_op_with_discr(cl_ctx, dims=dims, order=3) + + t_start = 0 + dt = 0.04 + t_end = 0.1 + + from pytools.obj_array import join_fields + ic = join_fields(discr.zeros(queue), + [discr.zeros(queue) for i in range(discr.dim)]) + + if not use_fusion: + bound_op = bind( + discr, op.sym_operator(), + exec_mapper_factory=ExecutionMapperWithTiming) + + stepper = RK4TimeStepper( + queue, discr, "w", bound_op, 1 + discr.dim, + get_strong_wave_component, + exec_mapper_factory=ExecutionMapperWithTiming) + + else: + stepper = FusedRK4TimeStepper( + queue, discr, "w", op.sym_operator(), 1 + discr.dim, + get_strong_wave_component, + exec_mapper_factory=ExecutionMapperWithTiming) + + step = 0 + + import time + t = time.time() + nsteps = int(np.ceil((t_end + 1e-9) / dt)) + for (_, _, profile_data) in stepper.run( + ic, t_start, dt, t_end, return_profile_data=True): + step += 1 + tn = time.time() + logger.info("step %d/%d: %f", step, nsteps, tn - t) + t = tn + + logger.info("fusion? %s", use_fusion) + for key, value in profile_data.items(): + if isinstance(value, TimingFutureList): + print(key, value.elapsed()) + +# }}} + + +# {{{ paper outputs + +def get_example_stepper(queue, dims=2, order=3, use_fusion=True, + exec_mapper_factory=ExecutionMapper, + return_ic=False): + op, discr = get_strong_wave_op_with_discr(queue.context, dims=dims, order=3) + + if not use_fusion: + bound_op = bind( + discr, op.sym_operator(), + exec_mapper_factory=exec_mapper_factory) + + stepper = RK4TimeStepper( + queue, discr, "w", bound_op, 1 + discr.dim, + get_strong_wave_component, + exec_mapper_factory=exec_mapper_factory) + + else: + stepper = FusedRK4TimeStepper( + queue, discr, "w", op.sym_operator(), 1 + discr.dim, + get_strong_wave_component, + exec_mapper_factory=exec_mapper_factory) + + if return_ic: + from pytools.obj_array import join_fields + ic = join_fields(discr.zeros(queue), + [discr.zeros(queue) for i in range(discr.dim)]) + return stepper, ic + + return stepper + + +def latex_table(table_format, header, rows): + result = [] + _ = result.append + _(rf"\begin{{tabular}}{{{table_format}}}") + _(r"\toprule") + _(" & ".join(rf"\multicolumn{{1}}{{c}}{{{item}}}" for item in header) + r" \\") + _(r"\midrule") + for row in rows: + _(" & ".join(row) + r" \\") + _(r"\bottomrule") + _(r"\end{tabular}") + return "\n".join(result) + + +def ascii_table(table_format, header, rows): + from pytools import Table + table = Table() + table.add_row(header) + + for input_row in rows: + row = [] + for item in input_row: + if item.startswith(r"\num{"): + # Strip \num{...} formatting + row.append(item[5:-1]) + else: + row.append(item) + table.add_row(row) + + return str(table) + + +if PRINT_RESULTS_TO_STDOUT: + table = ascii_table +else: + table = latex_table + + +def problem_stats(order=3): + cl_ctx = cl.create_some_context() + + _, dg_discr_2d = get_strong_wave_op_with_discr(cl_ctx, dims=2, order=order) + print("Number of 2D elements:", dg_discr_2d.mesh.nelements) + vol_discr_2d = dg_discr_2d.discr_from_dd("vol") + dofs_2d = {group.nunit_nodes for group in vol_discr_2d.groups} + from pytools import one + print("Number of DOFs per 2D element:", one(dofs_2d)) + + _, dg_discr_3d = get_strong_wave_op_with_discr(cl_ctx, dims=3, order=order) + print("Number of 3D elements:", dg_discr_3d.mesh.nelements) + vol_discr_3d = dg_discr_3d.discr_from_dd("vol") + dofs_3d = {group.nunit_nodes for group in vol_discr_3d.groups} + from pytools import one + print("Number of DOFs per 3D element:", one(dofs_3d)) + + +def statement_counts_table(): + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + fused_stepper = get_example_stepper(queue, use_fusion=True) + stepper = get_example_stepper(queue, use_fusion=False) + + if PRINT_RESULTS_TO_STDOUT: + print("==== Statement Counts ====") + outf = sys.stdout + else: + out_path = "statement-counts.tex" + outf = open(out_path, "w") + + print( + table( + "lr", + ("Operator", "Grudge Node Count"), + ( + ("Time integration: baseline", + r"\num{%d}" % len(stepper.bound_op.eval_code.instructions)), + ("Right-hand side: baseline", + r"\num{%d}" % len(stepper.grudge_bound_op.eval_code.instructions)), + ("Inlined operator", + r"\num{%d}" % len(fused_stepper.bound_op.eval_code.instructions)) + )), + file=outf) + + +@memoize(key=lambda queue, dims: dims) +def mem_ops_results(queue, dims): + fused_stepper = get_example_stepper( + queue, + dims=dims, + use_fusion=True, + exec_mapper_factory=ExecutionMapperWithMemOpCounting) + + stepper, ic = get_example_stepper( + queue, + dims=dims, + use_fusion=False, + exec_mapper_factory=ExecutionMapperWithMemOpCounting, + return_ic=True) + + t_start = 0 + dt = 0.02 + t_end = 0.02 + + result = {} + + for (_, _, profile_data) in stepper.run( + ic, t_start, dt, t_end, return_profile_data=True): + pass + + result["nonfused_bytes_read"] = profile_data["bytes_read"] + result["nonfused_bytes_written"] = profile_data["bytes_written"] + result["nonfused_bytes_total"] = \ + result["nonfused_bytes_read"] \ + + result["nonfused_bytes_written"] + + result["nonfused_bytes_read_by_scalar_assignments"] = \ + profile_data["bytes_read_by_scalar_assignments"] + result["nonfused_bytes_written_by_scalar_assignments"] = \ + profile_data["bytes_written_by_scalar_assignments"] + result["nonfused_bytes_total_by_scalar_assignments"] = \ + result["nonfused_bytes_read_by_scalar_assignments"] \ + + result["nonfused_bytes_written_by_scalar_assignments"] + + for (_, _, profile_data) in fused_stepper.run( + ic, t_start, dt, t_end, return_profile_data=True): + pass + + result["fused_bytes_read"] = profile_data["bytes_read"] + result["fused_bytes_written"] = profile_data["bytes_written"] + result["fused_bytes_total"] = \ + result["fused_bytes_read"] \ + + result["fused_bytes_written"] + + result["fused_bytes_read_by_scalar_assignments"] = \ + profile_data["bytes_read_by_scalar_assignments"] + result["fused_bytes_written_by_scalar_assignments"] = \ + profile_data["bytes_written_by_scalar_assignments"] + result["fused_bytes_total_by_scalar_assignments"] = \ + result["fused_bytes_read_by_scalar_assignments"] \ + + result["fused_bytes_written_by_scalar_assignments"] + + return result + + +def scalar_assignment_percent_of_total_mem_ops_table(): + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + result2d = mem_ops_results(queue, 2) + result3d = mem_ops_results(queue, 3) + + if PRINT_RESULTS_TO_STDOUT: + print("==== Scalar Assigment % of Total Mem Ops ====") + outf = sys.stdout + else: + out_path = "scalar-assignments-mem-op-percentage.tex" + outf = open(out_path, "w") + + print( + table( + "lr", + ("Operator", + r"\parbox{1in}{\centering \% Memory Ops. Due to Scalar Assignments}"), + ( + ("2D: Baseline", + "%.1f" % ( + 100 * result2d["nonfused_bytes_total_by_scalar_assignments"] + / result2d["nonfused_bytes_total"])), + ("2D: Inlined", + "%.1f" % ( + 100 * result2d["fused_bytes_total_by_scalar_assignments"] + / result2d["fused_bytes_total"])), + ("3D: Baseline", + "%.1f" % ( + 100 * result3d["nonfused_bytes_total_by_scalar_assignments"] + / result3d["nonfused_bytes_total"])), + ("3D: Inlined", + "%.1f" % ( + 100 * result3d["fused_bytes_total_by_scalar_assignments"] + / result3d["fused_bytes_total"])), + )), + file=outf) + + +def scalar_assignment_effect_of_fusion_mem_ops_table(): + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + result2d = mem_ops_results(queue, 2) + result3d = mem_ops_results(queue, 3) + + if PRINT_RESULTS_TO_STDOUT: + print("==== Scalar Assigment Inlining Impact ====") + outf = sys.stdout + else: + out_path = "scalar-assignments-mem-op-percentage.tex" + outf = open(out_path, "w") + + print( + table( + "lrrrr", + ("Operator", + r"Bytes Read", + r"Bytes Written", + r"Total", + r"\% of Baseline"), + ( + ("2D: Baseline", + r"\num{%d}" % ( + result2d["nonfused_bytes_read_by_scalar_assignments"]), + r"\num{%d}" % ( + result2d["nonfused_bytes_written_by_scalar_assignments"]), + r"\num{%d}" % ( + result2d["nonfused_bytes_total_by_scalar_assignments"]), + "100"), + ("2D: Inlined", + r"\num{%d}" % ( + result2d["fused_bytes_read_by_scalar_assignments"]), + r"\num{%d}" % ( + result2d["fused_bytes_written_by_scalar_assignments"]), + r"\num{%d}" % ( + result2d["fused_bytes_total_by_scalar_assignments"]), + r"%.1f" % ( + 100 * result2d["fused_bytes_total_by_scalar_assignments"] + / result2d["nonfused_bytes_total_by_scalar_assignments"])), + ("3D: Baseline", + r"\num{%d}" % ( + result3d["nonfused_bytes_read_by_scalar_assignments"]), + r"\num{%d}" % ( + result3d["nonfused_bytes_written_by_scalar_assignments"]), + r"\num{%d}" % ( + result3d["nonfused_bytes_total_by_scalar_assignments"]), + "100"), + ("3D: Inlined", + r"\num{%d}" % ( + result3d["fused_bytes_read_by_scalar_assignments"]), + r"\num{%d}" % ( + result3d["fused_bytes_written_by_scalar_assignments"]), + r"\num{%d}" % ( + result3d["fused_bytes_total_by_scalar_assignments"]), + r"%.1f" % ( + 100 * result3d["fused_bytes_total_by_scalar_assignments"] + / result3d["nonfused_bytes_total_by_scalar_assignments"])), + )), + file=outf) + +# }}} + + +def main(): + if 1: + # Run tests. + from py.test import main + result = main([__file__]) + assert result == 0 + + # Run examples. + problem_stats() + statement_counts_table() + scalar_assignment_percent_of_total_mem_ops_table() + scalar_assignment_effect_of_fusion_mem_ops_table() + + +if __name__ == "__main__": + main() diff --git a/examples/geometry.py b/examples/geometry.py index e10254bfb9b10323b61c150dd5f9105a2525df79..6e146f21dd31545b392d8006d45d94bed7a9db02 100644 --- a/examples/geometry.py +++ b/examples/geometry.py @@ -26,7 +26,7 @@ THE SOFTWARE. import numpy as np # noqa import pyopencl as cl -from grudge import sym, bind, Discretization, shortcuts +from grudge import sym, bind, DGDiscretizationWithBoundaries, shortcuts def main(write_output=True): @@ -36,14 +36,14 @@ def main(write_output=True): from meshmode.mesh.generation import generate_warped_rect_mesh mesh = generate_warped_rect_mesh(dim=2, order=4, n=6) - discr = Discretization(cl_ctx, mesh, order=4) + discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=4) sym_op = sym.normal(sym.BTAG_ALL, mesh.dim) #sym_op = sym.nodes(mesh.dim, where=sym.BTAG_ALL) print(sym.pretty(sym_op)) op = bind(discr, sym_op) print() - print(op.code) + print(op.eval_code) vec = op(queue) diff --git a/examples/maxwell/cavities.py b/examples/maxwell/cavities.py index 85a7c3dec0cef99b4a72fe9c23439c4212c0c32d..a58f27399ccfb6e32a16d6cf594ed2a3c78076b2 100644 --- a/examples/maxwell/cavities.py +++ b/examples/maxwell/cavities.py @@ -28,11 +28,14 @@ THE SOFTWARE. import numpy as np import pyopencl as cl from grudge.shortcuts import set_up_rk4 -from grudge import sym, bind, Discretization +from grudge import sym, bind, DGDiscretizationWithBoundaries from grudge.models.em import get_rectangular_cavity_mode +STEPS = 60 + + def main(dims, write_output=True, order=4): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) @@ -43,7 +46,7 @@ def main(dims, write_output=True, order=4): b=(1.0,)*dims, n=(5,)*dims) - discr = Discretization(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) if 0: epsilon0 = 8.8541878176e-12 # C**2 / (N m**2) @@ -84,7 +87,7 @@ def main(dims, write_output=True, order=4): dt_stepper = set_up_rk4("w", dt, fields, rhs) - final_t = dt * 600 + final_t = dt * STEPS nsteps = int(final_t/dt) print("dt=%g nsteps=%d" % (dt, nsteps)) @@ -102,7 +105,7 @@ def main(dims, write_output=True, order=4): e, h = op.split_eh(fields) if 1: - vis.write_vtk_file("fld-%04d.vtu" % step, + vis.write_vtk_file("fld-cavities-%04d.vtu" % step, [ ("e", e), ("h", h), @@ -119,7 +122,7 @@ def main(dims, write_output=True, order=4): time()-t_last_step) if step % 10 == 0: e, h = op.split_eh(event.state_component) - vis.write_vtk_file("fld-%04d.vtu" % step, + vis.write_vtk_file("fld-cavities-%04d.vtu" % step, [ ("e", e), ("h", h), diff --git a/examples/wave/var-propagation-speed.py b/examples/wave/var-propagation-speed.py index f161d0346c1349e508b5ed5a32737e1cccf61312..a7a634f7a8b79f3006525059e70cc4379581127c 100644 --- a/examples/wave/var-propagation-speed.py +++ b/examples/wave/var-propagation-speed.py @@ -35,7 +35,7 @@ def main(write_output=True, order=4): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) - dims = 3 + dims = 2 from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh( a=(-0.5,)*dims, @@ -90,7 +90,7 @@ def main(write_output=True, order=4): dt_stepper = set_up_rk4("w", dt, fields, rhs) - final_t = 10 + final_t = 1 nsteps = int(final_t/dt) print("dt=%g nsteps=%d" % (dt, nsteps)) @@ -113,7 +113,7 @@ def main(write_output=True, order=4): print(step, event.t, norm(queue, u=event.state_component[0]), time()-t_last_step) if step % 10 == 0: - vis.write_vtk_file("fld-%04d.vtu" % step, + vis.write_vtk_file("fld-var-propogation-speed-%04d.vtu" % step, [ ("u", event.state_component[0]), ("v", event.state_component[1:]), diff --git a/examples/wave/wave-min-mpi.py b/examples/wave/wave-min-mpi.py index 04d0b8a371bef57013441932343085a22659782b..08705655cf874bf7701bb1c3f998c0d6ea7a6e06 100644 --- a/examples/wave/wave-min-mpi.py +++ b/examples/wave/wave-min-mpi.py @@ -134,7 +134,7 @@ def main(write_output=True, order=4): time()-t_last_step) if step % 10 == 0: vis.write_vtk_file( - "fld-%03d-%04d.vtu" % ( + "fld-wave-min-mpi-%03d-%04d.vtu" % ( rank, step, ), diff --git a/examples/wave/wave-min.py b/examples/wave/wave-min.py index d7ebffd33a7732c621d159622804fed064f365b3..d793e3a09716b883d91c90f1fddfb2f96bacf1a2 100644 --- a/examples/wave/wave-min.py +++ b/examples/wave/wave-min.py @@ -113,7 +113,7 @@ def main(write_output=True, order=4): print(step, event.t, norm(queue, u=event.state_component[0]), time()-t_last_step) if step % 10 == 0: - vis.write_vtk_file("fld-%04d.vtu" % step, + vis.write_vtk_file("fld-wave-min-%04d.vtu" % step, [ ("u", event.state_component[0]), ("v", event.state_component[1:]), diff --git a/grudge/execution.py b/grudge/execution.py index 9e48aba39cd6e3aa5481f0bf9f31f3340381e4f6..4fe592dda96a69d410d4ec0d2b2e1ac28614415d 100644 --- a/grudge/execution.py +++ b/grudge/execution.py @@ -287,7 +287,7 @@ class ExecutionMapper(mappers.Evaluator, # {{{ instruction execution functions - def map_insn_rank_data_swap(self, insn): + def map_insn_rank_data_swap(self, insn, profile_data=None): local_data = self.rec(insn.field).get(self.queue) comm = self.discrwb.mpi_communicator @@ -302,7 +302,7 @@ class ExecutionMapper(mappers.Evaluator, MPIRecvFuture(recv_req, insn.name, remote_data_host, self.queue), MPISendFuture(send_req)] - def map_insn_loopy_kernel(self, insn): + def map_insn_loopy_kernel(self, insn, profile_data=None): kwargs = {} kdescr = insn.kernel_descriptor for name, expr in six.iteritems(kdescr.input_mappings): @@ -325,11 +325,11 @@ class ExecutionMapper(mappers.Evaluator, evt, result_dict = kdescr.loopy_kernel(self.queue, **kwargs) return list(result_dict.items()), [] - def map_insn_assign(self, insn): + def map_insn_assign(self, insn, profile_data=None): return [(name, self.rec(expr)) for name, expr in zip(insn.names, insn.exprs)], [] - def map_insn_assign_to_discr_scoped(self, insn): + def map_insn_assign_to_discr_scoped(self, insn, profile_data=None): assignments = [] for name, expr in zip(insn.names, insn.exprs): value = self.rec(expr) @@ -338,11 +338,11 @@ class ExecutionMapper(mappers.Evaluator, return assignments, [] - def map_insn_assign_from_discr_scoped(self, insn): + def map_insn_assign_from_discr_scoped(self, insn, profile_data=None): return [(insn.name, self.discrwb._discr_scoped_subexpr_name_to_value[insn.name])], [] - def map_insn_diff_batch_assign(self, insn): + def map_insn_diff_batch_assign(self, insn, profile_data=None): field = self.rec(insn.field) repr_op = insn.operators[0] # FIXME: There's no real reason why differentiation is special, @@ -440,7 +440,7 @@ class MPISendFuture(object): class BoundOperator(object): def __init__(self, discrwb, discr_code, eval_code, debug_flags, - function_registry, allocator=None): + function_registry, exec_mapper_factory, allocator=None): self.discrwb = discrwb self.discr_code = discr_code self.eval_code = eval_code @@ -448,6 +448,7 @@ class BoundOperator(object): self.debug_flags = debug_flags self.function_registry = function_registry self.allocator = allocator + self.exec_mapper_factory = exec_mapper_factory def __str__(self): sep = 75 * "=" + "\n" @@ -481,7 +482,7 @@ class BoundOperator(object): # need to do discrwb-scope evaluation discrwb_eval_context = {} self.discr_code.execute( - ExecutionMapper(queue, discrwb_eval_context, self)) + self.exec_mapper_factory(queue, discrwb_eval_context, self)) # }}} @@ -490,7 +491,7 @@ class BoundOperator(object): new_context[name] = with_object_array_or_scalar(replace_queue, var) return self.eval_code.execute( - ExecutionMapper(queue, new_context, self), + self.exec_mapper_factory(queue, new_context, self), profile_data=profile_data, log_quantities=log_quantities) @@ -593,8 +594,9 @@ def process_sym_operator(discrwb, sym_operator, post_bind_mapper=None, def bind(discr, sym_operator, post_bind_mapper=lambda x: x, - debug_flags=set(), allocator=None, - function_registry=base_function_registry): + function_registry=base_function_registry, + exec_mapper_factory=ExecutionMapper, + debug_flags=frozenset(), allocator=None): # from grudge.symbolic.mappers import QuadratureUpsamplerRemover # sym_operator = QuadratureUpsamplerRemover(self.quad_min_degrees)( # sym_operator) @@ -621,7 +623,9 @@ def bind(discr, sym_operator, post_bind_mapper=lambda x: x, bound_op = BoundOperator(discr, discr_code, eval_code, function_registry=function_registry, - debug_flags=debug_flags, allocator=allocator) + exec_mapper_factory=exec_mapper_factory, + debug_flags=debug_flags, + allocator=allocator) if "dump_op_code" in debug_flags: from grudge.tools import open_unique_debug_file diff --git a/grudge/symbolic/compiler.py b/grudge/symbolic/compiler.py index 498e5172254ca7acb4a55fb25057e837d024fcfb..3aac28bb3c3a8f74d6db255863c811314d464d36 100644 --- a/grudge/symbolic/compiler.py +++ b/grudge/symbolic/compiler.py @@ -106,10 +106,13 @@ class LoopyKernelInstruction(Instruction): @memoize_method def get_dependencies(self): from pymbolic.primitives import Variable, Subscript - return set( - v - for v in self.kernel_descriptor.input_mappings.values() - if isinstance(v, (Variable, Subscript))) + result = set() + for v in self.kernel_descriptor.input_mappings.values(): + if isinstance(v, Variable): + result.add(v) + elif isinstance(v, Subscript): + result.add(v.aggregate) + return result def __str__(self): knl_str = "\n".join( @@ -523,7 +526,7 @@ class Code(object): log_quantities["rank_data_swap_timer"], log_quantities["rank_data_swap_counter"]) - assignments, new_futures = mapper_method(insn) + assignments, new_futures = mapper_method(insn, profile_data) for target, value in assignments: if pre_assign_check is not None: @@ -580,10 +583,6 @@ class Code(object): break if len(done_insns) < len(self.instructions): - print("Unreachable instructions:") - for insn in set(self.instructions) - done_insns: - print(" ", insn) - raise RuntimeError("not all instructions are reachable" "--did you forget to pass a value for a placeholder?") @@ -591,7 +590,7 @@ class Code(object): exec_sub_timer.stop().submit() from pytools.obj_array import with_object_array_or_scalar if profile_data is not None: - profile_data['total_time'] += time() - start_time + profile_data['total_time'] = time() - start_time return (with_object_array_or_scalar(exec_mapper, self.result), profile_data) return with_object_array_or_scalar(exec_mapper, self.result) diff --git a/grudge/symbolic/mappers/__init__.py b/grudge/symbolic/mappers/__init__.py index df53cadf86e69cf3ca97081cf9c3618570120450..0e9b8e02d67b6f2b86d217d8c230263281d49a19 100644 --- a/grudge/symbolic/mappers/__init__.py +++ b/grudge/symbolic/mappers/__init__.py @@ -1230,6 +1230,7 @@ class CollectorMixin(OperatorReducerMixin, LocalOpReducerMixin, FluxOpReducerMix def map_constant(self, expr, *args, **kwargs): return OrderedSet() + map_variable = map_constant map_grudge_variable = map_constant map_function_symbol = map_constant @@ -1275,6 +1276,33 @@ class FluxExchangeCollector(CSECachingMapperMixin, CollectorMixin, CombineMapper class Evaluator(pymbolic.mapper.evaluator.EvaluationMapper): pass + +class SymbolicEvaluator(pymbolic.mapper.evaluator.EvaluationMapper): + def map_operator_binding(self, expr, *args, **kwargs): + return expr.op(self.rec(expr.field, *args, **kwargs)) + + def map_node_coordinate_component(self, expr, *args, **kwargs): + return expr + + def map_call(self, expr, *args, **kwargs): + return type(expr)( + expr.function, + tuple(self.rec(child, *args, **kwargs) + for child in expr.parameters)) + + def map_call_with_kwargs(self, expr, *args, **kwargs): + return type(expr)( + expr.function, + tuple(self.rec(child, *args, **kwargs) + for child in expr.parameters), + dict( + (key, self.rec(val, *args, **kwargs)) + for key, val in six.iteritems(expr.kw_parameters)) + ) + + def map_common_subexpression(self, expr): + return type(expr)(self.rec(expr.child), expr.prefix, expr.scope) + # }}} diff --git a/grudge/symbolic/primitives.py b/grudge/symbolic/primitives.py index c59f1518b5034a9b69c3aca4a22c0c4bab92d7a6..63adf3dbf7bfa776f35a0691e63967488515fb00 100644 --- a/grudge/symbolic/primitives.py +++ b/grudge/symbolic/primitives.py @@ -76,6 +76,7 @@ Symbols .. autoclass:: ScalarVariable .. autoclass:: make_sym_array .. autoclass:: make_sym_mv +.. autoclass:: FunctionSymbol .. function :: sqrt(arg) .. function :: exp(arg) diff --git a/setup.py b/setup.py index 1f9ecbd04b99dbc64502f4a1bd4f2dc4a8376cdf..e975a5cbb9c9cf0f03f97099b2844d2eb51bbde3 100644 --- a/setup.py +++ b/setup.py @@ -53,8 +53,8 @@ def main(): "pymbolic>=2013.2", "loo.py>=2013.1beta", "cgen>=2013.1.2", - "leap>=2015.1", - "dagrt>=2015.1", + "leap>=2019.1", + "dagrt>=2019.1", "six>=1.8", ]) diff --git a/examples/advection/advection.py b/unported-examples/advection/advection.py similarity index 100% rename from examples/advection/advection.py rename to unported-examples/advection/advection.py diff --git a/examples/benchmark_grudge/benchmark_mpi.py b/unported-examples/benchmark_grudge/benchmark_mpi.py similarity index 100% rename from examples/benchmark_grudge/benchmark_mpi.py rename to unported-examples/benchmark_grudge/benchmark_mpi.py diff --git a/examples/benchmark_grudge/run_benchmark.sh b/unported-examples/benchmark_grudge/run_benchmark.sh similarity index 100% rename from examples/benchmark_grudge/run_benchmark.sh rename to unported-examples/benchmark_grudge/run_benchmark.sh diff --git a/examples/burgers/burgers.py b/unported-examples/burgers/burgers.py similarity index 100% rename from examples/burgers/burgers.py rename to unported-examples/burgers/burgers.py diff --git a/examples/gas_dynamics/box-in-box.py b/unported-examples/gas_dynamics/box-in-box.py similarity index 100% rename from examples/gas_dynamics/box-in-box.py rename to unported-examples/gas_dynamics/box-in-box.py diff --git a/examples/gas_dynamics/euler/sine-wave.py b/unported-examples/gas_dynamics/euler/sine-wave.py similarity index 100% rename from examples/gas_dynamics/euler/sine-wave.py rename to unported-examples/gas_dynamics/euler/sine-wave.py diff --git a/examples/gas_dynamics/euler/sod-2d.py b/unported-examples/gas_dynamics/euler/sod-2d.py similarity index 100% rename from examples/gas_dynamics/euler/sod-2d.py rename to unported-examples/gas_dynamics/euler/sod-2d.py diff --git a/examples/gas_dynamics/euler/vortex-adaptive-grid.py b/unported-examples/gas_dynamics/euler/vortex-adaptive-grid.py similarity index 100% rename from examples/gas_dynamics/euler/vortex-adaptive-grid.py rename to unported-examples/gas_dynamics/euler/vortex-adaptive-grid.py diff --git a/examples/gas_dynamics/euler/vortex-sources.py b/unported-examples/gas_dynamics/euler/vortex-sources.py similarity index 100% rename from examples/gas_dynamics/euler/vortex-sources.py rename to unported-examples/gas_dynamics/euler/vortex-sources.py diff --git a/examples/gas_dynamics/euler/vortex.py b/unported-examples/gas_dynamics/euler/vortex.py similarity index 100% rename from examples/gas_dynamics/euler/vortex.py rename to unported-examples/gas_dynamics/euler/vortex.py diff --git a/examples/gas_dynamics/gas_dynamics_initials.py b/unported-examples/gas_dynamics/gas_dynamics_initials.py similarity index 100% rename from examples/gas_dynamics/gas_dynamics_initials.py rename to unported-examples/gas_dynamics/gas_dynamics_initials.py diff --git a/examples/gas_dynamics/lbm-simple.py b/unported-examples/gas_dynamics/lbm-simple.py similarity index 100% rename from examples/gas_dynamics/lbm-simple.py rename to unported-examples/gas_dynamics/lbm-simple.py diff --git a/examples/gas_dynamics/naca.py b/unported-examples/gas_dynamics/naca.py similarity index 100% rename from examples/gas_dynamics/naca.py rename to unported-examples/gas_dynamics/naca.py diff --git a/examples/gas_dynamics/navierstokes/shearflow.py b/unported-examples/gas_dynamics/navierstokes/shearflow.py similarity index 100% rename from examples/gas_dynamics/navierstokes/shearflow.py rename to unported-examples/gas_dynamics/navierstokes/shearflow.py diff --git a/examples/gas_dynamics/square.py b/unported-examples/gas_dynamics/square.py similarity index 100% rename from examples/gas_dynamics/square.py rename to unported-examples/gas_dynamics/square.py diff --git a/examples/gas_dynamics/wing.py b/unported-examples/gas_dynamics/wing.py similarity index 100% rename from examples/gas_dynamics/wing.py rename to unported-examples/gas_dynamics/wing.py diff --git a/examples/heat/heat.py b/unported-examples/heat/heat.py similarity index 100% rename from examples/heat/heat.py rename to unported-examples/heat/heat.py diff --git a/examples/maxwell/.gitignore b/unported-examples/maxwell/.gitignore similarity index 100% rename from examples/maxwell/.gitignore rename to unported-examples/maxwell/.gitignore diff --git a/examples/maxwell/dipole.py b/unported-examples/maxwell/dipole.py similarity index 100% rename from examples/maxwell/dipole.py rename to unported-examples/maxwell/dipole.py diff --git a/examples/maxwell/generate-bessel-zeros.py b/unported-examples/maxwell/generate-bessel-zeros.py similarity index 100% rename from examples/maxwell/generate-bessel-zeros.py rename to unported-examples/maxwell/generate-bessel-zeros.py diff --git a/examples/maxwell/inhom-cavity.py b/unported-examples/maxwell/inhom-cavity.py similarity index 100% rename from examples/maxwell/inhom-cavity.py rename to unported-examples/maxwell/inhom-cavity.py diff --git a/examples/maxwell/inhomogeneous_waveguide.mac b/unported-examples/maxwell/inhomogeneous_waveguide.mac similarity index 100% rename from examples/maxwell/inhomogeneous_waveguide.mac rename to unported-examples/maxwell/inhomogeneous_waveguide.mac diff --git a/examples/maxwell/maxwell-2d.py b/unported-examples/maxwell/maxwell-2d.py similarity index 100% rename from examples/maxwell/maxwell-2d.py rename to unported-examples/maxwell/maxwell-2d.py diff --git a/examples/maxwell/maxwell-pml.py b/unported-examples/maxwell/maxwell-pml.py similarity index 100% rename from examples/maxwell/maxwell-pml.py rename to unported-examples/maxwell/maxwell-pml.py diff --git a/examples/maxwell/notes.tm b/unported-examples/maxwell/notes.tm similarity index 100% rename from examples/maxwell/notes.tm rename to unported-examples/maxwell/notes.tm diff --git a/examples/poisson/helmholtz.py b/unported-examples/poisson/helmholtz.py similarity index 100% rename from examples/poisson/helmholtz.py rename to unported-examples/poisson/helmholtz.py diff --git a/examples/poisson/poisson.py b/unported-examples/poisson/poisson.py similarity index 100% rename from examples/poisson/poisson.py rename to unported-examples/poisson/poisson.py diff --git a/examples/wave/wiggly.py b/unported-examples/wave/wiggly.py similarity index 100% rename from examples/wave/wiggly.py rename to unported-examples/wave/wiggly.py