diff --git a/examples/advection/var-velocity.py b/examples/advection/var-velocity.py index b6beb7a2d493aa77a154b96b8e20f6461fb757cd..388c4ce576fd8e248432831f14efaadb248fb4a1 100644 --- a/examples/advection/var-velocity.py +++ b/examples/advection/var-velocity.py @@ -26,11 +26,11 @@ import os import numpy as np import pyopencl as cl -import pyopencl.array -import pyopencl.clmath + +from meshmode.array_context import PyOpenCLArrayContext from grudge import bind, sym -from pytools.obj_array import join_fields +from pytools.obj_array import flat_obj_array import logging logger = logging.getLogger(__name__) @@ -92,6 +92,7 @@ class Plotter: def main(ctx_factory, dim=2, order=4, product_tag=None, visualize=True): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) # {{{ parameters @@ -119,7 +120,7 @@ def main(ctx_factory, dim=2, order=4, product_tag=None, visualize=True): c = sym_x else: # solid body rotation - c = join_fields( + c = flat_obj_array( np.pi * (d/2 - sym_x[1]), np.pi * (sym_x[0] - d/2), 0)[:dim] @@ -145,7 +146,7 @@ def main(ctx_factory, dim=2, order=4, product_tag=None, visualize=True): quad_tag_to_group_factory = {} from grudge import DGDiscretizationWithBoundaries - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order, + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order, quad_tag_to_group_factory=quad_tag_to_group_factory) # }}} @@ -176,10 +177,10 @@ def main(ctx_factory, dim=2, order=4, product_tag=None, visualize=True): flux_type=flux_type) bound_op = bind(discr, op.sym_operator()) - u = bind(discr, f_gaussian(sym.nodes(dim)))(queue, t=0) + u = bind(discr, f_gaussian(sym.nodes(dim)))(actx, t=0) def rhs(t, u): - return bound_op(queue, t=t, u=u) + return bound_op(t=t, u=u) # }}} @@ -197,7 +198,7 @@ def main(ctx_factory, dim=2, order=4, product_tag=None, visualize=True): continue if step % 10 == 0: - norm_u = norm(queue, u=event.state_component) + norm_u = norm(u=event.state_component) plot(event, "fld-var-velocity-%04d" % step) step += 1 diff --git a/examples/advection/weak.py b/examples/advection/weak.py index 14ed08e1a757d149c1c96ecb9d84eb60d15400b1..88703a0e930ff703d15e3e9dee6d7be11974002a 100644 --- a/examples/advection/weak.py +++ b/examples/advection/weak.py @@ -22,8 +22,8 @@ import numpy as np import numpy.linalg as la import pyopencl as cl -import pyopencl.array -import pyopencl.clmath + +from meshmode.array_context import PyOpenCLArrayContext from grudge import bind, sym @@ -88,6 +88,7 @@ class Plotter: def main(ctx_factory, dim=2, order=4, visualize=True): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) # {{{ parameters @@ -123,7 +124,7 @@ def main(ctx_factory, dim=2, order=4, visualize=True): order=order) from grudge import DGDiscretizationWithBoundaries - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order) # }}} @@ -142,10 +143,10 @@ def main(ctx_factory, dim=2, order=4, visualize=True): flux_type=flux_type) bound_op = bind(discr, op.sym_operator()) - u = bind(discr, u_analytic(sym.nodes(dim)))(queue, t=0) + u = bind(discr, u_analytic(sym.nodes(dim)))(actx, t=0) def rhs(t, u): - return bound_op(queue, t=t, u=u) + return bound_op(t=t, u=u) # }}} @@ -165,7 +166,7 @@ def main(ctx_factory, dim=2, order=4, visualize=True): continue if step % 10 == 0: - norm_u = norm(queue, u=event.state_component) + norm_u = norm(u=event.state_component) plot(event, "fld-weak-%04d" % step) step += 1 diff --git a/examples/dagrt-fusion.py b/examples/dagrt-fusion.py index fef376c4f94a32725b9e2537d151901a0667f00f..a493059e642e24867c2b6658722cad837189d3ae 100755 --- a/examples/dagrt-fusion.py +++ b/examples/dagrt-fusion.py @@ -34,7 +34,6 @@ import contextlib import logging import numpy as np import os -import six import sys import pyopencl as cl import pyopencl.array # noqa @@ -42,6 +41,10 @@ import pytest import dagrt.language as lang import pymbolic.primitives as p + +from meshmode.dof_array import DOFArray +from meshmode.array_context import PyOpenCLArrayContext + import grudge.symbolic.mappers as gmap import grudge.symbolic.operators as sym_op from grudge.execution import ExecutionMapper @@ -50,7 +53,7 @@ from pymbolic.mapper import Mapper from pymbolic.mapper.evaluator import EvaluationMapper \ as PymbolicEvaluationMapper from pytools import memoize -from pytools.obj_array import join_fields +from pytools.obj_array import flat_obj_array from grudge import sym, bind, DGDiscretizationWithBoundaries from leap.rk import LSRK4MethodBuilder @@ -82,6 +85,15 @@ def open_output_file(filename): outfile.close() +def dof_array_nbytes(ary: np.ndarray): + if isinstance(ary, np.ndarray) and ary.dtype.char == "O": + return sum( + dof_array_nbytes(ary[idx]) + for idx in np.ndindex(ary.shape)) + else: + return ary.nbytes + + # {{{ topological sort def topological_sort(stmts, root_deps): @@ -241,8 +253,7 @@ def transcribe_phase(dag, field_var_name, field_components, phase_name, class RK4TimeStepperBase(object): - def __init__(self, queue, component_getter): - self.queue = queue + def __init__(self, component_getter): self.component_getter = component_getter def get_initial_context(self, fields, t_start, dt): @@ -254,7 +265,7 @@ class RK4TimeStepperBase(object): flattened_fields.extend(field) else: flattened_fields.append(field) - flattened_fields = join_fields(*flattened_fields) + flattened_fields = flat_obj_array(*flattened_fields) del fields return { @@ -286,7 +297,7 @@ class RK4TimeStepperBase(object): assert len(output_states) == num_fields assert len(output_states) == len(output_residuals) - flattened_results = join_fields(output_t, output_dt, *output_states) + flattened_results = flat_obj_array(output_t, output_dt, *output_states) self.bound_op = bind( discr, flattened_results, @@ -305,7 +316,6 @@ class RK4TimeStepperBase(object): profile_data = None results = self.bound_op( - self.queue, profile_data=profile_data, **context) @@ -327,7 +337,7 @@ class RK4TimeStepperBase(object): class RK4TimeStepper(RK4TimeStepperBase): - def __init__(self, queue, discr, field_var_name, grudge_bound_op, + def __init__(self, discr, field_var_name, grudge_bound_op, num_fields, component_getter, exec_mapper_factory=ExecutionMapper): """Arguments: @@ -342,7 +352,7 @@ class RK4TimeStepper(RK4TimeStepperBase): its components """ - super().__init__(queue, component_getter) + super().__init__(component_getter) # Construct sym_rhs to have the effect of replacing the RHS calls in the # dagrt code with calls of the grudge operator. @@ -353,9 +363,8 @@ class RK4TimeStepper(RK4TimeStepperBase): + tuple( Variable(field_var_name, dd=sym.DD_VOLUME)[i] for i in range(num_fields))))) - sym_rhs = join_fields(*(call[i] for i in range(num_fields))) + sym_rhs = flat_obj_array(*(call[i] for i in range(num_fields))) - self.queue = queue self.grudge_bound_op = grudge_bound_op from grudge.function_registry import register_external_function @@ -373,12 +382,12 @@ class RK4TimeStepper(RK4TimeStepperBase): self.component_getter = component_getter - def _bound_op(self, queue, t, *args, profile_data=None): + def _bound_op(self, array_context, t, *args, profile_data=None): context = { "t": t, - self.field_var_name: join_fields(*args)} + self.field_var_name: flat_obj_array(*args)} result = self.grudge_bound_op( - queue, profile_data=profile_data, **context) + array_context, profile_data=profile_data, **context) if profile_data is not None: result = result[0] return result @@ -391,9 +400,9 @@ class RK4TimeStepper(RK4TimeStepperBase): class FusedRK4TimeStepper(RK4TimeStepperBase): - def __init__(self, queue, discr, field_var_name, sym_rhs, num_fields, + def __init__(self, discr, field_var_name, sym_rhs, num_fields, component_getter, exec_mapper_factory=ExecutionMapper): - super().__init__(queue, component_getter) + super().__init__(component_getter) self.set_up_stepper( discr, field_var_name, sym_rhs, num_fields, base_function_registry, @@ -404,7 +413,7 @@ class FusedRK4TimeStepper(RK4TimeStepperBase): # {{{ problem setup code -def get_strong_wave_op_with_discr(cl_ctx, dims=2, order=4): +def get_strong_wave_op_with_discr(actx, dims=2, order=4): from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh( a=(-0.5,)*dims, @@ -413,7 +422,7 @@ def get_strong_wave_op_with_discr(cl_ctx, dims=2, order=4): logger.debug("%d elements", mesh.nelements) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order) source_center = np.array([0.1, 0.22, 0.33])[:dims] source_width = 0.05 @@ -448,22 +457,22 @@ def dg_flux(c, tpair): dims = len(v) normal = sym.normal(tpair.dd, dims) - flux_weak = join_fields( + flux_weak = flat_obj_array( np.dot(v.avg, normal), u.avg * normal) - flux_weak -= (1 if c > 0 else -1)*join_fields( + flux_weak -= (1 if c > 0 else -1)*flat_obj_array( 0.5*(u.int-u.ext), 0.5*(normal * np.dot(normal, v.int-v.ext))) - flux_strong = join_fields( + flux_strong = flat_obj_array( np.dot(v.int, normal), u.int * normal) - flux_weak return sym.project(tpair.dd, "all_faces")(c*flux_strong) -def get_strong_wave_op_with_discr_direct(cl_ctx, dims=2, order=4): +def get_strong_wave_op_with_discr_direct(actx, dims=2, order=4): from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh( a=(-0.5,)*dims, @@ -472,7 +481,7 @@ def get_strong_wave_op_with_discr_direct(cl_ctx, dims=2, order=4): logger.debug("%d elements", mesh.nelements) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order) source_center = np.array([0.1, 0.22, 0.33])[:dims] source_width = 0.05 @@ -502,13 +511,13 @@ def get_strong_wave_op_with_discr_direct(cl_ctx, dims=2, order=4): rad_u = sym.cse(sym.project("vol", BTAG_ALL)(u)) rad_v = sym.cse(sym.project("vol", BTAG_ALL)(v)) - rad_bc = sym.cse(join_fields( + rad_bc = sym.cse(flat_obj_array( 0.5*(rad_u - sign*np.dot(rad_normal, rad_v)), 0.5*rad_normal*(np.dot(rad_normal, rad_v) - sign*rad_u) ), "rad_bc") sym_operator = ( - - join_fields( + - flat_obj_array( -c*np.dot(sym.nabla(dims), v) - source_f, -c*(sym.nabla(dims)*u) ) @@ -532,29 +541,30 @@ def get_strong_wave_component(state_component): def test_stepper_equivalence(ctx_factory, order=4): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) dims = 2 sym_operator, _ = get_strong_wave_op_with_discr( - cl_ctx, dims=dims, order=order) + actx, dims=dims, order=order) sym_operator_direct, discr = get_strong_wave_op_with_discr_direct( - cl_ctx, dims=dims, order=order) + actx, dims=dims, order=order) if dims == 2: dt = 0.04 elif dims == 3: dt = 0.02 - ic = join_fields(discr.zeros(queue), - [discr.zeros(queue) for i in range(discr.dim)]) + ic = flat_obj_array(discr.zeros(actx), + [discr.zeros(actx) for i in range(discr.dim)]) bound_op = bind(discr, sym_operator) stepper = RK4TimeStepper( - queue, discr, "w", bound_op, 1 + discr.dim, get_strong_wave_component) + discr, "w", bound_op, 1 + discr.dim, get_strong_wave_component) fused_stepper = FusedRK4TimeStepper( - queue, discr, "w", sym_operator_direct, 1 + discr.dim, + discr, "w", sym_operator_direct, 1 + discr.dim, get_strong_wave_component) t_start = 0 @@ -573,7 +583,7 @@ def test_stepper_equivalence(ctx_factory, order=4): logger.debug("step %d/%d", step, nsteps) t, (u, v) = next(fused_steps) assert t == t_ref, step - assert norm(queue, u=u, u_ref=u_ref) <= 1e-13, step + assert norm(u=u, u_ref=u_ref) <= 1e-13, step # }}} @@ -582,9 +592,9 @@ def test_stepper_equivalence(ctx_factory, order=4): class ExecutionMapperWrapper(Mapper): - def __init__(self, queue, context, bound_op): - self.inner_mapper = ExecutionMapper(queue, context, bound_op) - self.queue = queue + def __init__(self, array_context, context, bound_op): + self.inner_mapper = ExecutionMapper(array_context, context, bound_op) + self.array_context = array_context self.context = context self.bound_op = bound_op @@ -592,6 +602,9 @@ class ExecutionMapperWrapper(Mapper): # Needed, because bound op execution can ask for variable values. return self.inner_mapper.map_variable(expr) + def map_node_coordinate_component(self, expr): + return self.inner_mapper.map_node_coordinate_component(expr) + def map_grudge_variable(self, expr): # See map_variable() return self.inner_mapper.map_grudge_variable(expr) @@ -610,7 +623,7 @@ class ExecutionMapperWithMemOpCounting(ExecutionMapperWrapper): def map_profiled_call(self, expr, profile_data): args = [self.inner_mapper.rec(p) for p in expr.parameters] return self.inner_mapper.function_registry[expr.function.name]( - self.queue, *args, profile_data=profile_data) + self.array_context, *args, profile_data=profile_data) def map_profiled_essentially_elementwise_linear(self, op, field_expr, profile_data): @@ -623,15 +636,19 @@ class ExecutionMapperWithMemOpCounting(ExecutionMapperWrapper): field = self.inner_mapper.rec(field_expr) profile_data["bytes_read"] = ( - profile_data.get("bytes_read", 0) + field.nbytes) + profile_data.get("bytes_read", 0) + + dof_array_nbytes(field)) profile_data["bytes_written"] = ( - profile_data.get("bytes_written", 0) + result.nbytes) + profile_data.get("bytes_written", 0) + + dof_array_nbytes(result)) if op.mapper_method == "map_projection": profile_data["interp_bytes_read"] = ( - profile_data.get("interp_bytes_read", 0) + field.nbytes) + profile_data.get("interp_bytes_read", 0) + + dof_array_nbytes(field)) profile_data["interp_bytes_written"] = ( - profile_data.get("interp_bytes_written", 0) + result.nbytes) + profile_data.get("interp_bytes_written", 0) + + dof_array_nbytes(result)) return result @@ -672,45 +689,71 @@ class ExecutionMapperWithMemOpCounting(ExecutionMapperWrapper): return result, [] def map_insn_loopy_kernel(self, insn, profile_data): - kwargs = {} kdescr = insn.kernel_descriptor - for name, expr in six.iteritems(kdescr.input_mappings): - val = self.inner_mapper.rec(expr) - kwargs[name] = val - assert not isinstance(val, np.ndarray) - if profile_data is not None and isinstance(val, pyopencl.array.Array): - profile_data["bytes_read"] = ( - profile_data.get("bytes_read", 0) + val.nbytes) - profile_data["bytes_read_by_scalar_assignments"] = ( - profile_data.get("bytes_read_by_scalar_assignments", 0) - + val.nbytes) - discr = self.inner_mapper.discrwb.discr_from_dd(kdescr.governing_dd) + + dof_array_kwargs = {} + other_kwargs = {} + + for name, expr in kdescr.input_mappings.items(): + v = self.inner_mapper.rec(expr) + if isinstance(v, DOFArray): + dof_array_kwargs[name] = v + + if profile_data is not None: + size = dof_array_nbytes(v) + profile_data["bytes_read"] = ( + profile_data.get("bytes_read", 0) + size) + profile_data["bytes_read_by_scalar_assignments"] = ( + profile_data.get("bytes_read_by_scalar_assignments", 0) + + size) + else: + assert not isinstance(v, np.ndarray) + other_kwargs[name] = v + for name in kdescr.scalar_args(): - v = kwargs[name] + v = other_kwargs[name] if isinstance(v, (int, float)): - kwargs[name] = discr.real_dtype.type(v) + other_kwargs[name] = discr.real_dtype.type(v) elif isinstance(v, complex): - kwargs[name] = discr.complex_dtype.type(v) + other_kwargs[name] = discr.complex_dtype.type(v) elif isinstance(v, np.number): pass else: raise ValueError("unrecognized scalar type for variable '%s': %s" % (name, type(v))) - kwargs["grdg_n"] = discr.nnodes - evt, result_dict = kdescr.loopy_kernel(self.queue, **kwargs) + result = {} + for grp in discr.groups: + kwargs = other_kwargs.copy() + kwargs["nelements"] = grp.nelements + kwargs["nunit_dofs"] = grp.nunit_dofs + + for name, ary in dof_array_kwargs.items(): + kwargs[name] = ary[grp.index] + + knl_result = self.inner_mapper.array_context.call_loopy( + kdescr.loopy_kernel, **kwargs) + + for name, val in knl_result.items(): + result.setdefault(name, []).append(val) - for val in result_dict.values(): - assert not isinstance(val, np.ndarray) - if profile_data is not None and isinstance(val, pyopencl.array.Array): + result = { + name: DOFArray.from_list( + self.inner_mapper.array_context, val) + for name, val in result.items()} + + for val in result.values(): + assert isinstance(val, DOFArray) + if profile_data is not None: + size = dof_array_nbytes(val) profile_data["bytes_written"] = ( - profile_data.get("bytes_written", 0) + val.nbytes) + profile_data.get("bytes_written", 0) + size) profile_data["bytes_written_by_scalar_assignments"] = ( profile_data.get("bytes_written_by_scalar_assignments", 0) - + val.nbytes) + + size) - return list(result_dict.items()), [] + return list(result.items()), [] def map_insn_assign_to_discr_scoped(self, insn, profile_data=None): assignments = [] @@ -743,11 +786,12 @@ class ExecutionMapperWithMemOpCounting(ExecutionMapperWrapper): field = self.inner_mapper.rec(insn.field) profile_data["bytes_read"] = ( - profile_data.get("bytes_read", 0) + field.nbytes) + profile_data.get("bytes_read", 0) + dof_array_nbytes(field)) for _, value in assignments: profile_data["bytes_written"] = ( - profile_data.get("bytes_written", 0) + value.nbytes) + profile_data.get("bytes_written", 0) + + dof_array_nbytes(value)) return assignments, futures @@ -761,8 +805,9 @@ class ExecutionMapperWithMemOpCounting(ExecutionMapperWrapper): def test_assignment_memory_model(ctx_factory): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) - _, discr = get_strong_wave_op_with_discr_direct(cl_ctx, dims=2, order=3) + _, discr = get_strong_wave_op_with_discr_direct(actx, dims=2, order=3) # Assignment instruction bound_op = bind( @@ -771,35 +816,36 @@ def test_assignment_memory_model(ctx_factory): + sym.Variable("input1", sym.DD_VOLUME), exec_mapper_factory=ExecutionMapperWithMemOpCounting) - input0 = discr.zeros(queue) - input1 = discr.zeros(queue) + input0 = discr.zeros(actx) + input1 = discr.zeros(actx) result, profile_data = bound_op( - queue, profile_data={}, input0=input0, input1=input1) - assert profile_data["bytes_read"] == input0.nbytes + input1.nbytes - assert profile_data["bytes_written"] == result.nbytes + assert profile_data["bytes_read"] == \ + dof_array_nbytes(input0) + dof_array_nbytes(input1) + assert profile_data["bytes_written"] == dof_array_nbytes(result) @pytest.mark.parametrize("use_fusion", (True, False)) def test_stepper_mem_ops(ctx_factory, use_fusion): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) dims = 2 sym_operator, discr = get_strong_wave_op_with_discr_direct( - cl_ctx, dims=dims, order=3) + actx, dims=dims, order=3) t_start = 0 dt = 0.04 t_end = 0.2 - ic = join_fields(discr.zeros(queue), - [discr.zeros(queue) for i in range(discr.dim)]) + ic = flat_obj_array(discr.zeros(actx), + [discr.zeros(actx) for i in range(discr.dim)]) if not use_fusion: bound_op = bind( @@ -807,13 +853,13 @@ def test_stepper_mem_ops(ctx_factory, use_fusion): exec_mapper_factory=ExecutionMapperWithMemOpCounting) stepper = RK4TimeStepper( - queue, discr, "w", bound_op, 1 + discr.dim, + discr, "w", bound_op, 1 + discr.dim, get_strong_wave_component, exec_mapper_factory=ExecutionMapperWithMemOpCounting) else: stepper = FusedRK4TimeStepper( - queue, discr, "w", sym_operator, 1 + discr.dim, + discr, "w", sym_operator, 1 + discr.dim, get_strong_wave_component, exec_mapper_factory=ExecutionMapperWithMemOpCounting) @@ -886,9 +932,9 @@ def time_insn(f): if profile_data is None: return f(self, insn, profile_data) - start = cl.enqueue_marker(self.queue) + start = cl.enqueue_marker(self.array_context.queue) retval = f(self, insn, profile_data) - end = cl.enqueue_marker(self.queue) + end = cl.enqueue_marker(self.array_context.queue) profile_data\ .setdefault(time_field_name, TimingFutureList())\ .append(TimingFuture(start, end)) @@ -903,15 +949,15 @@ class ExecutionMapperWithTiming(ExecutionMapperWrapper): def map_profiled_call(self, expr, profile_data): args = [self.inner_mapper.rec(p) for p in expr.parameters] return self.inner_mapper.function_registry[expr.function.name]( - self.queue, *args, profile_data=profile_data) + self.array_context, *args, profile_data=profile_data) def map_profiled_operator_binding(self, expr, profile_data): if profile_data is None: return self.inner_mapper.map_operator_binding(expr) - start = cl.enqueue_marker(self.queue) + start = cl.enqueue_marker(self.array_context.queue) retval = self.inner_mapper.map_operator_binding(expr) - end = cl.enqueue_marker(self.queue) + end = cl.enqueue_marker(self.array_context.queue) time_field_name = "time_op_%s" % expr.op.mapper_method profile_data\ .setdefault(time_field_name, TimingFutureList())\ @@ -958,18 +1004,19 @@ def test_stepper_timing(ctx_factory, use_fusion): queue = cl.CommandQueue( cl_ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) + actx = PyOpenCLArrayContext(queue) dims = 3 sym_operator, discr = get_strong_wave_op_with_discr_direct( - cl_ctx, dims=dims, order=3) + actx, dims=dims, order=3) t_start = 0 dt = 0.04 t_end = 0.1 - ic = join_fields(discr.zeros(queue), - [discr.zeros(queue) for i in range(discr.dim)]) + ic = flat_obj_array(discr.zeros(actx), + [discr.zeros(actx) for i in range(discr.dim)]) if not use_fusion: bound_op = bind( @@ -977,13 +1024,13 @@ def test_stepper_timing(ctx_factory, use_fusion): exec_mapper_factory=ExecutionMapperWithTiming) stepper = RK4TimeStepper( - queue, discr, "w", bound_op, 1 + discr.dim, + discr, "w", bound_op, 1 + discr.dim, get_strong_wave_component, exec_mapper_factory=ExecutionMapperWithTiming) else: stepper = FusedRK4TimeStepper( - queue, discr, "w", sym_operator, 1 + discr.dim, + discr, "w", sym_operator, 1 + discr.dim, get_strong_wave_component, exec_mapper_factory=ExecutionMapperWithTiming) @@ -1009,11 +1056,11 @@ def test_stepper_timing(ctx_factory, use_fusion): # {{{ paper outputs -def get_example_stepper(queue, dims=2, order=3, use_fusion=True, +def get_example_stepper(actx, dims=2, order=3, use_fusion=True, exec_mapper_factory=ExecutionMapper, return_ic=False): sym_operator, discr = get_strong_wave_op_with_discr_direct( - queue.context, dims=dims, order=3) + actx, dims=dims, order=3) if not use_fusion: bound_op = bind( @@ -1021,19 +1068,19 @@ def get_example_stepper(queue, dims=2, order=3, use_fusion=True, exec_mapper_factory=exec_mapper_factory) stepper = RK4TimeStepper( - queue, discr, "w", bound_op, 1 + discr.dim, + discr, "w", bound_op, 1 + discr.dim, get_strong_wave_component, exec_mapper_factory=exec_mapper_factory) else: stepper = FusedRK4TimeStepper( - queue, discr, "w", sym_operator, 1 + discr.dim, + discr, "w", sym_operator, 1 + discr.dim, get_strong_wave_component, exec_mapper_factory=exec_mapper_factory) if return_ic: - ic = join_fields(discr.zeros(queue), - [discr.zeros(queue) for i in range(discr.dim)]) + ic = flat_obj_array(discr.zeros(actx), + [discr.zeros(actx) for i in range(discr.dim)]) return stepper, ic return stepper @@ -1079,21 +1126,23 @@ else: def problem_stats(order=3): cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) with open_output_file("grudge-problem-stats.txt") as outf: _, dg_discr_2d = get_strong_wave_op_with_discr_direct( - cl_ctx, dims=2, order=order) + actx, dims=2, order=order) print("Number of 2D elements:", dg_discr_2d.mesh.nelements, file=outf) vol_discr_2d = dg_discr_2d.discr_from_dd("vol") - dofs_2d = {group.nunit_nodes for group in vol_discr_2d.groups} + dofs_2d = {group.nunit_dofs for group in vol_discr_2d.groups} from pytools import one print("Number of DOFs per 2D element:", one(dofs_2d), file=outf) _, dg_discr_3d = get_strong_wave_op_with_discr_direct( - cl_ctx, dims=3, order=order) + actx, dims=3, order=order) print("Number of 3D elements:", dg_discr_3d.mesh.nelements, file=outf) vol_discr_3d = dg_discr_3d.discr_from_dd("vol") - dofs_3d = {group.nunit_nodes for group in vol_discr_3d.groups} + dofs_3d = {group.nunit_dofs for group in vol_discr_3d.groups} from pytools import one print("Number of DOFs per 3D element:", one(dofs_3d), file=outf) @@ -1103,9 +1152,10 @@ def problem_stats(order=3): def statement_counts_table(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) - fused_stepper = get_example_stepper(queue, use_fusion=True) - stepper = get_example_stepper(queue, use_fusion=False) + fused_stepper = get_example_stepper(actx, use_fusion=True) + stepper = get_example_stepper(actx, use_fusion=False) with open_output_file("statement-counts.tex") as outf: if not PAPER_OUTPUT: @@ -1131,15 +1181,15 @@ def statement_counts_table(): @memoize(key=lambda queue, dims: dims) -def mem_ops_results(queue, dims): +def mem_ops_results(actx, dims): fused_stepper = get_example_stepper( - queue, + actx, dims=dims, use_fusion=True, exec_mapper_factory=ExecutionMapperWithMemOpCounting) stepper, ic = get_example_stepper( - queue, + actx, dims=dims, use_fusion=False, exec_mapper_factory=ExecutionMapperWithMemOpCounting, @@ -1193,9 +1243,10 @@ def mem_ops_results(queue, dims): def scalar_assignment_percent_of_total_mem_ops_table(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) - result2d = mem_ops_results(queue, 2) - result3d = mem_ops_results(queue, 3) + result2d = mem_ops_results(actx, 2) + result3d = mem_ops_results(actx, 3) with open_output_file("scalar-assignments-mem-op-percentage.tex") as outf: if not PAPER_OUTPUT: diff --git a/examples/geometry.py b/examples/geometry.py index 6e146f21dd31545b392d8006d45d94bed7a9db02..df81298d4865b0dce384952633ee4fd00717c72b 100644 --- a/examples/geometry.py +++ b/examples/geometry.py @@ -28,15 +28,18 @@ import numpy as np # noqa import pyopencl as cl from grudge import sym, bind, DGDiscretizationWithBoundaries, shortcuts +from meshmode.array_context import PyOpenCLArrayContext + def main(write_output=True): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) from meshmode.mesh.generation import generate_warped_rect_mesh mesh = generate_warped_rect_mesh(dim=2, order=4, n=6) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=4) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=4) sym_op = sym.normal(sym.BTAG_ALL, mesh.dim) #sym_op = sym.nodes(mesh.dim, where=sym.BTAG_ALL) @@ -45,7 +48,7 @@ def main(write_output=True): print() print(op.eval_code) - vec = op(queue) + vec = op(actx) vis = shortcuts.make_visualizer(discr, 4) vis.write_vtk_file("geo.vtu", [ diff --git a/examples/maxwell/cavities.py b/examples/maxwell/cavities.py index a58f27399ccfb6e32a16d6cf594ed2a3c78076b2..eb2ee28dae6b2ed74c2760d2359d9542a9df2f59 100644 --- a/examples/maxwell/cavities.py +++ b/examples/maxwell/cavities.py @@ -27,6 +27,9 @@ THE SOFTWARE. import numpy as np import pyopencl as cl + +from meshmode.array_context import PyOpenCLArrayContext + from grudge.shortcuts import set_up_rk4 from grudge import sym, bind, DGDiscretizationWithBoundaries @@ -39,6 +42,7 @@ STEPS = 60 def main(dims, write_output=True, order=4): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh( @@ -46,7 +50,7 @@ def main(dims, write_output=True, order=4): b=(1.0,)*dims, n=(5,)*dims) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order) if 0: epsilon0 = 8.8541878176e-12 # C**2 / (N m**2) @@ -60,14 +64,12 @@ def main(dims, write_output=True, order=4): from grudge.models.em import MaxwellOperator op = MaxwellOperator(epsilon, mu, flux_type=0.5, dimensions=dims) - queue = cl.CommandQueue(discr.cl_context) - if dims == 3: sym_mode = get_rectangular_cavity_mode(1, (1, 2, 2)) - fields = bind(discr, sym_mode)(queue, t=0, epsilon=epsilon, mu=mu) + fields = bind(discr, sym_mode)(actx, t=0, epsilon=epsilon, mu=mu) else: sym_mode = get_rectangular_cavity_mode(1, (2, 3)) - fields = bind(discr, sym_mode)(queue, t=0) + fields = bind(discr, sym_mode)(actx, t=0) # FIXME #dt = op.estimate_rk4_timestep(discr, fields=fields) @@ -78,7 +80,7 @@ def main(dims, write_output=True, order=4): bound_op = bind(discr, op.sym_operator()) def rhs(t, w): - return bound_op(queue, t=t, w=w) + return bound_op(t=t, w=w) if mesh.dim == 2: dt = 0.004 @@ -117,8 +119,8 @@ def main(dims, write_output=True, order=4): step += 1 - print(step, event.t, norm(queue, u=e[0]), norm(queue, u=e[1]), - norm(queue, u=h[0]), norm(queue, u=h[1]), + print(step, event.t, norm(u=e[0]), norm(u=e[1]), + norm(u=h[0]), norm(u=h[1]), time()-t_last_step) if step % 10 == 0: e, h = op.split_eh(event.state_component) diff --git a/examples/wave/var-propagation-speed.py b/examples/wave/var-propagation-speed.py index a7a634f7a8b79f3006525059e70cc4379581127c..f392b420d4e06c6fb547c6461c0290726388cf29 100644 --- a/examples/wave/var-propagation-speed.py +++ b/examples/wave/var-propagation-speed.py @@ -30,10 +30,13 @@ import pyopencl as cl from grudge.shortcuts import set_up_rk4 from grudge import sym, bind, DGDiscretizationWithBoundaries +from meshmode.array_context import PyOpenCLArrayContext + def main(write_output=True, order=4): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) dims = 2 from meshmode.mesh.generation import generate_regular_rect_mesh @@ -42,7 +45,7 @@ def main(write_output=True, order=4): b=(0.5,)*dims, n=(20,)*dims) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order) source_center = np.array([0.1, 0.22, 0.33])[:mesh.dim] source_width = 0.05 @@ -69,19 +72,18 @@ def main(write_output=True, order=4): radiation_tag=BTAG_ALL, flux_type="upwind") - queue = cl.CommandQueue(discr.cl_context) - from pytools.obj_array import join_fields - fields = join_fields(discr.zeros(queue), - [discr.zeros(queue) for i in range(discr.dim)]) + from pytools.obj_array import flat_obj_array + fields = flat_obj_array(discr.zeros(actx), + [discr.zeros(actx) for i in range(discr.dim)]) op.check_bc_coverage(mesh) - c_eval = bind(discr, c)(queue) + c_eval = bind(discr, c)(actx) bound_op = bind(discr, op.sym_operator()) def rhs(t, w): - return bound_op(queue, t=t, w=w) + return bound_op(t=t, w=w) if mesh.dim == 2: dt = 0.04 * 0.3 @@ -110,7 +112,7 @@ def main(write_output=True, order=4): step += 1 - print(step, event.t, norm(queue, u=event.state_component[0]), + print(step, event.t, norm(u=event.state_component[0]), time()-t_last_step) if step % 10 == 0: vis.write_vtk_file("fld-var-propogation-speed-%04d.vtu" % step, diff --git a/examples/wave/wave-eager-mpi.py b/examples/wave/wave-eager-mpi.py new file mode 100644 index 0000000000000000000000000000000000000000..4b408bb599c5002a32325f5befd9ab66ab02cee9 --- /dev/null +++ b/examples/wave/wave-eager-mpi.py @@ -0,0 +1,206 @@ +from __future__ import division, print_function + +__copyright__ = "Copyright (C) 2020 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import numpy.linalg as la # noqa +import pyopencl as cl + +from pytools.obj_array import flat_obj_array, make_obj_array + +from meshmode.array_context import PyOpenCLArrayContext +from meshmode.dof_array import thaw + +from meshmode.mesh import BTAG_ALL, BTAG_NONE # noqa + +from grudge.eager import ( + EagerDGDiscretization, interior_trace_pair, cross_rank_trace_pairs) +from grudge.shortcuts import make_visualizer +from grudge.symbolic.primitives import TracePair +from mpi4py import MPI + + +# {{{ wave equation bits + +def scalar(arg): + return make_obj_array([arg]) + + +def wave_flux(discr, c, w_tpair): + u = w_tpair[0] + v = w_tpair[1:] + + normal = thaw(u.int.array_context, discr.normal(w_tpair.dd)) + + flux_weak = flat_obj_array( + np.dot(v.avg, normal), + normal*scalar(u.avg), + ) + + # upwind + v_jump = np.dot(normal, v.int-v.ext) + flux_weak -= flat_obj_array( + 0.5*(u.int-u.ext), + 0.5*normal*scalar(v_jump), + ) + + return discr.project(w_tpair.dd, "all_faces", c*flux_weak) + + +def wave_operator(discr, c, w): + u = w[0] + v = w[1:] + + dir_u = discr.project("vol", BTAG_ALL, u) + dir_v = discr.project("vol", BTAG_ALL, v) + dir_bval = flat_obj_array(dir_u, dir_v) + dir_bc = flat_obj_array(-dir_u, dir_v) + + return ( + discr.inverse_mass( + flat_obj_array( + c*discr.weak_div(v), + c*discr.weak_grad(u) + ) + - # noqa: W504 + discr.face_mass( + wave_flux(discr, c=c, w_tpair=interior_trace_pair(discr, w)) + + wave_flux(discr, c=c, w_tpair=TracePair( + BTAG_ALL, dir_bval, dir_bc)) + + sum( + wave_flux(discr, c=c, w_tpair=tpair) + for tpair in cross_rank_trace_pairs(discr, w)) + ) + ) + ) + +# }}} + + +def rk4_step(y, t, h, f): + k1 = f(t, y) + k2 = f(t+h/2, y + h/2*k1) + k3 = f(t+h/2, y + h/2*k2) + k4 = f(t+h, y + h*k3) + return y + h/6*(k1 + 2*k2 + 2*k3 + k4) + + +def bump(discr, actx, t=0): + source_center = np.array([0.2, 0.35, 0.1])[:discr.dim] + source_width = 0.05 + source_omega = 3 + + nodes = thaw(actx, discr.nodes()) + center_dist = flat_obj_array([ + nodes[i] - source_center[i] + for i in range(discr.dim) + ]) + + return ( + np.cos(source_omega*t) + * actx.np.exp( + -np.dot(center_dist, center_dist) + / source_width**2)) + + +def main(): + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) + + comm = MPI.COMM_WORLD + num_parts = comm.Get_size() + + from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis + mesh_dist = MPIMeshDistributor(comm) + + dim = 2 + nel_1d = 16 + + if mesh_dist.is_mananger_rank(): + from meshmode.mesh.generation import generate_regular_rect_mesh + mesh = generate_regular_rect_mesh( + a=(-0.5,)*dim, + b=(0.5,)*dim, + n=(nel_1d,)*dim) + + print("%d elements" % mesh.nelements) + + part_per_element = get_partition_by_pymetis(mesh, num_parts) + + local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts) + + del mesh + + else: + local_mesh = mesh_dist.receive_mesh_part() + + order = 3 + + discr = EagerDGDiscretization(actx, local_mesh, order=order, + mpi_communicator=comm) + + if dim == 2: + # no deep meaning here, just a fudge factor + dt = 0.75/(nel_1d*order**2) + elif dim == 3: + # no deep meaning here, just a fudge factor + dt = 0.45/(nel_1d*order**2) + else: + raise ValueError("don't have a stable time step guesstimate") + + fields = flat_obj_array( + bump(discr, actx), + [discr.zeros(actx) for i in range(discr.dim)] + ) + + vis = make_visualizer(discr, order+3 if dim == 2 else order) + + def rhs(t, w): + return wave_operator(discr, c=1, w=w) + + rank = comm.Get_rank() + + t = 0 + t_final = 3 + istep = 0 + while t < t_final: + fields = rk4_step(fields, t, dt, rhs) + + if istep % 10 == 0: + print(istep, t, discr.norm(fields[0])) + vis.write_vtk_file("fld-wave-eager-mpi-%03d-%04d.vtu" % (rank, istep), + [ + ("u", fields[0]), + ("v", fields[1:]), + ]) + + t += dt + istep += 1 + + +if __name__ == "__main__": + main() + +# vim: foldmethod=marker diff --git a/examples/wave/wave-eager-var-velocity.py b/examples/wave/wave-eager-var-velocity.py new file mode 100644 index 0000000000000000000000000000000000000000..721211314dfd5931374f0347e90435c624baf129 --- /dev/null +++ b/examples/wave/wave-eager-var-velocity.py @@ -0,0 +1,211 @@ +from __future__ import division, print_function + +__copyright__ = "Copyright (C) 2020 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import numpy.linalg as la # noqa +import pyopencl as cl + +from pytools.obj_array import flat_obj_array, make_obj_array + +from meshmode.array_context import PyOpenCLArrayContext +from meshmode.dof_array import thaw + +from meshmode.mesh import BTAG_ALL, BTAG_NONE # noqa + +from grudge.eager import EagerDGDiscretization, interior_trace_pair +from grudge.shortcuts import make_visualizer +from grudge.symbolic.primitives import TracePair, QTAG_NONE, DOFDesc + + +# {{{ wave equation bits + +def scalar(arg): + return make_obj_array([arg]) + + +def wave_flux(discr, c, w_tpair): + dd = w_tpair.dd + dd_quad = dd.with_qtag("vel_prod") + + u = w_tpair[0] + v = w_tpair[1:] + + normal = thaw(u.int.array_context, discr.normal(dd)) + + flux_weak = flat_obj_array( + np.dot(v.avg, normal), + normal*scalar(u.avg), + ) + + # upwind + v_jump = np.dot(normal, v.int-v.ext) + flux_weak -= flat_obj_array( + 0.5*(u.int-u.ext), + 0.5*normal*scalar(v_jump), + ) + + # FIMXE this flux is only correct for continuous c + dd_allfaces_quad = dd_quad.with_dtag("all_faces") + c_quad = discr.project("vol", dd_quad, c) + flux_quad = discr.project(dd, dd_quad, flux_weak) + + return discr.project(dd_quad, dd_allfaces_quad, scalar(c_quad)*flux_quad) + + +def wave_operator(discr, c, w): + u = w[0] + v = w[1:] + + dir_u = discr.project("vol", BTAG_ALL, u) + dir_v = discr.project("vol", BTAG_ALL, v) + dir_bval = flat_obj_array(dir_u, dir_v) + dir_bc = flat_obj_array(-dir_u, dir_v) + + dd_quad = DOFDesc("vol", "vel_prod") + c_quad = discr.project("vol", dd_quad, c) + w_quad = discr.project("vol", dd_quad, w) + u_quad = w_quad[0] + v_quad = w_quad[1:] + + dd_allfaces_quad = DOFDesc("all_faces", "vel_prod") + + # FIXME Fix sign issue + return ( + discr.inverse_mass( + flat_obj_array( + discr.weak_div(dd_quad, scalar(c_quad)*v_quad), + discr.weak_grad(dd_quad, c_quad*u_quad) + ) + - # noqa: W504 + discr.face_mass( + dd_allfaces_quad, + wave_flux(discr, c=c, w_tpair=interior_trace_pair(discr, w)) + + wave_flux(discr, c=c, w_tpair=TracePair( + BTAG_ALL, dir_bval, dir_bc)) + )) + ) + +# }}} + + +def rk4_step(y, t, h, f): + k1 = f(t, y) + k2 = f(t+h/2, y + h/2*k1) + k3 = f(t+h/2, y + h/2*k2) + k4 = f(t+h, y + h*k3) + return y + h/6*(k1 + 2*k2 + 2*k3 + k4) + + +def bump(actx, discr, t=0, width=0.05, center=None): + if center is None: + center = np.array([0.2, 0.35, 0.1]) + + center = center[:discr.dim] + source_omega = 3 + + nodes = thaw(actx, discr.nodes()) + center_dist = flat_obj_array([ + nodes[i] - center[i] + for i in range(discr.dim) + ]) + + return ( + np.cos(source_omega*t) + * actx.np.exp( + -np.dot(center_dist, center_dist) + / width**2)) + + +def main(): + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) + + dim = 2 + nel_1d = 16 + from meshmode.mesh.generation import generate_regular_rect_mesh + mesh = generate_regular_rect_mesh( + a=(-0.5,)*dim, + b=(0.5,)*dim, + n=(nel_1d,)*dim) + + order = 3 + + if dim == 2: + # no deep meaning here, just a fudge factor + dt = 0.75/(nel_1d*order**2) + elif dim == 3: + # no deep meaning here, just a fudge factor + dt = 0.45/(nel_1d*order**2) + else: + raise ValueError("don't have a stable time step guesstimate") + + print("%d elements" % mesh.nelements) + + from meshmode.discretization.poly_element import \ + QuadratureSimplexGroupFactory, \ + PolynomialWarpAndBlendGroupFactory + discr = EagerDGDiscretization(actx, mesh, + quad_tag_to_group_factory={ + QTAG_NONE: PolynomialWarpAndBlendGroupFactory(order), + "vel_prod": QuadratureSimplexGroupFactory(3*order), + }) + + # bounded above by 1 + c = 0.2 + 0.8*bump(actx, discr, center=np.zeros(3), width=0.5) + + fields = flat_obj_array( + bump(actx, discr, ), + [discr.zeros(actx) for i in range(discr.dim)] + ) + + vis = make_visualizer(discr, order+3 if dim == 2 else order) + + def rhs(t, w): + return wave_operator(discr, c=c, w=w) + + t = 0 + t_final = 3 + istep = 0 + while t < t_final: + fields = rk4_step(fields, t, dt, rhs) + + if istep % 10 == 0: + print(istep, t, discr.norm(fields[0])) + vis.write_vtk_file("fld-wave-eager-var-velocity-%04d.vtu" % istep, + [ + ("c", c), + ("u", fields[0]), + ("v", fields[1:]), + ]) + + t += dt + istep += 1 + + +if __name__ == "__main__": + main() + +# vim: foldmethod=marker diff --git a/examples/wave/wave-eager.py b/examples/wave/wave-eager.py index f060c364b558273d7ab97d678f021ac6e30eabd6..7450f435798c249f97dd11e03cd59dbd1e95b542 100644 --- a/examples/wave/wave-eager.py +++ b/examples/wave/wave-eager.py @@ -26,47 +26,41 @@ THE SOFTWARE. import numpy as np import numpy.linalg as la # noqa import pyopencl as cl -import pyopencl.array as cla # noqa -import pyopencl.clmath as clmath -from pytools.obj_array import ( - join_fields, make_obj_array, - with_object_array_or_scalar) + +from pytools.obj_array import flat_obj_array, make_obj_array + +from meshmode.array_context import PyOpenCLArrayContext +from meshmode.dof_array import thaw + from meshmode.mesh import BTAG_ALL, BTAG_NONE # noqa -from grudge.eager import EagerDGDiscretization, with_queue -from grudge.symbolic.primitives import TracePair + +from grudge.eager import EagerDGDiscretization, interior_trace_pair from grudge.shortcuts import make_visualizer +from grudge.symbolic.primitives import TracePair -def interior_trace_pair(discr, vec): - i = discr.project("vol", "int_faces", vec) - e = with_object_array_or_scalar( - lambda el: discr.opposite_face_connection()(el.queue, el), - i) - return TracePair("int_faces", i, e) +# {{{ wave equation bits +def scalar(arg): + return make_obj_array([arg]) -# {{{ wave equation bits def wave_flux(discr, c, w_tpair): u = w_tpair[0] v = w_tpair[1:] - normal = with_queue(u.int.queue, discr.normal(w_tpair.dd)) - - def normal_times(scalar): - # workaround for object array behavior - return make_obj_array([ni*scalar for ni in normal]) + normal = thaw(u.int.array_context, discr.normal(w_tpair.dd)) - flux_weak = join_fields( + flux_weak = flat_obj_array( np.dot(v.avg, normal), - normal_times(u.avg), + normal*scalar(u.avg), ) # upwind v_jump = np.dot(normal, v.int-v.ext) - flux_weak -= join_fields( + flux_weak -= flat_obj_array( 0.5*(u.int-u.ext), - 0.5*normal_times(v_jump), + 0.5*normal*scalar(v_jump), ) return discr.project(w_tpair.dd, "all_faces", c*flux_weak) @@ -78,12 +72,12 @@ def wave_operator(discr, c, w): dir_u = discr.project("vol", BTAG_ALL, u) dir_v = discr.project("vol", BTAG_ALL, v) - dir_bval = join_fields(dir_u, dir_v) - dir_bc = join_fields(-dir_u, dir_v) + dir_bval = flat_obj_array(dir_u, dir_v) + dir_bc = flat_obj_array(-dir_u, dir_v) return ( discr.inverse_mass( - join_fields( + flat_obj_array( c*discr.weak_div(v), c*discr.weak_grad(u) ) @@ -106,20 +100,20 @@ def rk4_step(y, t, h, f): return y + h/6*(k1 + 2*k2 + 2*k3 + k4) -def bump(discr, queue, t=0): +def bump(discr, actx, t=0): source_center = np.array([0.2, 0.35, 0.1])[:discr.dim] source_width = 0.05 source_omega = 3 - nodes = discr.nodes().with_queue(queue) - center_dist = join_fields([ + nodes = thaw(actx, discr.nodes()) + center_dist = flat_obj_array([ nodes[i] - source_center[i] for i in range(discr.dim) ]) return ( np.cos(source_omega*t) - * clmath.exp( + * actx.np.exp( -np.dot(center_dist, center_dist) / source_width**2)) @@ -127,6 +121,7 @@ def bump(discr, queue, t=0): def main(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) dim = 2 nel_1d = 16 @@ -149,14 +144,14 @@ def main(): print("%d elements" % mesh.nelements) - discr = EagerDGDiscretization(cl_ctx, mesh, order=order) + discr = EagerDGDiscretization(actx, mesh, order=order) - fields = join_fields( - bump(discr, queue), - [discr.zeros(queue) for i in range(discr.dim)] + fields = flat_obj_array( + bump(discr, actx), + [discr.zeros(actx) for i in range(discr.dim)] ) - vis = make_visualizer(discr, discr.order+3 if dim == 2 else discr.order) + vis = make_visualizer(discr, order+3 if dim == 2 else order) def rhs(t, w): return wave_operator(discr, c=1, w=w) @@ -168,7 +163,8 @@ def main(): fields = rk4_step(fields, t, dt, rhs) if istep % 10 == 0: - print(istep, t, la.norm(fields[0].get())) + print(f"step: {istep} t: {t} L2: {discr.norm(fields[0])} " + f"sol max: {discr.nodal_max('vol', fields[0])}") vis.write_vtk_file("fld-wave-eager-%04d.vtu" % istep, [ ("u", fields[0]), diff --git a/examples/wave/wave-min-mpi.py b/examples/wave/wave-min-mpi.py index 08705655cf874bf7701bb1c3f998c0d6ea7a6e06..bca2610199ed1cf50ae0c178d28caf1579730e3c 100644 --- a/examples/wave/wave-min-mpi.py +++ b/examples/wave/wave-min-mpi.py @@ -27,6 +27,7 @@ THE SOFTWARE. import numpy as np import pyopencl as cl +from meshmode.array_context import PyOpenCLArrayContext from grudge.shortcuts import set_up_rk4 from grudge import sym, bind, DGDiscretizationWithBoundaries from mpi4py import MPI @@ -35,6 +36,7 @@ from mpi4py import MPI def main(write_output=True, order=4): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) comm = MPI.COMM_WORLD num_parts = comm.Get_size() @@ -61,7 +63,7 @@ def main(write_output=True, order=4): else: local_mesh = mesh_dist.receive_mesh_part() - discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order, + discr = DGDiscretizationWithBoundaries(actx, local_mesh, order=order, mpi_communicator=comm) if local_mesh.dim == 2: @@ -90,10 +92,10 @@ def main(write_output=True, order=4): radiation_tag=BTAG_ALL, flux_type="upwind") - queue = cl.CommandQueue(discr.cl_context) - from pytools.obj_array import join_fields - fields = join_fields(discr.zeros(queue), - [discr.zeros(queue) for i in range(discr.dim)]) + from pytools.obj_array import flat_obj_array + fields = flat_obj_array( + discr.zeros(actx), + [discr.zeros(actx) for i in range(discr.dim)]) # FIXME #dt = op.estimate_rk4_timestep(discr, fields=fields) @@ -104,7 +106,7 @@ def main(write_output=True, order=4): bound_op = bind(discr, op.sym_operator()) def rhs(t, w): - return bound_op(queue, t=t, w=w) + return bound_op(t=t, w=w) dt_stepper = set_up_rk4("w", dt, fields, rhs) @@ -130,7 +132,7 @@ def main(write_output=True, order=4): step += 1 - print(step, event.t, norm(queue, u=event.state_component[0]), + print(step, event.t, norm(u=event.state_component[0]), time()-t_last_step) if step % 10 == 0: vis.write_vtk_file( diff --git a/examples/wave/wave-min.py b/examples/wave/wave-min.py index d793e3a09716b883d91c90f1fddfb2f96bacf1a2..de6c9f92e75d449e6a42de80f7b6fa7270df8a15 100644 --- a/examples/wave/wave-min.py +++ b/examples/wave/wave-min.py @@ -27,6 +27,7 @@ THE SOFTWARE. import numpy as np import pyopencl as cl +from meshmode.array_context import PyOpenCLArrayContext from grudge.shortcuts import set_up_rk4 from grudge import sym, bind, DGDiscretizationWithBoundaries @@ -34,6 +35,7 @@ from grudge import sym, bind, DGDiscretizationWithBoundaries def main(write_output=True, order=4): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) dims = 2 from meshmode.mesh.generation import generate_regular_rect_mesh @@ -49,7 +51,7 @@ def main(write_output=True, order=4): print("%d elements" % mesh.nelements) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order) source_center = np.array([0.1, 0.22, 0.33])[:mesh.dim] source_width = 0.05 @@ -72,10 +74,9 @@ def main(write_output=True, order=4): radiation_tag=BTAG_ALL, flux_type="upwind") - queue = cl.CommandQueue(discr.cl_context) - from pytools.obj_array import join_fields - fields = join_fields(discr.zeros(queue), - [discr.zeros(queue) for i in range(discr.dim)]) + from pytools.obj_array import flat_obj_array + fields = flat_obj_array(discr.zeros(actx), + [discr.zeros(actx) for i in range(discr.dim)]) # FIXME #dt = op.estimate_rk4_timestep(discr, fields=fields) @@ -86,7 +87,7 @@ def main(write_output=True, order=4): bound_op = bind(discr, op.sym_operator()) def rhs(t, w): - return bound_op(queue, t=t, w=w) + return bound_op(t=t, w=w) dt_stepper = set_up_rk4("w", dt, fields, rhs) @@ -110,7 +111,7 @@ def main(write_output=True, order=4): step += 1 - print(step, event.t, norm(queue, u=event.state_component[0]), + print(step, event.t, norm(u=event.state_component[0]), time()-t_last_step) if step % 10 == 0: vis.write_vtk_file("fld-wave-min-%04d.vtu" % step, diff --git a/grudge/discretization.py b/grudge/discretization.py index d24fb2c082ddf3585d7de13f58a7a0b371cc4ed0..7c0573c2e5ec3515843b6f8c377ad10301256aaa 100644 --- a/grudge/discretization.py +++ b/grudge/discretization.py @@ -25,9 +25,9 @@ THE SOFTWARE. import six from pytools import memoize_method -import pyopencl as cl from grudge import sym -import numpy as np +import numpy as np # noqa: F401 +from meshmode.array_context import ArrayContext # FIXME Naming not ideal @@ -40,7 +40,6 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): .. automethod :: discr_from_dd .. automethod :: connection_from_dds - .. autoattribute :: cl_context .. autoattribute :: dim .. autoattribute :: ambient_dim .. autoattribute :: mesh @@ -49,8 +48,8 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): .. automethod :: zeros """ - def __init__(self, cl_ctx, mesh, order, quad_tag_to_group_factory=None, - mpi_communicator=None): + def __init__(self, array_context, mesh, order=None, + quad_tag_to_group_factory=None, mpi_communicator=None): """ :param quad_tag_to_group_factory: A mapping from quadrature tags (typically strings--but may be any hashable/comparable object) to a @@ -60,15 +59,34 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): be carried out with the standard volume discretization. """ + self._setup_actx = array_context + + from meshmode.discretization.poly_element import \ + PolynomialWarpAndBlendGroupFactory + if quad_tag_to_group_factory is None: - quad_tag_to_group_factory = {} + if order is None: + raise TypeError("one of 'order' and " + "'quad_tag_to_group_factory' must be given") + + quad_tag_to_group_factory = { + sym.QTAG_NONE: PolynomialWarpAndBlendGroupFactory(order=order)} + else: + if order is not None: + quad_tag_to_group_factory = quad_tag_to_group_factory.copy() + if sym.QTAG_NONE in quad_tag_to_group_factory: + raise ValueError("if 'order' is given, " + "'quad_tag_to_group_factory' must not have a " + "key of QTAG_NONE") + + quad_tag_to_group_factory[sym.QTAG_NONE] = \ + PolynomialWarpAndBlendGroupFactory(order=order) - self.order = order self.quad_tag_to_group_factory = quad_tag_to_group_factory from meshmode.discretization import Discretization - self._volume_discr = Discretization(cl_ctx, mesh, + self._volume_discr = Discretization(array_context, mesh, self.group_factory_for_quadrature_tag(sym.QTAG_NONE)) # {{{ management of discretization-scoped common subexpressions @@ -81,9 +99,9 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): # }}} - with cl.CommandQueue(cl_ctx) as queue: - self._dist_boundary_connections = \ - self._set_up_distributed_communication(mpi_communicator, queue) + self._dist_boundary_connections = \ + self._set_up_distributed_communication( + mpi_communicator, array_context) self.mpi_communicator = mpi_communicator @@ -97,7 +115,7 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): return self.mpi_communicator.Get_rank() \ == self._get_management_rank_index() - def _set_up_distributed_communication(self, mpi_communicator, queue): + def _set_up_distributed_communication(self, mpi_communicator, array_context): from_dd = sym.DOFDesc("vol", sym.QTAG_NONE) from meshmode.distributed import get_connected_partitions @@ -118,7 +136,8 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): from_dd, sym.DOFDesc(sym.BTAG_PARTITION(i_remote_part), sym.QTAG_NONE)) setup_helper = setup_helpers[i_remote_part] = MPIBoundaryCommSetupHelper( - mpi_communicator, queue, conn, i_remote_part, grp_factory) + mpi_communicator, array_context, conn, + i_remote_part, grp_factory) setup_helper.post_sends() for i_remote_part, setup_helper in six.iteritems(setup_helpers): @@ -152,7 +171,7 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): from meshmode.discretization import Discretization return Discretization( - self._volume_discr.cl_context, + self._setup_actx, no_quad_discr.mesh, self.group_factory_for_quadrature_tag(qtag)) @@ -186,6 +205,7 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): make_face_to_all_faces_embedding return make_face_to_all_faces_embedding( + self._setup_actx, faces_conn, self.discr_from_dd(to_dd), self.discr_from_dd(from_dd)) @@ -224,6 +244,7 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): make_same_mesh_connection return make_same_mesh_connection( + self._setup_actx, self.discr_from_dd(to_dd), self.discr_from_dd(from_dd)) @@ -264,19 +285,13 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): if quadrature_tag is None: quadrature_tag = sym.QTAG_NONE - from meshmode.discretization.poly_element import \ - PolynomialWarpAndBlendGroupFactory - - if quadrature_tag is not sym.QTAG_NONE: - return self.quad_tag_to_group_factory[quadrature_tag] - else: - return PolynomialWarpAndBlendGroupFactory(order=self.order) + return self.quad_tag_to_group_factory[quadrature_tag] @memoize_method def _quad_volume_discr(self, quadrature_tag): from meshmode.discretization import Discretization - return Discretization(self._volume_discr.cl_context, self._volume_discr.mesh, + return Discretization(self._setup_actx, self._volume_discr.mesh, self.group_factory_for_quadrature_tag(quadrature_tag)) # {{{ boundary @@ -285,9 +300,10 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): def _boundary_connection(self, boundary_tag): from meshmode.discretization.connection import make_face_restriction return make_face_restriction( - self._volume_discr, - self.group_factory_for_quadrature_tag(sym.QTAG_NONE), - boundary_tag=boundary_tag) + self._setup_actx, + self._volume_discr, + self.group_factory_for_quadrature_tag(sym.QTAG_NONE), + boundary_tag=boundary_tag) # }}} @@ -298,21 +314,24 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): from meshmode.discretization.connection import ( make_face_restriction, FACE_RESTR_INTERIOR) return make_face_restriction( - self._volume_discr, - self.group_factory_for_quadrature_tag(sym.QTAG_NONE), - FACE_RESTR_INTERIOR, + self._setup_actx, + self._volume_discr, + self.group_factory_for_quadrature_tag(sym.QTAG_NONE), + FACE_RESTR_INTERIOR, - # FIXME: This will need to change as soon as we support - # pyramids or other elements with non-identical face - # types. - per_face_groups=False) + # FIXME: This will need to change as soon as we support + # pyramids or other elements with non-identical face + # types. + per_face_groups=False) @memoize_method def opposite_face_connection(self): from meshmode.discretization.connection import \ make_opposite_face_connection - return make_opposite_face_connection(self._interior_faces_connection()) + return make_opposite_face_connection( + self._setup_actx, + self._interior_faces_connection()) # }}} @@ -323,21 +342,18 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): from meshmode.discretization.connection import ( make_face_restriction, FACE_RESTR_ALL) return make_face_restriction( - self._volume_discr, - self.group_factory_for_quadrature_tag(sym.QTAG_NONE), - FACE_RESTR_ALL, + self._setup_actx, + self._volume_discr, + self.group_factory_for_quadrature_tag(sym.QTAG_NONE), + FACE_RESTR_ALL, - # FIXME: This will need to change as soon as we support - # pyramids or other elements with non-identical face - # types. - per_face_groups=False) + # FIXME: This will need to change as soon as we support + # pyramids or other elements with non-identical face + # types. + per_face_groups=False) # }}} - @property - def cl_context(self): - return self._volume_discr.cl_context - @property def dim(self): return self._volume_discr.dim @@ -358,13 +374,11 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): def mesh(self): return self._volume_discr.mesh - def empty(self, queue=None, dtype=None, extra_dims=None, allocator=None): - return self._volume_discr.empty(queue, dtype, extra_dims=extra_dims, - allocator=allocator) + def empty(self, array_context: ArrayContext, dtype=None): + return self._volume_discr.empty(array_context, dtype) - def zeros(self, queue, dtype=None, extra_dims=None, allocator=None): - return self._volume_discr.zeros(queue, dtype, extra_dims=extra_dims, - allocator=allocator) + def zeros(self, array_context: ArrayContext, dtype=None): + return self._volume_discr.zeros(array_context, dtype) def is_volume_where(self, where): from grudge import sym @@ -372,55 +386,16 @@ class DGDiscretizationWithBoundaries(DiscretizationBase): where is None or where == sym.VTAG_ALL) - -class PointsDiscretization(DiscretizationBase): - """Implements just enough of the discretization interface to be - able to smuggle some points into :func:`bind`. - """ - - def __init__(self, nodes): - self._nodes = nodes - self.real_dtype = np.dtype(np.float64) - self.complex_dtype = np.dtype({ - np.float32: np.complex64, - np.float64: np.complex128 - }[self.real_dtype.type]) - - self.mpi_communicator = None - - def ambient_dim(self): - return self._nodes.shape[0] - - @property - def mesh(self): - return self - - @property - def nnodes(self): - return self._nodes.shape[-1] - - def nodes(self): - return self._nodes - - @property - def facial_adjacency_groups(self): - return [] - - def discr_from_dd(self, dd): - dd = sym.as_dofdesc(dd) - - if dd.quadrature_tag is not sym.QTAG_NONE: - raise ValueError("quadrature discretization requested from " - "PointsDiscretization") - if dd.domain_tag is not sym.DTAG_VOLUME_ALL: - raise ValueError("non-volume discretization requested from " - "PointsDiscretization") - - return self - @property - def quad_tag_to_group_factory(self): - return {} + def order(self): + from warnings import warn + warn("DGDiscretizationWithBoundaries.order is deprecated, " + "consider the orders of element groups instead. " + "'order' will go away in 2021.", + DeprecationWarning, stacklevel=2) + + from pytools import single_valued + return single_valued(egrp.order for egrp in self._volume_discr.groups) # vim: foldmethod=marker diff --git a/grudge/eager.py b/grudge/eager.py index 43d73a4db5ad6eef406bc75e66f4d1bc39b74bfb..71a01873001d56883d49754bd4264f18c3c20935 100644 --- a/grudge/eager.py +++ b/grudge/eager.py @@ -24,15 +24,16 @@ THE SOFTWARE. import numpy as np # noqa -from grudge.discretization import DGDiscretizationWithBoundaries from pytools import memoize_method -from pytools.obj_array import ( - with_object_array_or_scalar, - is_obj_array) -import pyopencl as cl +from pytools.obj_array import obj_array_vectorize, make_obj_array import pyopencl.array as cla # noqa from grudge import sym, bind -from meshmode.mesh import BTAG_ALL, BTAG_NONE # noqa + +from meshmode.mesh import BTAG_ALL, BTAG_NONE, BTAG_PARTITION # noqa +from meshmode.dof_array import freeze, DOFArray, flatten, unflatten + +from grudge.discretization import DGDiscretizationWithBoundaries +from grudge.symbolic.primitives import TracePair __doc__ = """ @@ -40,15 +41,6 @@ __doc__ = """ """ -def with_queue(queue, ary): - return with_object_array_or_scalar( - lambda x: x.with_queue(queue), ary) - - -def without_queue(ary): - return with_queue(None, ary) - - class EagerDGDiscretization(DGDiscretizationWithBoundaries): """ .. automethod:: project @@ -70,66 +62,215 @@ class EagerDGDiscretization(DGDiscretizationWithBoundaries): return self.project(src, tgt, vec) def project(self, src, tgt, vec): - if is_obj_array(vec): - return with_object_array_or_scalar( + if (isinstance(vec, np.ndarray) + and vec.dtype.char == "O" + and not isinstance(vec, DOFArray)): + return obj_array_vectorize( lambda el: self.project(src, tgt, el), vec) - return self.connection_from_dds(src, tgt)(vec.queue, vec) + return self.connection_from_dds(src, tgt)(vec) def nodes(self): return self._volume_discr.nodes() @memoize_method def _bound_grad(self): - return bind(self, sym.nabla(self.dim) * sym.Variable("u")) + return bind(self, sym.nabla(self.dim) * sym.Variable("u"), local_only=True) def grad(self, vec): - return self._bound_grad()(vec.queue, u=vec) + return self._bound_grad()(u=vec) def div(self, vecs): return sum( self.grad(vec_i)[i] for i, vec_i in enumerate(vecs)) @memoize_method - def _bound_weak_grad(self): - return bind(self, sym.stiffness_t(self.dim) * sym.Variable("u")) - - def weak_grad(self, vec): - return self._bound_weak_grad()(vec.queue, u=vec) + def _bound_weak_grad(self, dd): + return bind(self, + sym.stiffness_t(self.dim, dd_in=dd) * sym.Variable("u", dd), + local_only=True) + + def weak_grad(self, *args): + if len(args) == 1: + vec, = args + dd = sym.DOFDesc("vol", sym.QTAG_NONE) + elif len(args) == 2: + dd, vec = args + else: + raise TypeError("invalid number of arguments") + + return self._bound_weak_grad(dd)(u=vec) + + def weak_div(self, *args): + if len(args) == 1: + vecs, = args + dd = sym.DOFDesc("vol", sym.QTAG_NONE) + elif len(args) == 2: + dd, vecs = args + else: + raise TypeError("invalid number of arguments") - def weak_div(self, vecs): return sum( - self.weak_grad(vec_i)[i] for i, vec_i in enumerate(vecs)) + self.weak_grad(dd, vec_i)[i] for i, vec_i in enumerate(vecs)) @memoize_method def normal(self, dd): - with cl.CommandQueue(self.cl_context) as queue: - surface_discr = self.discr_from_dd(dd) - return without_queue( - bind(self, sym.normal( - dd, surface_discr.ambient_dim, surface_discr.dim))(queue)) + surface_discr = self.discr_from_dd(dd) + actx = surface_discr._setup_actx + return freeze( + bind(self, + sym.normal(dd, surface_discr.ambient_dim, surface_discr.dim), + local_only=True) + (array_context=actx)) @memoize_method def _bound_inverse_mass(self): - return bind(self, sym.InverseMassOperator()(sym.Variable("u"))) + return bind(self, sym.InverseMassOperator()(sym.Variable("u")), + local_only=True) def inverse_mass(self, vec): - if is_obj_array(vec): - return with_object_array_or_scalar( + if (isinstance(vec, np.ndarray) + and vec.dtype.char == "O" + and not isinstance(vec, DOFArray)): + return obj_array_vectorize( lambda el: self.inverse_mass(el), vec) - return self._bound_inverse_mass()(vec.queue, u=vec) + return self._bound_inverse_mass()(u=vec) + + @memoize_method + def _bound_face_mass(self, dd): + u = sym.Variable("u", dd=dd) + return bind(self, sym.FaceMassOperator(dd_in=dd)(u), local_only=True) + + def face_mass(self, *args): + if len(args) == 1: + vec, = args + dd = sym.DOFDesc("all_faces", sym.QTAG_NONE) + elif len(args) == 2: + dd, vec = args + else: + raise TypeError("invalid number of arguments") + + if (isinstance(vec, np.ndarray) + and vec.dtype.char == "O" + and not isinstance(vec, DOFArray)): + return obj_array_vectorize( + lambda el: self.face_mass(dd, el), vec) + + return self._bound_face_mass(dd)(u=vec) @memoize_method - def _bound_face_mass(self): - u = sym.Variable("u", dd=sym.as_dofdesc("all_faces")) - return bind(self, sym.FaceMassOperator()(u)) + def _norm(self, p): + return bind(self, sym.norm(p, sym.var("arg")), local_only=True) + + def norm(self, vec, p=2): + return self._norm(p)(arg=vec) + + @memoize_method + def _nodal_reduction(self, operator, dd): + return bind(self, operator(dd)(sym.var("arg")), local_only=True) + + def nodal_sum(self, dd, vec): + return self._nodal_reduction(sym.NodalSum, dd)(arg=vec) + + def nodal_min(self, dd, vec): + return self._nodal_reduction(sym.NodalMin, dd)(arg=vec) + + def nodal_max(self, dd, vec): + return self._nodal_reduction(sym.NodalMax, dd)(arg=vec) + + @memoize_method + def connected_ranks(self): + from meshmode.distributed import get_connected_partitions + return get_connected_partitions(self._volume_discr.mesh) + + +def interior_trace_pair(discrwb, vec): + i = discrwb.project("vol", "int_faces", vec) + + if (isinstance(vec, np.ndarray) + and vec.dtype.char == "O" + and not isinstance(vec, DOFArray)): + e = obj_array_vectorize( + lambda el: discrwb.opposite_face_connection()(el), + i) + + return TracePair("int_faces", i, e) + + +class RankBoundaryCommunication: + base_tag = 1273 + + def __init__(self, discrwb, remote_rank, vol_field, tag=None): + self.tag = self.base_tag + if tag is not None: + self.tag += tag + + self.discrwb = discrwb + self.array_context = vol_field.array_context + self.remote_btag = BTAG_PARTITION(remote_rank) + + self.bdry_discr = discrwb.discr_from_dd(self.remote_btag) + self.local_dof_array = discrwb.project("vol", self.remote_btag, vol_field) + + local_data = self.array_context.to_numpy(flatten(self.local_dof_array)) + + comm = self.discrwb.mpi_communicator + + self.send_req = comm.Isend( + local_data, remote_rank, tag=self.tag) + + self.remote_data_host = np.empty_like(local_data) + self.recv_req = comm.Irecv(self.remote_data_host, remote_rank, self.tag) + + def finish(self): + self.recv_req.Wait() + + actx = self.array_context + remote_dof_array = unflatten(self.array_context, self.bdry_discr, + actx.from_numpy(self.remote_data_host)) + + bdry_conn = self.discrwb.get_distributed_boundary_swap_connection( + sym.as_dofdesc(sym.DTAG_BOUNDARY(self.remote_btag))) + swapped_remote_dof_array = bdry_conn(remote_dof_array) + + self.send_req.Wait() + + return TracePair(self.remote_btag, self.local_dof_array, + swapped_remote_dof_array) + + +def _cross_rank_trace_pairs_scalar_field(discrwb, vec, tag=None): + rbcomms = [RankBoundaryCommunication(discrwb, remote_rank, vec, tag=tag) + for remote_rank in discrwb.connected_ranks()] + return [rbcomm.finish() for rbcomm in rbcomms] + + +def cross_rank_trace_pairs(discrwb, vec, tag=None): + if (isinstance(vec, np.ndarray) + and vec.dtype.char == "O" + and not isinstance(vec, DOFArray)): + + n, = vec.shape + result = {} + for ivec in range(n): + for rank_tpair in _cross_rank_trace_pairs_scalar_field( + discrwb, vec[ivec]): + assert isinstance(rank_tpair.dd.domain_tag, sym.DTAG_BOUNDARY) + assert isinstance(rank_tpair.dd.domain_tag.tag, BTAG_PARTITION) + result[rank_tpair.dd.domain_tag.tag.part_nr, ivec] = rank_tpair - def face_mass(self, vec): - if is_obj_array(vec): - return with_object_array_or_scalar( - lambda el: self.face_mass(el), vec) + return [ + TracePair( + dd=sym.as_dofdesc(sym.DTAG_BOUNDARY(BTAG_PARTITION(remote_rank))), + interior=make_obj_array([ + result[remote_rank, i].int for i in range(n)]), + exterior=make_obj_array([ + result[remote_rank, i].ext for i in range(n)]) + ) + for remote_rank in discrwb.connected_ranks()] + else: + return _cross_rank_trace_pairs_scalar_field(discrwb, vec, tag=tag) - return self._bound_face_mass()(vec.queue, u=vec) # vim: foldmethod=marker diff --git a/grudge/execution.py b/grudge/execution.py index 7c3fbb07846880b64f01cab250379a2afb8e33db..1c25739d1964863fbda151851607aab6a746f09b 100644 --- a/grudge/execution.py +++ b/grudge/execution.py @@ -22,12 +22,19 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import six +from typing import Optional, Union, Dict +from numbers import Number import numpy as np + +from pytools import memoize_in +from pytools.obj_array import make_obj_array + import loopy as lp import pyopencl as cl import pyopencl.array # noqa -from pytools import memoize_in + +from meshmode.dof_array import DOFArray, thaw, flatten, unflatten +from meshmode.array_context import ArrayContext, make_loopy_program import grudge.symbolic.mappers as mappers from grudge import sym @@ -42,17 +49,20 @@ from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_1 # noqa: F401 MPI_TAG_SEND_TAGS = 1729 +ResultType = Union[DOFArray, Number] + + # {{{ exec mapper class ExecutionMapper(mappers.Evaluator, mappers.BoundOpMapperMixin, mappers.LocalOpReducerMixin): - def __init__(self, queue, context, bound_op): + def __init__(self, array_context, context, bound_op): super(ExecutionMapper, self).__init__(context) self.discrwb = bound_op.discrwb self.bound_op = bound_op self.function_registry = bound_op.function_registry - self.queue = queue + self.array_context = array_context # {{{ expression mappings @@ -62,13 +72,14 @@ class ExecutionMapper(mappers.Evaluator, discr = self.discrwb.discr_from_dd(expr.dd) - result = discr.empty(self.queue, allocator=self.bound_op.allocator) - result.fill(1) + result = discr.empty(self.array_context) + for grp_ary in result: + grp_ary.fill(1) return result def map_node_coordinate_component(self, expr): discr = self.discrwb.discr_from_dd(expr.dd) - return discr.nodes()[expr.axis].with_queue(self.queue) + return thaw(self.array_context, discr.nodes()[expr.axis]) def map_grudge_variable(self, expr): from numbers import Number @@ -76,8 +87,9 @@ class ExecutionMapper(mappers.Evaluator, value = self.context[expr.name] if not expr.dd.is_scalar() and isinstance(value, Number): discr = self.discrwb.discr_from_dd(expr.dd) - ary = discr.empty(self.queue) - ary.fill(value) + ary = discr.empty(self.array_context) + for grp_ary in ary: + grp_ary.fill(value) value = ary return value @@ -91,42 +103,42 @@ class ExecutionMapper(mappers.Evaluator, from numbers import Number if not dd.is_scalar() and isinstance(value, Number): discr = self.discrwb.discr_from_dd(dd) - ary = discr.empty(self.queue) - ary.fill(value) + ary = discr.empty(self.array_context) + for grp_ary in ary: + grp_ary.fill(value) value = ary return value def map_call(self, expr): args = [self.rec(p) for p in expr.parameters] - return self.function_registry[expr.function.name](self.queue, *args) + return self.function_registry[expr.function.name](self.array_context, *args) # }}} # {{{ elementwise reductions def _map_elementwise_reduction(self, op_name, field_expr, dd): - @memoize_in(self, "elementwise_%s_knl" % op_name) - def knl(): - knl = lp.make_kernel( - "{[el, idof, jdof]: 0<=el=2.3", - "pytools>=2018.5.2", + "pytools>=2020.3", "modepy>=2013.3", - "meshmode>=2013.3", + "meshmode>=2020.2", "pyopencl>=2013.1", "pymbolic>=2013.2", "loo.py>=2013.1beta", diff --git a/test/test_grudge.py b/test/test_grudge.py index 4734a199551abe8bb625e9cd5744dce46610a9cd..b6d82a3f4b7e64f871b39988077b9c9ba64f0bea 100644 --- a/test/test_grudge.py +++ b/test/test_grudge.py @@ -25,14 +25,14 @@ THE SOFTWARE. import numpy as np import numpy.linalg as la +import pytest # noqa -import pyopencl as cl -import pyopencl.array -import pyopencl.clmath +from pytools.obj_array import flat_obj_array, make_obj_array -from pytools.obj_array import join_fields, make_obj_array +import pyopencl as cl -import pytest # noqa +from meshmode.array_context import PyOpenCLArrayContext +from meshmode.dof_array import unflatten, flatten, flat_norm from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) @@ -50,6 +50,7 @@ from grudge import sym, bind, DGDiscretizationWithBoundaries def test_inverse_metric(ctx_factory, dim): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) from meshmode.mesh.generation import generate_regular_rect_mesh mesh = generate_regular_rect_mesh(a=(-0.5,)*dim, b=(0.5,)*dim, @@ -70,7 +71,7 @@ def test_inverse_metric(ctx_factory, dim): from meshmode.mesh.processing import map_mesh mesh = map_mesh(mesh, m) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=4) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=4) sym_op = ( sym.forward_metric_derivative_mat(mesh.dim) @@ -80,13 +81,13 @@ def test_inverse_metric(ctx_factory, dim): .reshape(-1)) op = bind(discr, sym_op) - mat = op(queue).reshape(mesh.dim, mesh.dim) + mat = op(actx).reshape(mesh.dim, mesh.dim) for i in range(mesh.dim): for j in range(mesh.dim): tgt = 1 if i == j else 0 - err = np.max(np.abs((mat[i, j] - tgt).get(queue=queue))) + err = flat_norm(mat[i, j] - tgt, np.inf) logger.info("error[%d, %d]: %.5e", i, j, err) assert err < 1.0e-12, (i, j, err) @@ -99,6 +100,7 @@ def test_mass_mat_trig(ctx_factory, ambient_dim, quad_tag): """ cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) nelements = 17 order = 4 @@ -120,7 +122,7 @@ def test_mass_mat_trig(ctx_factory, ambient_dim, quad_tag): mesh = generate_regular_rect_mesh( a=(a,)*ambient_dim, b=(b,)*ambient_dim, n=(nelements,)*ambient_dim, order=1) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order, + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order, quad_tag_to_group_factory=quad_tag_to_group_factory) def _get_variables_on(dd): @@ -131,20 +133,20 @@ def test_mass_mat_trig(ctx_factory, ambient_dim, quad_tag): return sym_f, sym_x, sym_ones sym_f, sym_x, sym_ones = _get_variables_on(sym.DD_VOLUME) - f_volm = bind(discr, sym.cos(sym_x[0])**2)(queue).get() - ones_volm = bind(discr, sym_ones)(queue).get() + f_volm = actx.to_numpy(flatten(bind(discr, sym.cos(sym_x[0])**2)(actx))) + ones_volm = actx.to_numpy(flatten(bind(discr, sym_ones)(actx))) sym_f, sym_x, sym_ones = _get_variables_on(dd_quad) - f_quad = bind(discr, sym.cos(sym_x[0])**2)(queue) - ones_quad = bind(discr, sym_ones)(queue) + f_quad = bind(discr, sym.cos(sym_x[0])**2)(actx) + ones_quad = bind(discr, sym_ones)(actx) mass_op = bind(discr, sym.MassOperator(dd_quad, sym.DD_VOLUME)(sym_f)) - num_integral_1 = np.dot(ones_volm, mass_op(queue, f=f_quad).get()) + num_integral_1 = np.dot(ones_volm, actx.to_numpy(flatten(mass_op(f=f_quad)))) err_1 = abs(num_integral_1 - true_integral) assert err_1 < 1e-9, err_1 - num_integral_2 = np.dot(f_volm, mass_op(queue, f=ones_quad).get()) + num_integral_2 = np.dot(f_volm, actx.to_numpy(flatten(mass_op(f=ones_quad)))) err_2 = abs(num_integral_2 - true_integral) assert err_2 < 1.0e-9, err_2 @@ -152,7 +154,7 @@ def test_mass_mat_trig(ctx_factory, ambient_dim, quad_tag): # NOTE: `integral` always makes a square mass matrix and # `QuadratureSimplexGroupFactory` does not have a `mass_matrix` method. num_integral_3 = bind(discr, - sym.integral(sym_f, dd=dd_quad))(queue, f=f_quad) + sym.integral(sym_f, dd=dd_quad))(f=f_quad) err_3 = abs(num_integral_3 - true_integral) assert err_3 < 5.0e-10, err_3 @@ -166,6 +168,7 @@ def test_tri_diff_mat(ctx_factory, dim, order=4): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) from meshmode.mesh.generation import generate_regular_rect_mesh @@ -176,20 +179,20 @@ def test_tri_diff_mat(ctx_factory, dim, order=4): mesh = generate_regular_rect_mesh(a=(-0.5,)*dim, b=(0.5,)*dim, n=(n,)*dim, order=4) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=4) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=4) nabla = sym.nabla(dim) for axis in range(dim): x = sym.nodes(dim) - f = bind(discr, sym.sin(3*x[axis]))(queue) - df = bind(discr, 3*sym.cos(3*x[axis]))(queue) + f = bind(discr, sym.sin(3*x[axis]))(actx) + df = bind(discr, 3*sym.cos(3*x[axis]))(actx) sym_op = nabla[axis](sym.var("f")) bound_op = bind(discr, sym_op) - df_num = bound_op(queue, f=f) + df_num = bound_op(f=f) - linf_error = la.norm((df_num-df).get(), np.Inf) + linf_error = flat_norm(df_num-df, np.Inf) axis_eoc_recs[axis].add_data_point(1/n, linf_error) for axis, eoc_rec in enumerate(axis_eoc_recs): @@ -217,11 +220,12 @@ def test_2d_gauss_theorem(ctx_factory): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=2) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=2) def f(x): - return join_fields( + return flat_obj_array( sym.sin(3*x[0])+sym.cos(3*x[1]), sym.sin(2*x[0])+sym.cos(x[1])) @@ -234,7 +238,7 @@ def test_2d_gauss_theorem(ctx_factory): sym.project("vol", sym.BTAG_ALL)(f(sym.nodes(2))) .dot(sym.normal(sym.BTAG_ALL, 2)), dd=sym.BTAG_ALL) - )(queue) + )(actx) assert abs(gauss_err) < 1e-13 @@ -255,6 +259,7 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) from pytools.convergence import EOCRecorder eoc_rec = EOCRecorder() @@ -314,7 +319,7 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type from grudge.models.advection import ( StrongAdvectionOperator, WeakAdvectionOperator) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order) op_class = { "strong": StrongAdvectionOperator, "weak": WeakAdvectionOperator, @@ -325,17 +330,17 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type bound_op = bind(discr, op.sym_operator()) - u = bind(discr, u_analytic(sym.nodes(dim)))(queue, t=0) + u = bind(discr, u_analytic(sym.nodes(dim)))(actx, t=0) def rhs(t, u): - return bound_op(queue, t=t, u=u) + return bound_op(t=t, u=u) if dim == 3: final_time = 0.1 else: final_time = 0.2 - h_max = bind(discr, sym.h_max_from_volume(discr.ambient_dim))(queue) + h_max = bind(discr, sym.h_max_from_volume(discr.ambient_dim))(actx) dt = dt_factor * h_max/order**2 nsteps = (final_time // dt) + 1 dt = final_time/nsteps + 1e-15 @@ -364,7 +369,7 @@ def test_convergence_advec(ctx_factory, mesh_name, mesh_pars, op_type, flux_type error_l2 = bind(discr, sym.norm(2, sym.var("u")-u_analytic(sym.nodes(dim))))( - queue, t=last_t, u=last_u) + t=last_t, u=last_u) logger.info("h_max %.5e error %.5e", h_max, error_l2) eoc_rec.add_data_point(h_max, error_l2) @@ -381,6 +386,7 @@ def test_convergence_maxwell(ctx_factory, order): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) from pytools.convergence import EOCRecorder eoc_rec = EOCRecorder() @@ -394,7 +400,7 @@ def test_convergence_maxwell(ctx_factory, order): b=(1.0,)*dims, n=(n,)*dims) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order) epsilon = 1 mu = 1 @@ -403,7 +409,7 @@ def test_convergence_maxwell(ctx_factory, order): sym_mode = get_rectangular_cavity_mode(1, (1, 2, 2)) analytic_sol = bind(discr, sym_mode) - fields = analytic_sol(queue, t=0, epsilon=epsilon, mu=mu) + fields = analytic_sol(actx, t=0, epsilon=epsilon, mu=mu) from grudge.models.em import MaxwellOperator op = MaxwellOperator(epsilon, mu, flux_type=0.5, dimensions=dims) @@ -411,7 +417,7 @@ def test_convergence_maxwell(ctx_factory, order): bound_op = bind(discr, op.sym_operator()) def rhs(t, w): - return bound_op(queue, t=t, w=w) + return bound_op(t=t, w=w) dt = 0.002 final_t = dt * 5 @@ -433,8 +439,8 @@ def test_convergence_maxwell(ctx_factory, order): step += 1 logger.debug("[%04d] t = %.5e", step, event.t) - sol = analytic_sol(queue, mu=mu, epsilon=epsilon, t=step * dt) - vals = [norm(queue, u=(esc[i] - sol[i])) / norm(queue, u=sol[i]) for i in range(5)] # noqa E501 + sol = analytic_sol(actx, mu=mu, epsilon=epsilon, t=step * dt) + vals = [norm(u=(esc[i] - sol[i])) / norm(u=sol[i]) for i in range(5)] # noqa E501 total_error = sum(vals) eoc_rec.add_data_point(1.0/n, total_error) @@ -455,10 +461,11 @@ def test_improvement_quadrature(ctx_factory, order): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) dims = 2 sym_nds = sym.nodes(dims) - advec_v = join_fields(-1*sym_nds[1], sym_nds[0]) + advec_v = flat_obj_array(-1*sym_nds[1], sym_nds[0]) flux = "upwind" op = VariableCoefficientAdvectionOperator(advec_v, 0, flux_type=flux) @@ -489,15 +496,15 @@ def test_improvement_quadrature(ctx_factory, order): else: quad_tag_to_group_factory = {"product": None} - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=order, + discr = DGDiscretizationWithBoundaries(actx, mesh, order=order, quad_tag_to_group_factory=quad_tag_to_group_factory) bound_op = bind(discr, op.sym_operator()) - fields = bind(discr, gaussian_mode())(queue, t=0) + fields = bind(discr, gaussian_mode())(actx, t=0) norm = bind(discr, sym.norm(2, sym.var("u"))) - esc = bound_op(queue, u=fields) - total_error = norm(queue, u=esc) + esc = bound_op(u=fields) + total_error = norm(u=esc) eoc_rec.add_data_point(1.0/n, total_error) logger.info("\n%s", eoc_rec.pretty_print( @@ -514,22 +521,6 @@ def test_improvement_quadrature(ctx_factory, order): assert q_eoc > order -def test_foreign_points(ctx_factory): - pytest.importorskip("sumpy") - import sumpy.point_calculus as pc - - cl_ctx = ctx_factory() - queue = cl.CommandQueue(cl_ctx) - - dim = 2 - cp = pc.CalculusPatch(np.zeros(dim)) - - from grudge.discretization import PointsDiscretization - pdiscr = PointsDiscretization(cl.array.to_device(queue, cp.points)) - - bind(pdiscr, sym.nodes(dim)**2)(queue) - - def test_op_collector_order_determinism(): class TestOperator(sym.Operator): @@ -558,6 +549,7 @@ def test_op_collector_order_determinism(): def test_bessel(ctx_factory): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) dims = 2 @@ -567,7 +559,7 @@ def test_bessel(ctx_factory): b=(1.0,)*dims, n=(8,)*dims) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=3) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=3) nodes = sym.nodes(dims) r = sym.cse(sym.sqrt(nodes[0]**2 + nodes[1]**2)) @@ -579,7 +571,7 @@ def test_bessel(ctx_factory): + sym.bessel_j(n-1, r) - 2*n/r * sym.bessel_j(n, r)) - z = bind(discr, sym.norm(2, bessel_zero))(queue) + z = bind(discr, sym.norm(2, bessel_zero))(actx) assert z < 1e-15 @@ -587,6 +579,7 @@ def test_bessel(ctx_factory): def test_external_call(ctx_factory): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) def double(queue, x): return 2 * x @@ -596,7 +589,7 @@ def test_external_call(ctx_factory): dims = 2 mesh = generate_regular_rect_mesh(a=(0,) * dims, b=(1,) * dims, n=(4,) * dims) - discr = DGDiscretizationWithBoundaries(cl_ctx, mesh, order=1) + discr = DGDiscretizationWithBoundaries(actx, mesh, order=1) ones = sym.Ones(sym.DD_VOLUME) op = ( @@ -614,37 +607,41 @@ def test_external_call(ctx_factory): bound_op = bind(discr, op, function_registry=freg) - result = bound_op(queue, double=double) - assert (result == 5).get().all() + result = bound_op(actx, double=double) + assert actx.to_numpy(flatten(result) == 5).all() @pytest.mark.parametrize("array_type", ["scalar", "vector"]) def test_function_symbol_array(ctx_factory, array_type): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) from meshmode.mesh.generation import generate_regular_rect_mesh dim = 2 mesh = generate_regular_rect_mesh( a=(-0.5,)*dim, b=(0.5,)*dim, n=(8,)*dim, order=4) - discr = DGDiscretizationWithBoundaries(ctx, mesh, order=4) - nnodes = discr.discr_from_dd(sym.DD_VOLUME).nnodes + discr = DGDiscretizationWithBoundaries(actx, mesh, order=4) + volume_discr = discr.discr_from_dd(sym.DD_VOLUME) + ndofs = sum(grp.ndofs for grp in volume_discr.groups) import pyopencl.clrandom # noqa: F401 if array_type == "scalar": sym_x = sym.var("x") - x = cl.clrandom.rand(queue, nnodes, dtype=np.float) + x = unflatten(actx, volume_discr, + cl.clrandom.rand(queue, ndofs, dtype=np.float)) elif array_type == "vector": sym_x = sym.make_sym_array("x", dim) x = make_obj_array([ - cl.clrandom.rand(queue, nnodes, dtype=np.float) + unflatten(actx, volume_discr, + cl.clrandom.rand(queue, ndofs, dtype=np.float)) for _ in range(dim) ]) else: raise ValueError("unknown array type") - norm = bind(discr, sym.norm(2, sym_x))(queue, x=x) + norm = bind(discr, sym.norm(2, sym_x))(x=x) assert isinstance(norm, float) diff --git a/test/test_mpi_communication.py b/test/test_mpi_communication.py index 0b5ae492798b2783564f4afc75ec3051a43c3461..a5109737106279f0a76d147a9d77a4e35d2d6d84 100644 --- a/test/test_mpi_communication.py +++ b/test/test_mpi_communication.py @@ -31,6 +31,9 @@ import numpy as np import pyopencl as cl import logging +from meshmode.array_context import PyOpenCLArrayContext +from meshmode.dof_array import flat_norm + logger = logging.getLogger(__name__) logging.basicConfig() logger.setLevel(logging.INFO) @@ -42,6 +45,8 @@ from grudge.shortcuts import set_up_rk4 def simple_mpi_communication_entrypoint(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) + from meshmode.distributed import MPIMeshDistributor, get_partition_by_pymetis from mpi4py import MPI @@ -62,12 +67,12 @@ def simple_mpi_communication_entrypoint(): else: local_mesh = mesh_dist.receive_mesh_part() - vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=5, + vol_discr = DGDiscretizationWithBoundaries(actx, local_mesh, order=5, mpi_communicator=comm) sym_x = sym.nodes(local_mesh.dim) myfunc_symb = sym.sin(np.dot(sym_x, [2, 3])) - myfunc = bind(vol_discr, myfunc_symb)(queue) + myfunc = bind(vol_discr, myfunc_symb)(actx) sym_all_faces_func = sym.cse( sym.project("vol", "all_faces")(sym.var("myfunc"))) @@ -84,9 +89,8 @@ def simple_mpi_communication_entrypoint(): ) - (sym_all_faces_func - sym_bdry_faces_func) ) - hopefully_zero = bound_face_swap(queue, myfunc=myfunc) - import numpy.linalg as la - error = la.norm(hopefully_zero.get()) + hopefully_zero = bound_face_swap(myfunc=myfunc) + error = flat_norm(hopefully_zero, np.inf) print(__file__) with np.printoptions(threshold=100000000, suppress=True): @@ -99,6 +103,7 @@ def simple_mpi_communication_entrypoint(): def mpi_communication_entrypoint(): cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) from mpi4py import MPI comm = MPI.COMM_WORLD @@ -124,7 +129,7 @@ def mpi_communication_entrypoint(): else: local_mesh = mesh_dist.receive_mesh_part() - vol_discr = DGDiscretizationWithBoundaries(cl_ctx, local_mesh, order=order, + vol_discr = DGDiscretizationWithBoundaries(actx, local_mesh, order=order, mpi_communicator=comm) source_center = np.array([0.1, 0.22, 0.33])[:local_mesh.dim] @@ -148,9 +153,9 @@ def mpi_communication_entrypoint(): radiation_tag=BTAG_ALL, flux_type="upwind") - from pytools.obj_array import join_fields - fields = join_fields(vol_discr.zeros(queue), - [vol_discr.zeros(queue) for i in range(vol_discr.dim)]) + from pytools.obj_array import flat_obj_array + fields = flat_obj_array(vol_discr.zeros(actx), + [vol_discr.zeros(actx) for i in range(vol_discr.dim)]) # FIXME # dt = op.estimate_rk4_timestep(vol_discr, fields=fields) @@ -189,7 +194,7 @@ def mpi_communication_entrypoint(): bound_op = bind(vol_discr, op.sym_operator()) def rhs(t, w): - val, rhs.profile_data = bound_op(queue, profile_data=rhs.profile_data, + val, rhs.profile_data = bound_op(profile_data=rhs.profile_data, log_quantities=log_quantities, t=t, w=w) return val @@ -219,7 +224,7 @@ def mpi_communication_entrypoint(): step += 1 logger.debug("[%04d] t = %.5e |u| = %.5e ellapsed %.5e", step, event.t, - norm(queue, u=event.state_component[0]), + norm(u=event.state_component[0]), time() - t_last_step) # if step % 10 == 0: