diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d658c057a5d965d98a8136fe7bc876ad8658883e..75f714fd09474c56fb155b5e10be0c7dc3c90faf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,12 +15,13 @@ Python 2.7 POCL: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="numpy mako mpi4py" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - python2.7 - pocl + - mpi except: - tags @@ -28,12 +29,13 @@ Python 3.5 POCL: script: - export PY_EXE=python3.5 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="numpy mako mpi4py" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - python3.5 - pocl + - mpi except: - tags diff --git a/meshmode/discretization/connection/__init__.py b/meshmode/discretization/connection/__init__.py index bd4f1292a083ade8273f5f92e01efebd66d652e8..5f136982d799b14ccaddef05e700dc0ec75b5156 100644 --- a/meshmode/discretization/connection/__init__.py +++ b/meshmode/discretization/connection/__init__.py @@ -38,7 +38,7 @@ from meshmode.discretization.connection.face import ( make_face_restriction, make_face_to_all_faces_embedding) from meshmode.discretization.connection.opposite_face import \ - make_opposite_face_connection + make_opposite_face_connection, make_partition_connection from meshmode.discretization.connection.refinement import \ make_refinement_connection from meshmode.discretization.connection.chained import \ @@ -55,8 +55,9 @@ __all__ = [ "make_face_restriction", "make_face_to_all_faces_embedding", "make_opposite_face_connection", + "make_partition_connection", "make_refinement_connection", - "flatten_chained_connection" + "flatten_chained_connection", ] __doc__ = """ @@ -73,6 +74,7 @@ __doc__ = """ .. autofunction:: make_face_to_all_faces_embedding .. autofunction:: make_opposite_face_connection +.. autofunction:: make_partition_connection .. autofunction:: make_refinement_connection diff --git a/meshmode/discretization/connection/face.py b/meshmode/discretization/connection/face.py index d2745225ab6726399f41a552022df4101eac7da2..6016a146db7b41400dfc995e79005f9b5e089434 100644 --- a/meshmode/discretization/connection/face.py +++ b/meshmode/discretization/connection/face.py @@ -88,8 +88,7 @@ def _build_boundary_connection(queue, vol_discr, bdry_discr, connection_data, from_group_index=igrp, from_element_indices=cl.array.to_device( queue, - vol_grp.mesh_el_group.element_nr_base - + data.group_source_element_indices) + data.group_source_element_indices) .with_queue(None), to_element_indices=cl.array.to_device( queue, diff --git a/meshmode/discretization/connection/opposite_face.py b/meshmode/discretization/connection/opposite_face.py index 6ce70b2a11b148ce6a1e2cca6f7588ac2549aada..0df3521f8a8632d8275761be5804a9f391f39de1 100644 --- a/meshmode/discretization/connection/opposite_face.py +++ b/meshmode/discretization/connection/opposite_face.py @@ -35,143 +35,83 @@ logger = logging.getLogger(__name__) # {{{ opposite-face connection -def _make_cross_face_batches( - queue, vol_discr, bdry_discr, - i_tgt_grp, i_src_grp, - i_face_tgt, - adj_grp, - vbc_tgt_grp_face_batch, src_grp_el_lookup): +def _make_cross_face_batches(queue, tgt_bdry_discr, src_bdry_discr, + i_tgt_grp, i_src_grp, + tgt_bdry_element_indices, + src_bdry_element_indices): - # {{{ index wrangling + # FIXME: This should view-then-transfer + # (but PyOpenCL doesn't do non-contiguous transfers for now). + tgt_bdry_nodes = (tgt_bdry_discr.groups[i_tgt_grp].view(tgt_bdry_discr.nodes(). + get(queue=queue))[:, tgt_bdry_element_indices]) - # Assert that the adjacency group and the restriction - # interpolation batch and the adjacency group have the same - # element ordering. + # FIXME: This should view-then-transfer + # (but PyOpenCL doesn't do non-contiguous transfers for now). + src_bdry_nodes = (src_bdry_discr.groups[i_src_grp].view(src_bdry_discr.nodes(). + get(queue=queue))[:, src_bdry_element_indices]) - adj_grp_tgt_flags = adj_grp.element_faces == i_face_tgt + tol = 1e4 * np.finfo(tgt_bdry_nodes.dtype).eps - assert ( - np.array_equal( - adj_grp.elements[adj_grp_tgt_flags], - vbc_tgt_grp_face_batch.from_element_indices - .get(queue=queue))) + src_mesh_grp = src_bdry_discr.mesh.groups[i_src_grp] + src_grp = src_bdry_discr.groups[i_src_grp] - # find to_element_indices - - to_bdry_element_indices = ( - vbc_tgt_grp_face_batch.to_element_indices - .get(queue=queue)) - - # find from_element_indices - - from_vol_element_indices = adj_grp.neighbors[adj_grp_tgt_flags] - from_element_faces = adj_grp.neighbor_faces[adj_grp_tgt_flags] - - from_bdry_element_indices = src_grp_el_lookup[ - from_vol_element_indices, from_element_faces] - - # }}} - - # {{{ visualization (for debugging) - - if 0: - print("TVE", adj_grp.elements[adj_grp_tgt_flags]) - print("TBE", to_bdry_element_indices) - print("FVE", from_vol_element_indices) - from meshmode.mesh.visualization import draw_2d_mesh - import matplotlib.pyplot as pt - draw_2d_mesh(vol_discr.mesh, draw_element_numbers=True, - set_bounding_box=True, - draw_vertex_numbers=False, - draw_face_numbers=True, - fill=None) - pt.figure() - - draw_2d_mesh(bdry_discr.mesh, draw_element_numbers=True, - set_bounding_box=True, - draw_vertex_numbers=False, - draw_face_numbers=True, - fill=None) - - pt.show() - # }}} + dim = src_grp.dim + ambient_dim, nelements, ntgt_unit_nodes = tgt_bdry_nodes.shape + assert tgt_bdry_nodes.shape == src_bdry_nodes.shape # {{{ invert face map (using Gauss-Newton) - to_bdry_nodes = ( - # FIXME: This should view-then-transfer (but PyOpenCL doesn't do - # non-contiguous transfers for now). - bdry_discr.groups[i_tgt_grp].view( - bdry_discr.nodes().get(queue=queue)) - [:, to_bdry_element_indices]) - - tol = 1e4 * np.finfo(to_bdry_nodes.dtype).eps - - from_mesh_grp = bdry_discr.mesh.groups[i_src_grp] - from_grp = bdry_discr.groups[i_src_grp] - - dim = from_grp.dim - ambient_dim, nelements, nto_unit_nodes = to_bdry_nodes.shape - - initial_guess = np.mean(from_mesh_grp.vertex_unit_coordinates(), axis=0) - from_unit_nodes = np.empty((dim, nelements, nto_unit_nodes)) - from_unit_nodes[:] = initial_guess.reshape(-1, 1, 1) + initial_guess = np.mean(src_mesh_grp.vertex_unit_coordinates(), axis=0) + src_unit_nodes = np.empty((dim, nelements, ntgt_unit_nodes)) + src_unit_nodes[:] = initial_guess.reshape(-1, 1, 1) import modepy as mp - from_vdm = mp.vandermonde(from_grp.basis(), from_grp.unit_nodes) - from_inv_t_vdm = la.inv(from_vdm.T) - from_nfuncs = len(from_grp.basis()) - - # (ambient_dim, nelements, nfrom_unit_nodes) - from_bdry_nodes = ( - # FIXME: This should view-then-transfer (but PyOpenCL doesn't do - # non-contiguous transfers for now). - bdry_discr.groups[i_src_grp].view( - bdry_discr.nodes().get(queue=queue)) - [:, from_bdry_element_indices]) + vdm = mp.vandermonde(src_grp.basis(), src_grp.unit_nodes) + inv_t_vdm = la.inv(vdm.T) + nsrc_funcs = len(src_grp.basis()) def apply_map(unit_nodes): - # unit_nodes: (dim, nelements, nto_unit_nodes) + # unit_nodes: (dim, nelements, ntgt_unit_nodes) # basis_at_unit_nodes - basis_at_unit_nodes = np.empty((from_nfuncs, nelements, nto_unit_nodes)) + basis_at_unit_nodes = np.empty((nsrc_funcs, nelements, ntgt_unit_nodes)) - for i, f in enumerate(from_grp.basis()): + for i, f in enumerate(src_grp.basis()): basis_at_unit_nodes[i] = ( f(unit_nodes.reshape(dim, -1)) - .reshape(nelements, nto_unit_nodes)) + .reshape(nelements, ntgt_unit_nodes)) - intp_coeffs = np.einsum("fj,jet->fet", from_inv_t_vdm, basis_at_unit_nodes) + intp_coeffs = np.einsum("fj,jet->fet", inv_t_vdm, basis_at_unit_nodes) # If we're interpolating 1, we had better get 1 back. one_deviation = np.abs(np.sum(intp_coeffs, axis=0) - 1) assert (one_deviation < tol).all(), np.max(one_deviation) - return np.einsum("fet,aef->aet", intp_coeffs, from_bdry_nodes) + return np.einsum("fet,aef->aet", intp_coeffs, src_bdry_nodes) def get_map_jacobian(unit_nodes): - # unit_nodes: (dim, nelements, nto_unit_nodes) + # unit_nodes: (dim, nelements, ntgt_unit_nodes) # basis_at_unit_nodes dbasis_at_unit_nodes = np.empty( - (dim, from_nfuncs, nelements, nto_unit_nodes)) + (dim, nsrc_funcs, nelements, ntgt_unit_nodes)) - for i, df in enumerate(from_grp.grad_basis()): + for i, df in enumerate(src_grp.grad_basis()): df_result = df(unit_nodes.reshape(dim, -1)) for rst_axis, df_r in enumerate(df_result): dbasis_at_unit_nodes[rst_axis, i] = ( - df_r.reshape(nelements, nto_unit_nodes)) + df_r.reshape(nelements, ntgt_unit_nodes)) dintp_coeffs = np.einsum( - "fj,rjet->rfet", from_inv_t_vdm, dbasis_at_unit_nodes) + "fj,rjet->rfet", inv_t_vdm, dbasis_at_unit_nodes) - return np.einsum("rfet,aef->raet", dintp_coeffs, from_bdry_nodes) + return np.einsum("rfet,aef->raet", dintp_coeffs, src_bdry_nodes) # {{{ test map applier and jacobian if 0: - u = from_unit_nodes + u = src_unit_nodes f = apply_map(u) for h in [1e-1, 1e-2]: du = h*np.random.randn(*u.shape) @@ -190,16 +130,19 @@ def _make_cross_face_batches( if 0: import matplotlib.pyplot as pt - guess = apply_map(from_unit_nodes) - goals = to_bdry_nodes + guess = apply_map(src_unit_nodes) + goals = tgt_bdry_nodes from meshmode.discretization.visualization import draw_curve - draw_curve(bdry_discr) + pt.figure(0) + draw_curve(tgt_bdry_discr) + pt.figure(1) + draw_curve(src_bdry_discr) + pt.figure(2) pt.plot(guess[0].reshape(-1), guess[1].reshape(-1), "or") pt.plot(goals[0].reshape(-1), goals[1].reshape(-1), "og") - pt.plot(from_bdry_nodes[0].reshape(-1), from_bdry_nodes[1].reshape(-1), "o", - color="purple") + pt.plot(src_bdry_nodes[0].reshape(-1), src_bdry_nodes[1].reshape(-1), "xb") pt.show() # }}} @@ -208,10 +151,10 @@ def _make_cross_face_batches( niter = 0 while True: - resid = apply_map(from_unit_nodes) - to_bdry_nodes + resid = apply_map(src_unit_nodes) - tgt_bdry_nodes - df = get_map_jacobian(from_unit_nodes) - df_inv_resid = np.empty_like(from_unit_nodes) + df = get_map_jacobian(src_unit_nodes) + df_inv_resid = np.empty_like(src_unit_nodes) # For the 1D/2D accelerated versions, we'll use the normal # equations and Cramer's rule. If you're looking for high-end @@ -231,7 +174,7 @@ def _make_cross_face_batches( det = ata[0, 0]*ata[1, 1] - ata[0, 1]*ata[1, 0] - df_inv_resid = np.empty_like(from_unit_nodes) + df_inv_resid = np.empty_like(src_unit_nodes) df_inv_resid[0] = 1/det * (ata[1, 1] * atb[0] - ata[1, 0]*atb[1]) df_inv_resid[1] = 1/det * (-ata[0, 1] * atb[0] + ata[0, 0]*atb[1]) @@ -243,11 +186,24 @@ def _make_cross_face_batches( # But we'll only hit it for boundaries of 4+D meshes, in which # case... good luck. :) for e in range(nelements): - for t in range(nto_unit_nodes): + for t in range(ntgt_unit_nodes): df_inv_resid[:, e, t], _, _, _ = \ la.lstsq(df[:, :, e, t].T, resid[:, e, t]) - from_unit_nodes = from_unit_nodes - df_inv_resid + src_unit_nodes = src_unit_nodes - df_inv_resid + + # {{{ visualize next guess + + if 0: + import matplotlib.pyplot as pt + guess = apply_map(src_unit_nodes) + goals = tgt_bdry_nodes + + pt.plot(guess[0].reshape(-1), guess[1].reshape(-1), "rx") + pt.plot(goals[0].reshape(-1), goals[1].reshape(-1), "go") + pt.show() + + # }}} max_resid = np.max(np.abs(resid)) logger.debug("gauss-newton residual: %g" % max_resid) @@ -264,7 +220,7 @@ def _make_cross_face_batches( # }}} - # {{{ find groups of from_unit_nodes + # {{{ find groups of src_unit_nodes def to_dev(ary): return cl.array.to_device(queue, ary, array_queue=None) @@ -275,10 +231,10 @@ def _make_cross_face_batches( if not len(todo_elements): return - template_unit_nodes = from_unit_nodes[:, todo_elements[0], :] + template_unit_nodes = src_unit_nodes[:, todo_elements[0], :] unit_node_dist = np.max(np.max(np.abs( - from_unit_nodes[:, todo_elements, :] + src_unit_nodes[:, todo_elements, :] - template_unit_nodes.reshape(dim, 1, -1)), axis=2), axis=0) @@ -287,7 +243,7 @@ def _make_cross_face_batches( done_elements[close_els] = True unit_node_dist = np.max(np.max(np.abs( - from_unit_nodes[:, todo_elements, :] + src_unit_nodes[:, todo_elements, :] - template_unit_nodes.reshape(dim, 1, -1)), axis=2), axis=0) @@ -295,8 +251,8 @@ def _make_cross_face_batches( from meshmode.discretization.connection import InterpolationBatch yield InterpolationBatch( from_group_index=i_src_grp, - from_element_indices=to_dev(from_bdry_element_indices[close_els]), - to_element_indices=to_dev(to_bdry_element_indices[close_els]), + from_element_indices=to_dev(src_bdry_element_indices[close_els]), + to_element_indices=to_dev(tgt_bdry_element_indices[close_els]), result_unit_nodes=template_unit_nodes, to_element_face=None) @@ -316,7 +272,11 @@ def _find_ibatch_for_face(vbc_tgt_grp_batches, iface): return vbc_tgt_grp_face_batch -def _make_el_lookup_table(queue, connection, igrp): +def _make_bdry_el_lookup_table(queue, connection, igrp): + """Given a volume-to-boundary connection as *connection*, return + a table of shape ``(from_nelements, nfaces)`` to look up the + element number of the boundary element for that face. + """ from_nelements = connection.from_discr.groups[igrp].nelements from_nfaces = connection.from_discr.mesh.groups[igrp].nfaces @@ -360,25 +320,77 @@ def make_opposite_face_connection(volume_to_bdry_conn): groups = [[] for i_tgt_grp in range(ngrps)] for i_src_grp in range(ngrps): - src_grp_el_lookup = _make_el_lookup_table( + src_grp_el_lookup = _make_bdry_el_lookup_table( queue, volume_to_bdry_conn, i_src_grp) for i_tgt_grp in range(ngrps): vbc_tgt_grp_batches = volume_to_bdry_conn.groups[i_tgt_grp].batches - adj_grp = vol_mesh.facial_adjacency_groups[i_tgt_grp][i_src_grp] + adj = vol_mesh.facial_adjacency_groups[i_tgt_grp][i_src_grp] for i_face_tgt in range(vol_mesh.groups[i_tgt_grp].nfaces): vbc_tgt_grp_face_batch = _find_ibatch_for_face( vbc_tgt_grp_batches, i_face_tgt) - groups[i_tgt_grp].extend( - _make_cross_face_batches( - queue, vol_discr, bdry_discr, + # {{{ index wrangling + + # Assert that the adjacency group and the restriction + # interpolation batch and the adjacency group have the same + # element ordering. + + adj_tgt_flags = adj.element_faces == i_face_tgt + + assert (np.array_equal( + adj.elements[adj_tgt_flags], + vbc_tgt_grp_face_batch.from_element_indices + .get(queue=queue))) + + # find to_element_indices + + tgt_bdry_element_indices = ( + vbc_tgt_grp_face_batch.to_element_indices + .get(queue=queue)) + + # find from_element_indices + + src_vol_element_indices = adj.neighbors[adj_tgt_flags] + src_element_faces = adj.neighbor_faces[adj_tgt_flags] + + src_bdry_element_indices = src_grp_el_lookup[ + src_vol_element_indices, src_element_faces] + + # }}} + + # {{{ visualization (for debugging) + + if 0: + print("TVE", adj.elements[adj_tgt_flags]) + print("TBE", tgt_bdry_element_indices) + print("FVE", src_vol_element_indices) + from meshmode.mesh.visualization import draw_2d_mesh + import matplotlib.pyplot as pt + draw_2d_mesh(vol_discr.mesh, draw_element_numbers=True, + set_bounding_box=True, + draw_vertex_numbers=False, + draw_face_numbers=True, + fill=None) + pt.figure() + + draw_2d_mesh(bdry_discr.mesh, draw_element_numbers=True, + set_bounding_box=True, + draw_vertex_numbers=False, + draw_face_numbers=True, + fill=None) + + pt.show() + + # }}} + + groups[i_tgt_grp].extend(_make_cross_face_batches(queue, + bdry_discr, bdry_discr, i_tgt_grp, i_src_grp, - i_face_tgt, - adj_grp, - vbc_tgt_grp_face_batch, src_grp_el_lookup)) + tgt_bdry_element_indices, + src_bdry_element_indices)) from meshmode.discretization.connection import ( DirectDiscretizationConnection, DiscretizationConnectionElementGroup) @@ -392,4 +404,100 @@ def make_opposite_face_connection(volume_to_bdry_conn): # }}} + +# {{{ partition_connection + +def make_partition_connection(local_bdry_conn, i_local_part, + remote_bdry, remote_adj_groups, + remote_from_elem_faces, remote_from_elem_indices): + """ + Connects ``local_bdry_conn`` to a neighboring partition. + + :arg local_bdry_conn: A :class:`DiscretizationConnection` of the local + partition. + :arg i_local_part: The partition number of the local partition. + :arg remote_adj_groups: A list of :class:`InterPartitionAdjacency`` of the + remote partition. + :arg remote_bdry: A :class:`Discretization` of the boundary of the + remote partition. + :arg remote_from_elem_faces: `remote_from_elem_faces[igrp][idx]` gives the face + that batch `idx` interpolates from in group `igrp`. + :arg remote_from_elem_indices: `remote_from_elem_indices[igrp][idx]` gives a + :class:`np.array` of element indices that batch `idx` interpolates from + in group `igrp`. + + :returns: A :class:`DirectDiscretizationConnection` that performs data + exchange across faces from the remote partition to partition `i_local_part`. + + .. versionadded:: 2017.1 + + .. warning:: Interface is not final. + """ + + from meshmode.mesh.processing import find_group_indices + from meshmode.discretization.connection import ( + DirectDiscretizationConnection, DiscretizationConnectionElementGroup) + + local_bdry = local_bdry_conn.to_discr + local_groups = local_bdry_conn.from_discr.mesh.groups + + part_batches = [[] for _ in local_groups] + + with cl.CommandQueue(local_bdry_conn.cl_context) as queue: + + for i_remote_grp, adj in enumerate(remote_adj_groups): + indices = (i_local_part == adj.neighbor_partitions) + if not np.any(indices): + # Skip because i_remote_grp is not connected to i_local_part. + continue + i_remote_faces = adj.element_faces[indices] + i_local_meshwide_elems = adj.global_neighbors[indices] + i_local_faces = adj.neighbor_faces[indices] + + i_local_grps = find_group_indices(local_groups, i_local_meshwide_elems) + + for i_local_grp in np.unique(i_local_grps): + + elem_base = local_groups[i_local_grp].element_nr_base + local_el_lookup = _make_bdry_el_lookup_table(queue, + local_bdry_conn, + i_local_grp) + + for i_remote_face in i_remote_faces: + + index_flags = np.logical_and(i_local_grps == i_local_grp, + i_remote_faces == i_remote_face) + if not np.any(index_flags): + continue + + remote_bdry_indices = None + for idxs, face in zip(remote_from_elem_indices[i_remote_grp], + remote_from_elem_faces[i_remote_grp]): + if face == i_remote_face: + remote_bdry_indices = idxs + break + assert remote_bdry_indices is not None + + elems = i_local_meshwide_elems[index_flags] - elem_base + faces = i_local_faces[index_flags] + local_bdry_indices = local_el_lookup[elems, faces] + + batches = _make_cross_face_batches(queue, + local_bdry, remote_bdry, + i_local_grp, i_remote_grp, + local_bdry_indices, + remote_bdry_indices) + + part_batches[i_local_grp].extend(batches) + + return DirectDiscretizationConnection( + from_discr=remote_bdry, + to_discr=local_bdry, + groups=[DiscretizationConnectionElementGroup(batches=grp_batches) + for grp_batches in part_batches], + is_surjective=True) + +# }}} + + # vim: foldmethod=marker diff --git a/meshmode/distributed.py b/meshmode/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..529190b55e091b432298f442697e02c9b2786ddc --- /dev/null +++ b/meshmode/distributed.py @@ -0,0 +1,224 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = """ +Copyright (C) 2017 Ellis Hoag +Copyright (C) 2017 Andreas Kloeckner +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import numpy as np # noqa + +# This file needs to be importable without mpi4py. So don't be tempted to add +# that import here--push it into individual functions instead. + +import logging +logger = logging.getLogger(__name__) + +TAG_BASE = 83411 +TAG_DISTRIBUTE_MESHES = TAG_BASE + 1 +TAG_SEND_BOUNDARY = TAG_BASE + 2 + +__doc__ = """ +.. autoclass:: MPIMeshDistributor +.. autoclass:: MPIBoundaryTransceiver +""" + + +# {{{ mesh distributor + +class MPIMeshDistributor(object): + """ + .. automethod:: is_mananger_rank + .. automethod:: send_mesh_parts + .. automethod:: recv_mesh_part + """ + def __init__(self, mpi_comm, manager_rank=0): + """ + :arg mpi_comm: A :class:`MPI.Intracomm` + """ + self.mpi_comm = mpi_comm + self.manager_rank = manager_rank + + def is_mananger_rank(self): + return self.mpi_comm.Get_rank() == self.manager_rank + + def send_mesh_parts(self, mesh, part_per_element, num_parts): + """ + :arg mesh: A :class:`Mesh` to distribute to other ranks. + :arg part_per_element: A :class:`numpy.ndarray` containing one + integer per element of *mesh* indicating which part of the + partitioned mesh the element is to become a part of. + :arg num_parts: The number of partitions to divide the mesh into. + + Sends each partition to a different rank. + Returns one partition that was not sent to any other rank. + """ + mpi_comm = self.mpi_comm + rank = mpi_comm.Get_rank() + assert num_parts <= mpi_comm.Get_size() + + assert self.is_mananger_rank() + + from meshmode.mesh.processing import partition_mesh + parts = [partition_mesh(mesh, part_per_element, i)[0] + for i in range(num_parts)] + + local_part = None + + reqs = [] + for r, part in enumerate(parts): + if r == self.manager_rank: + local_part = part + else: + reqs.append(mpi_comm.isend(part, dest=r, tag=TAG_DISTRIBUTE_MESHES)) + + logger.info('rank %d: sent all mesh partitions', rank) + for req in reqs: + req.wait() + + return local_part + + def receive_mesh_part(self): + """ + Returns the mesh sent by the manager rank. + """ + mpi_comm = self.mpi_comm + rank = mpi_comm.Get_rank() + + assert not self.is_mananger_rank(), "Manager rank cannot receive mesh" + + from mpi4py import MPI + status = MPI.Status() + result = self.mpi_comm.recv( + source=self.manager_rank, tag=TAG_DISTRIBUTE_MESHES, + status=status) + logger.info('rank %d: received local mesh (size = %d)', rank, status.count) + + return result + +# }}} + + +# {{{ boundary communication setup helper + +class MPIBoundaryCommSetupHelper(object): + """ + .. automethod:: __call__ + .. automethod:: is_ready + """ + def __init__(self, mpi_comm, queue, local_bdry_conn, i_remote_part, + bdry_grp_factory): + """ + :arg mpi_comm: A :class:`MPI.Intracomm` + :arg queue: + :arg i_remote_part: The part number of the remote partition + """ + self.mpi_comm = mpi_comm + self.queue = queue + self.i_local_part = mpi_comm.Get_rank() + self.i_remote_part = i_remote_part + self.local_bdry_conn = local_bdry_conn + self.bdry_grp_factory = bdry_grp_factory + + def _post_send_boundary_data(self): + local_bdry = self.local_bdry_conn.to_discr + local_mesh = self.local_bdry_conn.from_discr.mesh + local_adj_groups = [local_mesh.facial_adjacency_groups[i][None] + for i in range(len(local_mesh.groups))] + local_batches = [self.local_bdry_conn.groups[i].batches + for i in range(len(local_mesh.groups))] + local_to_elem_faces = [[batch.to_element_face for batch in grp_batches] + for grp_batches in local_batches] + local_to_elem_indices = [[batch.to_element_indices.get(queue=self.queue) + for batch in grp_batches] + for grp_batches in local_batches] + + local_data = {'bdry_mesh': local_bdry.mesh, + 'adj': local_adj_groups, + 'to_elem_faces': local_to_elem_faces, + 'to_elem_indices': local_to_elem_indices} + return self.mpi_comm.isend(local_data, + dest=self.i_remote_part, + tag=TAG_SEND_BOUNDARY) + + def post_sends(self): + logger.info("bdry comm rank %d send begin", self.i_local_part) + self.send_req = self._post_send_boundary_data() + + def is_setup_ready(self): + """ + Returns True if the rank boundary data is ready to be received. + """ + return self.mpi_comm.Iprobe(source=self.i_remote_part, tag=TAG_SEND_BOUNDARY) + + def complete_setup(self): + """ + Returns the tuple ``remote_to_local_bdry_conn`` + where `remote_to_local_bdry_conn` is a + :class:`DirectDiscretizationConnection` that gives the connection that + performs data exchange across faces from partition `i_remote_part` to the + local mesh. + """ + remote_data = self.mpi_comm.recv( + source=self.i_remote_part, + tag=TAG_SEND_BOUNDARY) + + logger.debug('rank %d: Received rank %d data', + self.i_local_part, self.i_remote_part) + + from meshmode.discretization import Discretization + remote_bdry_mesh = remote_data['bdry_mesh'] + remote_bdry = Discretization(self.queue.context, remote_bdry_mesh, + self.bdry_grp_factory) + remote_adj_groups = remote_data['adj'] + remote_to_elem_faces = remote_data['to_elem_faces'] + remote_to_elem_indices = remote_data['to_elem_indices'] + + # Connect local_mesh to remote_mesh + from meshmode.discretization.connection import make_partition_connection + remote_to_local_bdry_conn = make_partition_connection(self.local_bdry_conn, + self.i_local_part, + remote_bdry, + remote_adj_groups, + remote_to_elem_faces, + remote_to_elem_indices) + self.send_req.wait() + return remote_to_local_bdry_conn + +# }}} + + +def get_connected_partitions(mesh): + """ + :arg mesh: A :class:`Mesh` + Returns the set of partition numbers that are connected to `mesh` + """ + connected_parts = set() + from meshmode.mesh import InterPartitionAdjacencyGroup + for adj in mesh.facial_adjacency_groups: + if isinstance(adj[None], InterPartitionAdjacencyGroup): + indices = (adj[None].neighbor_partitions >= 0) + connected_parts = connected_parts.union( + adj[None].neighbor_partitions[indices]) + return connected_parts + +# vim: foldmethod=marker diff --git a/meshmode/mesh/__init__.py b/meshmode/mesh/__init__.py index 9e374dfb765db8bdbfa579c886a6638a75fb5bc4..8ff2981e9a50e89c409654f81d36d2e6d8de1d13 100644 --- a/meshmode/mesh/__init__.py +++ b/meshmode/mesh/__init__.py @@ -54,6 +54,7 @@ Predefined Boundary tags .. autoclass:: BTAG_ALL .. autoclass:: BTAG_REALLY_ALL .. autoclass:: BTAG_NO_BOUNDARY +.. autoclass:: BTAG_PARTITION """ @@ -88,7 +89,34 @@ class BTAG_NO_BOUNDARY(object): # noqa pass -SYSTEM_TAGS = set([BTAG_NONE, BTAG_ALL, BTAG_REALLY_ALL, BTAG_NO_BOUNDARY]) +class BTAG_PARTITION(object): # noqa + """ + A boundary tag indicating that this edge is adjacent to an element of + another :class:`Mesh`. The partition number of the adjacent mesh + is given by ``part_nr``. + + .. attribute:: part_nr + + .. versionadded:: 2017.1 + """ + def __init__(self, part_nr): + self.part_nr = int(part_nr) + + def __hash__(self): + return hash((type(self), self.part_nr)) + + def __eq__(self, other): + if isinstance(other, BTAG_PARTITION): + return self.part_nr == other.part_nr + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + + +SYSTEM_TAGS = set([BTAG_NONE, BTAG_ALL, BTAG_REALLY_ALL, BTAG_NO_BOUNDARY, + BTAG_PARTITION]) # }}} @@ -458,6 +486,82 @@ class FacialAdjacencyGroup(Record): # }}} +# {{{ partition adjacency + +class InterPartitionAdjacencyGroup(FacialAdjacencyGroup): + """ + Describes boundary adjacency information of elements in + :class:`MeshElementGroup`. + + .. attribute:: igroup + + The group number of this group. + + .. attribute:: ineighbor_group + + *None* for boundary faces. + + .. attribute:: elements + + Group-local element numbers. + Element ``element_id_dtype elements[i]`` and face + ``face_id_dtype element_faces[i]`` is connected to neighbor element + ``element_id_dtype global_neighbors[i]`` with face + ``face_id_dtype global_neighbor_faces[i]``. The partition number it connects + to is ``neighbor_partitions[i]``. + + .. attribute:: element_faces + + ``face_id_dtype element_faces[i]`` gives the face of + ``element_id_dtype elements[i]`` that is connected to + ``global_neighbors[i]``. + + .. attribute:: neighbors + + Since this is a boundary, ``element_id_dtype neighbors[i]`` is interpreted + as a boundary tag. ``-neighbors[i]`` should be interpreted according to + :class:``Mesh.boundary_tags``. + + .. attribute:: global_neighbors + + Mesh-wide element numbers. + ``element_id_dtype global_neighbors[i]`` gives the element number within the + neighboring partition of the element connected to + ``element_id_dtype elements[i]``. Use ``find_group_instances()`` to find the + group that the element belongs to, then subtract ``element_nr_base`` to find + the element of the group. + + If ``global_neighbors[i]`` is negative, ``elements[i]`` is on a true + boundary and is not connected to any other :class:``Mesh``. + + .. attribute:: neighbor_faces + + ``face_id_dtype global_neighbor_faces[i]`` gives face index within the + neighboring partition of the face connected to + ``element_id_dtype elements[i]`` + + If ``neighbor_partitions[i]`` is negative, ``elements[i]`` is on a true + boundary and is not connected to any other :class:``Mesh``. + + .. attribute:: neighbor_partitions + + ``neighbor_partitions[i]`` gives the partition number that ``elements[i]`` + is connected to. + + If ``neighbor_partitions[i]`` is negative, ``elements[i]`` is on a true + boundary and is not connected to any other :class:``Mesh``. + + .. versionadded:: 2017.1 + """ + + def __eq__(self, other): + return (super.__eq__(self, other) + and np.array_equal(self.global_neighbors, other.global_neighbors) + and np.array_equal(self.neighbor_partitions, other.neighbor_partitions)) + +# }}} + + # {{{ mesh class Mesh(Record): @@ -487,7 +591,8 @@ class Mesh(Record): the set of facial adjacency relations between group *igrp* and *ineighbor_group*. *ineighbor_group* and *igrp* may be identical, or *ineighbor_group* may be *None*, in which case - a group containing boundary faces is returned. + an :class:``InterPartitionAdjacency`` group containing boundary + faces is returned. Referencing this attribute may raise :exc:`meshmode.DataUnavailable`. @@ -535,6 +640,7 @@ class Mesh(Record): .. automethod:: __eq__ .. automethod:: __ne__ + .. automethos:: adjacency_list """ face_id_dtype = np.int8 @@ -578,6 +684,7 @@ class Mesh(Record): will result in exceptions. Lastly, a data structure as described in :attr:`facial_adjacency_groups` may be passed. """ + el_nr = 0 node_nr = 0 diff --git a/meshmode/mesh/processing.py b/meshmode/mesh/processing.py index 36c38eda9d6704f5f59a6e589578d019d7485b88..ea72facf2fea4718d496d2e617a9aff2f6a9cb32 100644 --- a/meshmode/mesh/processing.py +++ b/meshmode/mesh/processing.py @@ -31,6 +31,7 @@ import modepy as mp __doc__ = """ +.. autofunction:: find_group_indices .. autofunction:: partition_mesh .. autofunction:: find_volume_mesh_element_orientations .. autofunction:: perform_flips @@ -41,15 +42,32 @@ __doc__ = """ """ +def find_group_indices(groups, meshwide_elems): + """ + :arg groups: A list of :class:``MeshElementGroup`` instances that contain + ``meshwide_elems``. + :arg meshwide_elems: A :class:``numpy.ndarray`` of mesh-wide element numbers + Usually computed by ``elem + element_nr_base``. + :returns: A :class:``numpy.ndarray`` of group numbers that ``meshwide_elem`` + belongs to. + """ + grps = np.zeros_like(meshwide_elems) + next_grp_boundary = 0 + for igrp, grp in enumerate(groups): + next_grp_boundary += grp.nelements + grps += meshwide_elems >= next_grp_boundary + return grps + + # {{{ partition_mesh -def partition_mesh(mesh, part_per_element, part_nr): +def partition_mesh(mesh, part_per_element, part_num): """ :arg mesh: A :class:`meshmode.mesh.Mesh` to be partitioned. :arg part_per_element: A :class:`numpy.ndarray` containing one integer per element of *mesh* indicating which part of the partitioned mesh the element is to become a part of. - :arg part_nr: The part number of the mesh to return. + :arg part_num: The part number of the mesh to return. :returns: A tuple ``(part_mesh, part_to_global)``, where *part_mesh* is a :class:`meshmode.mesh.Mesh` that is a partition of mesh, and @@ -57,15 +75,12 @@ def partition_mesh(mesh, part_per_element, part_nr): numbers on *part_mesh* to ones in *mesh*. .. versionadded:: 2017.1 - - .. warning:: Interface is not final. Connectivity between elements - across groups needs to be added. """ assert len(part_per_element) == mesh.nelements, ( "part_per_element must have shape (mesh.nelements,)") # Contains the indices of the elements requested. - queried_elems = np.where(np.array(part_per_element) == part_nr)[0] + queried_elems = np.where(np.array(part_per_element) == part_num)[0] num_groups = len(mesh.groups) new_indices = [] @@ -80,10 +95,10 @@ def partition_mesh(mesh, part_per_element, part_nr): skip_groups = [] num_prev_elems = 0 start_idx = 0 - for group_nr in range(num_groups): - mesh_group = mesh.groups[group_nr] + for group_num in range(num_groups): + mesh_group = mesh.groups[group_num] - # Find the index of first element in the next group + # Find the index of first element in the next group. end_idx = len(queried_elems) for idx in range(start_idx, len(queried_elems)): if queried_elems[idx] - num_prev_elems >= mesh_group.nelements: @@ -91,7 +106,7 @@ def partition_mesh(mesh, part_per_element, part_nr): break if start_idx == end_idx: - skip_groups.append(group_nr) + skip_groups.append(group_num) new_indices.append(np.array([])) new_nodes.append(np.array([])) num_prev_elems += mesh_group.nelements @@ -107,10 +122,10 @@ def partition_mesh(mesh, part_per_element, part_nr): for j in range(start_idx, end_idx): elems = queried_elems[j] - num_prev_elems new_idx = j - start_idx - new_nodes[group_nr][i, new_idx, :] = mesh_group.nodes[i, elems, :] + new_nodes[group_num][i, new_idx, :] = mesh_group.nodes[i, elems, :] - #index_set = np.append(index_set, new_indices[group_nr].ravel()) - index_sets = np.append(index_sets, set(new_indices[group_nr].ravel())) + #index_set = np.append(index_set, new_indices[group_num].ravel()) + index_sets = np.append(index_sets, set(new_indices[group_num].ravel())) num_prev_elems += mesh_group.nelements start_idx = end_idx @@ -124,31 +139,126 @@ def partition_mesh(mesh, part_per_element, part_nr): new_vertices[dim] = mesh.vertices[dim][required_indices] # Our indices need to be in range [0, len(mesh.nelements)]. - for group_nr in range(num_groups): - if group_nr not in skip_groups: - for i in range(len(new_indices[group_nr])): - for j in range(len(new_indices[group_nr][0])): - original_index = new_indices[group_nr][i, j] - new_indices[group_nr][i, j] = np.where( - required_indices == original_index)[0] + for group_num in range(num_groups): + if group_num not in skip_groups: + for i in range(len(new_indices[group_num])): + for j in range(len(new_indices[group_num][0])): + original_index = new_indices[group_num][i, j] + new_indices[group_num][i, j] = np.where( + required_indices == original_index)[0] new_mesh_groups = [] - for group_nr in range(num_groups): - if group_nr not in skip_groups: - mesh_group = mesh.groups[group_nr] + for group_num, mesh_group in enumerate(mesh.groups): + if group_num not in skip_groups: new_mesh_groups.append( type(mesh_group)( - mesh_group.order, new_indices[group_nr], - new_nodes[group_nr], + mesh_group.order, new_indices[group_num], + new_nodes[group_num], unit_nodes=mesh_group.unit_nodes)) + from meshmode.mesh import BTAG_ALL, BTAG_PARTITION + boundary_tags = [BTAG_PARTITION(n) for n in np.unique(part_per_element)] + from meshmode.mesh import Mesh part_mesh = Mesh( new_vertices, new_mesh_groups, + facial_adjacency_groups=None, + boundary_tags=boundary_tags, is_conforming=mesh.is_conforming) - return part_mesh, queried_elems + adj_data = [[] for _ in range(len(part_mesh.groups))] + + for igrp, grp in enumerate(part_mesh.groups): + elem_base = grp.element_nr_base + boundary_adj = part_mesh.facial_adjacency_groups[igrp][None] + boundary_elems = boundary_adj.elements + boundary_faces = boundary_adj.element_faces + p_meshwide_elems = queried_elems[boundary_elems + elem_base] + parent_igrps = find_group_indices(mesh.groups, p_meshwide_elems) + for adj_idx, elem in enumerate(boundary_elems): + face = boundary_faces[adj_idx] + tag = -boundary_adj.neighbors[adj_idx] + assert tag >= 0, "Expected boundary tag in adjacency group." + + parent_igrp = parent_igrps[adj_idx] + parent_elem_base = mesh.groups[parent_igrp].element_nr_base + parent_elem = p_meshwide_elems[adj_idx] - parent_elem_base + + parent_adj = mesh.facial_adjacency_groups[parent_igrp] + + for parent_facial_group in parent_adj.values(): + indices, = np.nonzero(parent_facial_group.elements == parent_elem) + for idx in indices: + if (parent_facial_group.neighbors[idx] >= 0 and + parent_facial_group.element_faces[idx] == face): + rank_neighbor = (parent_facial_group.neighbors[idx] + + parent_elem_base) + n_face = parent_facial_group.neighbor_faces[idx] + + n_part_num = part_per_element[rank_neighbor] + tag = tag & ~part_mesh.boundary_tag_bit(BTAG_ALL) + tag = tag | part_mesh.boundary_tag_bit( + BTAG_PARTITION(n_part_num)) + boundary_adj.neighbors[adj_idx] = -tag + + # Find the neighbor element from the other partition. + n_meshwide_elem = np.count_nonzero( + part_per_element[:rank_neighbor] == n_part_num) + + adj_data[igrp].append((elem, face, + n_part_num, n_meshwide_elem, n_face)) + + connected_mesh = part_mesh.copy() + + from meshmode.mesh import InterPartitionAdjacencyGroup + for igrp, adj in enumerate(adj_data): + if adj: + bdry = connected_mesh.facial_adjacency_groups[igrp][None] + # Initialize connections + n_parts = np.zeros_like(bdry.elements) + n_parts.fill(-1) + global_n_elems = np.copy(n_parts) + n_faces = np.copy(n_parts) + + # Sort both sets of elements so that we can quickly merge + # the two data structures + bdry_perm = np.lexsort([bdry.element_faces, bdry.elements]) + elems = bdry.elements[bdry_perm] + faces = bdry.element_faces[bdry_perm] + neighbors = bdry.neighbors[bdry_perm] + adj_elems, adj_faces, adj_n_parts, adj_gl_n_elems, adj_n_faces =\ + np.array(adj).T + adj_perm = np.lexsort([adj_faces, adj_elems]) + adj_elems = adj_elems[adj_perm] + adj_faces = adj_faces[adj_perm] + adj_n_parts = adj_n_parts[adj_perm] + adj_gl_n_elems = adj_gl_n_elems[adj_perm] + adj_n_faces = adj_n_faces[adj_perm] + + # Merge interpartition adjacency data with FacialAdjacencyGroup + adj_idx = 0 + for bdry_idx in range(len(elems)): + if adj_idx >= len(adj_elems): + break + if (adj_elems[adj_idx] == elems[bdry_idx] + and adj_faces[adj_idx] == faces[bdry_idx]): + n_parts[bdry_idx] = adj_n_parts[adj_idx] + global_n_elems[bdry_idx] = adj_gl_n_elems[adj_idx] + n_faces[bdry_idx] = adj_n_faces[adj_idx] + adj_idx += 1 + + connected_mesh.facial_adjacency_groups[igrp][None] =\ + InterPartitionAdjacencyGroup(elements=elems, + element_faces=faces, + neighbors=neighbors, + igroup=bdry.igroup, + ineighbor_group=None, + neighbor_partitions=n_parts, + global_neighbors=global_n_elems, + neighbor_faces=n_faces) + + return connected_mesh, queried_elems # }}} @@ -391,6 +501,8 @@ def merge_disjoint_meshes(meshes, skip_tests=False, single_group=False): grp_cls = None order = None unit_nodes = None + nodal_adjacency = None + facial_adjacency_groups = None for mesh in meshes: if mesh._nodal_adjacency is not None: @@ -426,6 +538,8 @@ def merge_disjoint_meshes(meshes, skip_tests=False, single_group=False): else: new_groups = [] + nodal_adjacency = None + facial_adjacency_groups = None for mesh, vert_base in zip(meshes, vert_bases): if mesh._nodal_adjacency is not None: diff --git a/test/test_meshmode.py b/test/test_meshmode.py index e97d8efa036f378272b7f94588f1e867265d9a35..bd775dfc8a682455790df02b9327f54cffe36535 100644 --- a/test/test_meshmode.py +++ b/test/test_meshmode.py @@ -49,56 +49,6 @@ import logging logger = logging.getLogger(__name__) -# {{{ partition_mesh - -def test_partition_torus_mesh(): - from meshmode.mesh.generation import generate_torus - my_mesh = generate_torus(2, 1, n_outer=2, n_inner=2) - - part_per_element = np.array([0, 1, 2, 1, 1, 2, 1, 0]) - - from meshmode.mesh.processing import partition_mesh - (part_mesh0, _) = partition_mesh(my_mesh, part_per_element, 0) - (part_mesh1, _) = partition_mesh(my_mesh, part_per_element, 1) - (part_mesh2, _) = partition_mesh(my_mesh, part_per_element, 2) - - assert part_mesh0.nelements == 2 - assert part_mesh1.nelements == 4 - assert part_mesh2.nelements == 2 - - -def test_partition_boxes_mesh(): - pymetis = pytest.importorskip('pymetis') - - n = 5 - num_parts = 7 - from meshmode.mesh.generation import generate_regular_rect_mesh - mesh1 = generate_regular_rect_mesh(a=(0, 0, 0), b=(1, 1, 1), n=(n, n, n)) - mesh2 = generate_regular_rect_mesh(a=(2, 2, 2), b=(3, 3, 3), n=(n, n, n)) - - from meshmode.mesh.processing import merge_disjoint_meshes - mesh = merge_disjoint_meshes([mesh1, mesh2]) - - adjacency_list = np.zeros((mesh.nelements,), dtype=set) - for elem in range(mesh.nelements): - adjacency_list[elem] = set() - starts = mesh.nodal_adjacency.neighbors_starts - for n in range(starts[elem], starts[elem + 1]): - adjacency_list[elem].add(mesh.nodal_adjacency.neighbors[n]) - - (_, p) = pymetis.part_graph(num_parts, adjacency=adjacency_list) - part_per_element = np.array(p) - - from meshmode.mesh.processing import partition_mesh - new_meshes = [ - partition_mesh(mesh, part_per_element, i)[0] for i in range(num_parts)] - - assert mesh.nelements == np.sum( - [new_meshes[i].nelements for i in range(num_parts)]) - -# }}} - - # {{{ circle mesh def test_circle_mesh(do_plot=False): @@ -143,9 +93,9 @@ def test_circle_mesh(do_plot=False): ("warp", 3, [10, 20, 30]), ]) @pytest.mark.parametrize("per_face_groups", [False, True]) -def test_boundary_interpolation(ctx_getter, group_factory, boundary_tag, +def test_boundary_interpolation(ctx_factory, group_factory, boundary_tag, mesh_name, dim, mesh_pars, per_face_groups): - cl_ctx = ctx_getter() + cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) from meshmode.discretization import Discretization @@ -231,9 +181,9 @@ def test_boundary_interpolation(ctx_getter, group_factory, boundary_tag, ("warp", 3, [10, 20, 30]), ]) @pytest.mark.parametrize("per_face_groups", [False, True]) -def test_all_faces_interpolation(ctx_getter, mesh_name, dim, mesh_pars, +def test_all_faces_interpolation(ctx_factory, mesh_name, dim, mesh_pars, per_face_groups): - cl_ctx = ctx_getter() + cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) from meshmode.discretization import Discretization @@ -343,11 +293,11 @@ def test_all_faces_interpolation(ctx_getter, mesh_name, dim, mesh_pars, ("warp", 2, [3, 5, 7]), ("warp", 3, [3, 5]), ]) -def test_opposite_face_interpolation(ctx_getter, group_factory, +def test_opposite_face_interpolation(ctx_factory, group_factory, mesh_name, dim, mesh_pars): logging.basicConfig(level=logging.INFO) - cl_ctx = ctx_getter() + cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) from meshmode.discretization import Discretization @@ -459,12 +409,12 @@ def test_element_orientation(): ("ball", lambda: mgen.generate_icosahedron(1, 1)), ("torus", lambda: mgen.generate_torus(5, 1)), ]) -def test_3d_orientation(ctx_getter, what, mesh_gen_func, visualize=False): +def test_3d_orientation(ctx_factory, what, mesh_gen_func, visualize=False): pytest.importorskip("pytential") logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) mesh = mesh_gen_func() @@ -514,7 +464,7 @@ def test_3d_orientation(ctx_getter, what, mesh_gen_func, visualize=False): # {{{ merge and map -def test_merge_and_map(ctx_getter, visualize=False): +def test_merge_and_map(ctx_factory, visualize=False): from meshmode.mesh.io import generate_gmsh, FileSource from meshmode.mesh.generation import generate_box_mesh from meshmode.mesh import TensorProductElementGroup @@ -555,7 +505,7 @@ def test_merge_and_map(ctx_getter, visualize=False): if visualize: from meshmode.discretization import Discretization - cl_ctx = ctx_getter() + cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) discr = Discretization(cl_ctx, mesh3, discr_grp_factory) @@ -571,10 +521,10 @@ def test_merge_and_map(ctx_getter, visualize=False): @pytest.mark.parametrize("dim", [2, 3]) @pytest.mark.parametrize("order", [1, 3]) -def test_sanity_single_element(ctx_getter, dim, order, visualize=False): +def test_sanity_single_element(ctx_factory, dim, order, visualize=False): pytest.importorskip("pytential") - cl_ctx = ctx_getter() + cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) from modepy.tools import unit_vertices @@ -658,12 +608,12 @@ def test_sanity_single_element(ctx_getter, dim, order, visualize=False): @pytest.mark.parametrize("dim", [2, 3, 4]) @pytest.mark.parametrize("order", [3]) -def test_sanity_qhull_nd(ctx_getter, dim, order): +def test_sanity_qhull_nd(ctx_factory, dim, order): pytest.importorskip("scipy") logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from scipy.spatial import Delaunay @@ -712,13 +662,13 @@ def test_sanity_qhull_nd(ctx_getter, dim, order): ("ball-radius-1.step", 3), ]) @pytest.mark.parametrize("mesh_order", [1, 2]) -def test_sanity_balls(ctx_getter, src_file, dim, mesh_order, +def test_sanity_balls(ctx_factory, src_file, dim, mesh_order, visualize=False): pytest.importorskip("pytential") logging.basicConfig(level=logging.INFO) - ctx = ctx_getter() + ctx = ctx_factory() queue = cl.CommandQueue(ctx) from pytools.convergence import EOCRecorder @@ -839,7 +789,7 @@ def test_rect_mesh(do_plot=False): pt.show() -def test_box_mesh(ctx_getter, visualize=False): +def test_box_mesh(ctx_factory, visualize=False): from meshmode.mesh.generation import generate_box_mesh mesh = generate_box_mesh(3*(np.linspace(0, 1, 5),)) @@ -847,7 +797,7 @@ def test_box_mesh(ctx_getter, visualize=False): from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ PolynomialWarpAndBlendGroupFactory - cl_ctx = ctx_getter() + cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) discr = Discretization(cl_ctx, mesh, @@ -996,6 +946,8 @@ def no_test_quad_mesh_3d(): # }}} +# {{{ test_quad_single_element + def test_quad_single_element(): from meshmode.mesh.generation import make_group_from_vertices from meshmode.mesh import Mesh, TensorProductElementGroup @@ -1019,6 +971,10 @@ def test_quad_single_element(): mg.nodes[1].reshape(-1), "o") plt.show() +# }}} + + +# {{{ test_quad_multi_element def test_quad_multi_element(): from meshmode.mesh.generation import generate_box_mesh @@ -1039,6 +995,8 @@ def test_quad_multi_element(): mg.nodes[1].reshape(-1), "o") plt.show() +# }}} + def test_vtk_overwrite(ctx_getter): pytest.importorskip("pyvisfile") diff --git a/test/test_partition.py b/test/test_partition.py new file mode 100644 index 0000000000000000000000000000000000000000..78258f4f1a0becdba402e32f68b05f0d5fa52ac0 --- /dev/null +++ b/test/test_partition.py @@ -0,0 +1,559 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = """ +Copyright (C) 2017 Ellis Hoag +Copyright (C) 2017 Andreas Kloeckner +""" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from six.moves import range +import numpy as np +import numpy.linalg as la +import pyopencl as cl +import pyopencl.array # noqa +import pyopencl.clmath # noqa + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl + as pytest_generate_tests) + +from meshmode.discretization.poly_element import ( + PolynomialWarpAndBlendGroupFactory) +from meshmode.mesh import BTAG_ALL + +import pytest +import os + +import logging +logger = logging.getLogger(__name__) + +# Is there a smart way of choosing this number? +# Currenly it is the same as the base from MPIBoundaryTransceiver +TAG_BASE = 83411 +TAG_SEND_REMOTE_NODES = TAG_BASE + 3 +TAG_SEND_LOCAL_NODES = TAG_BASE + 4 + + +# {{{ partition_interpolation + +@pytest.mark.parametrize("num_parts", [2, 3]) +@pytest.mark.parametrize("num_groups", [1, 2]) +@pytest.mark.parametrize("scramble_partitions", [False]) +@pytest.mark.parametrize(("dim", "mesh_pars"), + [ + (2, [3, 4, 7]), + (3, [3, 4]) + ]) +def test_partition_interpolation(ctx_factory, dim, mesh_pars, + num_parts, num_groups, scramble_partitions): + np.random.seed(42) + group_factory = PolynomialWarpAndBlendGroupFactory + cl_ctx = ctx_factory() + queue = cl.CommandQueue(cl_ctx) + order = 4 + + from pytools.convergence import EOCRecorder + eoc_rec = dict() + for i in range(num_parts): + for j in range(num_parts): + if i == j: + continue + eoc_rec[i, j] = EOCRecorder() + + def f(x): + return 10.*cl.clmath.sin(50.*x) + + for n in mesh_pars: + from meshmode.mesh.generation import generate_warped_rect_mesh + meshes = [generate_warped_rect_mesh(dim, order=order, n=n) + for _ in range(num_groups)] + + if num_groups > 1: + from meshmode.mesh.processing import merge_disjoint_meshes + mesh = merge_disjoint_meshes(meshes) + else: + mesh = meshes[0] + + if scramble_partitions: + part_per_element = np.random.randint(num_parts, size=mesh.nelements) + else: + from pymetis import part_graph + _, p = part_graph(num_parts, + xadj=mesh.nodal_adjacency.neighbors_starts.tolist(), + adjncy=mesh.nodal_adjacency.neighbors.tolist()) + part_per_element = np.array(p) + + from meshmode.mesh.processing import partition_mesh + part_meshes = [ + partition_mesh(mesh, part_per_element, i)[0] for i in range(num_parts)] + + from meshmode.discretization import Discretization + vol_discrs = [Discretization(cl_ctx, part_meshes[i], group_factory(order)) + for i in range(num_parts)] + + from meshmode.mesh import BTAG_PARTITION + from meshmode.discretization.connection import (make_face_restriction, + make_partition_connection, + check_connection) + + for i_local_part, i_remote_part in eoc_rec.keys(): + if eoc_rec[i_local_part, i_remote_part] is None: + continue + + # Mark faces within local_mesh that are connected to remote_mesh + local_bdry_conn = make_face_restriction(vol_discrs[i_local_part], + group_factory(order), + BTAG_PARTITION(i_remote_part)) + + # If these parts are not connected, don't bother checking the error + bdry_nodes = local_bdry_conn.to_discr.nodes() + if bdry_nodes.size == 0: + eoc_rec[i_local_part, i_remote_part] = None + continue + + # Mark faces within remote_mesh that are connected to local_mesh + remote_bdry_conn = make_face_restriction(vol_discrs[i_remote_part], + group_factory(order), + BTAG_PARTITION(i_local_part)) + + assert bdry_nodes.size == remote_bdry_conn.to_discr.nodes().size, \ + "partitions do not have the same number of connected nodes" + + # Gather just enough information for the connection + local_bdry = local_bdry_conn.to_discr + local_mesh = part_meshes[i_local_part] + local_adj_groups = [local_mesh.facial_adjacency_groups[i][None] + for i in range(len(local_mesh.groups))] + local_batches = [local_bdry_conn.groups[i].batches + for i in range(len(local_mesh.groups))] + local_from_elem_faces = [[batch.to_element_face + for batch in grp_batches] + for grp_batches in local_batches] + local_from_elem_indices = [[batch.to_element_indices.get(queue=queue) + for batch in grp_batches] + for grp_batches in local_batches] + + remote_bdry = remote_bdry_conn.to_discr + remote_mesh = part_meshes[i_remote_part] + remote_adj_groups = [remote_mesh.facial_adjacency_groups[i][None] + for i in range(len(remote_mesh.groups))] + remote_batches = [remote_bdry_conn.groups[i].batches + for i in range(len(remote_mesh.groups))] + remote_from_elem_faces = [[batch.to_element_face + for batch in grp_batches] + for grp_batches in remote_batches] + remote_from_elem_indices = [[batch.to_element_indices.get(queue=queue) + for batch in grp_batches] + for grp_batches in remote_batches] + + # Connect from remote_mesh to local_mesh + remote_to_local_conn = make_partition_connection(local_bdry_conn, + i_local_part, + remote_bdry, + remote_adj_groups, + remote_from_elem_faces, + remote_from_elem_indices) + # Connect from local mesh to remote mesh + local_to_remote_conn = make_partition_connection(remote_bdry_conn, + i_remote_part, + local_bdry, + local_adj_groups, + local_from_elem_faces, + local_from_elem_indices) + check_connection(remote_to_local_conn) + check_connection(local_to_remote_conn) + + true_local_points = f(local_bdry.nodes()[0].with_queue(queue)) + remote_points = local_to_remote_conn(queue, true_local_points) + local_points = remote_to_local_conn(queue, remote_points) + + err = la.norm((true_local_points - local_points).get(), np.inf) + eoc_rec[i_local_part, i_remote_part].add_data_point(1./n, err) + + for (i, j), e in eoc_rec.items(): + if e is not None: + print("Error of connection from part %i to part %i." % (i, j)) + print(e) + assert(e.order_estimate() >= order - 0.5 or e.max_error() < 1e-11) + +# }}} + + +# {{{ partition_mesh + +@pytest.mark.parametrize("dim", [2, 3]) +@pytest.mark.parametrize("num_parts", [4, 5, 7]) +@pytest.mark.parametrize("num_meshes", [1, 2, 7]) +@pytest.mark.parametrize("scramble_partitions", [True, False]) +def test_partition_mesh(num_parts, num_meshes, dim, scramble_partitions): + np.random.seed(42) + n = (5,) * dim + from meshmode.mesh.generation import generate_regular_rect_mesh + meshes = [generate_regular_rect_mesh(a=(0 + i,) * dim, b=(1 + i,) * dim, n=n) + for i in range(num_meshes)] + + from meshmode.mesh.processing import merge_disjoint_meshes + mesh = merge_disjoint_meshes(meshes) + + if scramble_partitions: + part_per_element = np.random.randint(num_parts, size=mesh.nelements) + else: + pymetis = pytest.importorskip('pymetis') + + _, p = pymetis.part_graph(num_parts, + xadj=mesh.nodal_adjacency.neighbors_starts.tolist(), + adjncy=mesh.nodal_adjacency.neighbors.tolist()) + part_per_element = np.array(p) + + from meshmode.mesh.processing import partition_mesh + # TODO: The same part_per_element array must be used to partition each mesh. + # Maybe the interface should be changed to guarantee this. + new_meshes = [ + partition_mesh(mesh, part_per_element, i) for i in range(num_parts)] + + assert mesh.nelements == np.sum( + [new_meshes[i][0].nelements for i in range(num_parts)]), \ + "part_mesh has the wrong number of elements" + + assert count_tags(mesh, BTAG_ALL) == np.sum( + [count_tags(new_meshes[i][0], BTAG_ALL) for i in range(num_parts)]), \ + "part_mesh has the wrong number of BTAG_ALL boundaries" + + from meshmode.mesh import BTAG_PARTITION, InterPartitionAdjacencyGroup + from meshmode.mesh.processing import find_group_indices + num_tags = np.zeros((num_parts,)) + + index_lookup_table = dict() + for ipart, (m, _) in enumerate(new_meshes): + for igrp in range(len(m.groups)): + adj = m.facial_adjacency_groups[igrp][None] + if not isinstance(adj, InterPartitionAdjacencyGroup): + # This group is not connected to another partition. + continue + for i, (elem, face) in enumerate(zip(adj.elements, adj.element_faces)): + index_lookup_table[ipart, igrp, elem, face] = i + + for part_num in range(num_parts): + part, part_to_global = new_meshes[part_num] + for grp_num in range(len(part.groups)): + adj = part.facial_adjacency_groups[grp_num][None] + tags = -part.facial_adjacency_groups[grp_num][None].neighbors + assert np.all(tags >= 0) + if not isinstance(adj, InterPartitionAdjacencyGroup): + # This group is not connected to another partition. + continue + elem_base = part.groups[grp_num].element_nr_base + for idx in range(len(adj.elements)): + if adj.global_neighbors[idx] == -1: + continue + elem = adj.elements[idx] + face = adj.element_faces[idx] + n_part_num = adj.neighbor_partitions[idx] + n_meshwide_elem = adj.global_neighbors[idx] + n_face = adj.neighbor_faces[idx] + num_tags[n_part_num] += 1 + n_part, n_part_to_global = new_meshes[n_part_num] + # Hack: find_igrps expects a numpy.ndarray and returns + # a numpy.ndarray. But if a single integer is fed + # into find_igrps, an integer is returned. + n_grp_num = int(find_group_indices(n_part.groups, n_meshwide_elem)) + n_adj = n_part.facial_adjacency_groups[n_grp_num][None] + n_elem_base = n_part.groups[n_grp_num].element_nr_base + n_elem = n_meshwide_elem - n_elem_base + n_idx = index_lookup_table[n_part_num, n_grp_num, n_elem, n_face] + assert (part_num == n_adj.neighbor_partitions[n_idx] + and elem + elem_base == n_adj.global_neighbors[n_idx] + and face == n_adj.neighbor_faces[n_idx]),\ + "InterPartitionAdjacencyGroup is not consistent" + _, n_part_to_global = new_meshes[n_part_num] + p_meshwide_elem = part_to_global[elem + elem_base] + p_meshwide_n_elem = n_part_to_global[n_elem + n_elem_base] + + p_grp_num = find_group_indices(mesh.groups, p_meshwide_elem) + p_n_grp_num = find_group_indices(mesh.groups, p_meshwide_n_elem) + + p_elem_base = mesh.groups[p_grp_num].element_nr_base + p_n_elem_base = mesh.groups[p_n_grp_num].element_nr_base + p_elem = p_meshwide_elem - p_elem_base + p_n_elem = p_meshwide_n_elem - p_n_elem_base + + f_groups = mesh.facial_adjacency_groups[p_grp_num] + for p_bnd_adj in f_groups.values(): + for idx in range(len(p_bnd_adj.elements)): + if (p_elem == p_bnd_adj.elements[idx] and + face == p_bnd_adj.element_faces[idx]): + assert p_n_elem == p_bnd_adj.neighbors[idx],\ + "Tag does not give correct neighbor" + assert n_face == p_bnd_adj.neighbor_faces[idx],\ + "Tag does not give correct neighbor" + + for i_tag in range(num_parts): + tag_sum = 0 + for mesh, _ in new_meshes: + tag_sum += count_tags(mesh, BTAG_PARTITION(i_tag)) + assert num_tags[i_tag] == tag_sum,\ + "part_mesh has the wrong number of BTAG_PARTITION boundaries" + + +def count_tags(mesh, tag): + num_bnds = 0 + for adj_dict in mesh.facial_adjacency_groups: + for neighbors in adj_dict[None].neighbors: + if neighbors < 0: + if -neighbors & mesh.boundary_tag_bit(tag) != 0: + num_bnds += 1 + return num_bnds + +# }}} + + +# {{{ MPI test boundary swap + +def _test_mpi_boundary_swap(dim, order, num_groups): + from meshmode.distributed import MPIMeshDistributor, MPIBoundaryCommSetupHelper + + from mpi4py import MPI + mpi_comm = MPI.COMM_WORLD + i_local_part = mpi_comm.Get_rank() + num_parts = mpi_comm.Get_size() + + mesh_dist = MPIMeshDistributor(mpi_comm) + + if mesh_dist.is_mananger_rank(): + np.random.seed(42) + from meshmode.mesh.generation import generate_warped_rect_mesh + meshes = [generate_warped_rect_mesh(dim, order=order, n=4) + for _ in range(num_groups)] + + if num_groups > 1: + from meshmode.mesh.processing import merge_disjoint_meshes + mesh = merge_disjoint_meshes(meshes) + else: + mesh = meshes[0] + + part_per_element = np.random.randint(num_parts, size=mesh.nelements) + + local_mesh = mesh_dist.send_mesh_parts(mesh, part_per_element, num_parts) + else: + local_mesh = mesh_dist.receive_mesh_part() + + group_factory = PolynomialWarpAndBlendGroupFactory(order) + cl_ctx = cl.create_some_context() + queue = cl.CommandQueue(cl_ctx) + + from meshmode.discretization import Discretization + vol_discr = Discretization(cl_ctx, local_mesh, group_factory) + + from meshmode.distributed import get_connected_partitions + connected_parts = get_connected_partitions(local_mesh) + assert i_local_part not in connected_parts + bdry_setup_helpers = {} + local_bdry_conns = {} + + from meshmode.discretization.connection import make_face_restriction + from meshmode.mesh import BTAG_PARTITION + for i_remote_part in connected_parts: + local_bdry_conns[i_remote_part] = make_face_restriction( + vol_discr, group_factory, BTAG_PARTITION(i_remote_part)) + + setup_helper = bdry_setup_helpers[i_remote_part] = \ + MPIBoundaryCommSetupHelper( + mpi_comm, queue, local_bdry_conns[i_remote_part], + i_remote_part, bdry_grp_factory=group_factory) + + setup_helper.post_sends() + + remote_to_local_bdry_conns = {} + from meshmode.discretization.connection import check_connection + while bdry_setup_helpers: + for i_remote_part, setup_helper in bdry_setup_helpers.items(): + if setup_helper.is_setup_ready(): + assert bdry_setup_helpers.pop(i_remote_part) is setup_helper + conn = setup_helper.complete_setup() + check_connection(conn) + remote_to_local_bdry_conns[i_remote_part] = conn + break + + # FIXME: Not ideal, busy-waits + + _test_data_transfer(mpi_comm, + queue, + local_bdry_conns, + remote_to_local_bdry_conns, + connected_parts) + + logger.debug("Rank %d exiting", i_local_part) + + +# TODO +def _test_data_transfer(mpi_comm, queue, local_bdry_conns, + remote_to_local_bdry_conns, connected_parts): + from mpi4py import MPI + + def f(x): + return 10*cl.clmath.sin(20.*x) + + ''' + Here is a simplified example of what happens from + the point of view of the local rank. + + Local rank: + 1. Transfer local points from local boundary to remote boundary + to get remote points. + 2. Send remote points to remote rank. + Remote rank: + 3. Receive remote points from local rank. + 4. Transfer remote points from remote boundary to local boundary + to get local points. + 5. Send local points to local rank. + Local rank: + 6. Receive local points from remote rank. + 7. Check if local points are the same as the original local points. + ''' + + # 1. + send_reqs = [] + for i_remote_part in connected_parts: + conn = remote_to_local_bdry_conns[i_remote_part] + bdry_discr = local_bdry_conns[i_remote_part].to_discr + bdry_x = bdry_discr.nodes()[0].with_queue(queue=queue) + + true_local_f = f(bdry_x) + remote_f = conn(queue, true_local_f) + + # 2. + send_reqs.append(mpi_comm.isend(remote_f.get(queue=queue), + dest=i_remote_part, + tag=TAG_SEND_REMOTE_NODES)) + + # 3. + buffers = {} + for i_remote_part in connected_parts: + status = MPI.Status() + mpi_comm.probe(source=i_remote_part, + tag=TAG_SEND_REMOTE_NODES, + status=status) + buffers[i_remote_part] = np.empty(status.count, dtype=bytes) + + recv_reqs = {} + for i_remote_part, buf in buffers.items(): + recv_reqs[i_remote_part] = mpi_comm.irecv(buf=buf, + source=i_remote_part, + tag=TAG_SEND_REMOTE_NODES) + remote_to_local_f_data = {} + for i_remote_part, req in recv_reqs.items(): + remote_to_local_f_data[i_remote_part] = req.wait() + buffers[i_remote_part] = None # free buffer + + for req in send_reqs: + req.wait() + + # 4. + send_reqs = [] + for i_remote_part in connected_parts: + conn = remote_to_local_bdry_conns[i_remote_part] + local_f_np = remote_to_local_f_data[i_remote_part] + local_f_cl = cl.array.Array(queue, + shape=local_f_np.shape, + dtype=local_f_np.dtype) + local_f_cl.set(local_f_np) + remote_f = conn(queue, local_f_cl).get(queue=queue) + + # 5. + send_reqs.append(mpi_comm.isend(remote_f, + dest=i_remote_part, + tag=TAG_SEND_LOCAL_NODES)) + + # 6. + buffers = {} + for i_remote_part in connected_parts: + status = MPI.Status() + mpi_comm.probe(source=i_remote_part, + tag=TAG_SEND_LOCAL_NODES, + status=status) + buffers[i_remote_part] = np.empty(status.count, dtype=bytes) + + recv_reqs = {} + for i_remote_part, buf in buffers.items(): + recv_reqs[i_remote_part] = mpi_comm.irecv(buf=buf, + source=i_remote_part, + tag=TAG_SEND_LOCAL_NODES) + local_f_data = {} + for i_remote_part, req in recv_reqs.items(): + local_f_data[i_remote_part] = req.wait() + buffers[i_remote_part] = None # free buffer + + for req in send_reqs: + req.wait() + + # 7. + for i_remote_part in connected_parts: + bdry_discr = local_bdry_conns[i_remote_part].to_discr + bdry_x = bdry_discr.nodes()[0].with_queue(queue=queue) + + true_local_f = f(bdry_x).get(queue=queue) + local_f = local_f_data[i_remote_part] + + from numpy.linalg import norm + err = norm(true_local_f - local_f, np.inf) + assert err < 1e-11, "Error = %f is too large" % err + +# }}} + + +# {{{ MPI pytest entrypoint + +@pytest.mark.mpi +@pytest.mark.parametrize("num_partitions", [3, 4]) +@pytest.mark.parametrize("order", [2, 3]) +def test_mpi_communication(num_partitions, order): + pytest.importorskip("mpi4py") + + num_ranks = num_partitions + from subprocess import check_call + import sys + newenv = os.environ.copy() + newenv["RUN_WITHIN_MPI"] = "1" + newenv["order"] = str(order) + check_call([ + "mpiexec", "-np", str(num_ranks), "-x", "RUN_WITHIN_MPI", + sys.executable, __file__], + env=newenv) + +# }}} + + +if __name__ == "__main__": + if "RUN_WITHIN_MPI" in os.environ: + dim = 2 + order = int(os.environ["order"]) + num_groups = 2 + _test_mpi_boundary_swap(dim, order, num_groups) + else: + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from py.test.cmdline import main + main([__file__]) + +# vim: fdm=marker