diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 48d7cd3fa690c8a7fd5dd2a8c6821b11ab52c021..750bf6f4cc5018fc14c9195ec688d45bea21d129 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -80,6 +80,7 @@ Python 3.5 Conda Apple: - apple except: - tags + retry: 2 Documentation: script: diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index a5292fdede8bab1f61e0df97cf75e19b7cd63d8f..037f818893a77edbad9a032a3868ac0dec8af514 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -248,7 +248,9 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), self.multipole_expansions_view(multipole_exps, isrc_level) evt, (qbx_expansions_res,) = m2qbxl(self.queue, - qbx_center_to_target_box=geo_data.qbx_center_to_target_box(), + qbx_center_to_target_box_source_level=( + geo_data.qbx_center_to_target_box_source_level(isrc_level) + ), centers=self.tree.box_centers, qbx_centers=geo_data.centers(), @@ -437,8 +439,7 @@ def drive_fmm(expansion_wrangler, src_weights): # contribution *out* of the downward-propagating local expansions) non_qbx_potentials = non_qbx_potentials + wrangler.eval_multipoles( - traversal.level_start_target_box_nrs, - traversal.target_boxes, + traversal.target_boxes_sep_smaller_by_source_level, traversal.from_sep_smaller_by_level, mpole_exps) @@ -717,13 +718,15 @@ def assemble_performance_data(geo_data, uses_pde_expansions, assert tree.nlevels == len(traversal.from_sep_smaller_by_level) - for itgt_box, tgt_ibox in enumerate(traversal.target_boxes): - ntargets = box_target_counts_nonchild[tgt_ibox] - - for ilevel, sep_smaller_list in enumerate( - traversal.from_sep_smaller_by_level): + for ilevel, sep_smaller_list in enumerate( + traversal.from_sep_smaller_by_level): + for itgt_box, tgt_ibox in enumerate( + traversal.target_boxes_sep_smaller_by_source_level[ilevel]): + ntargets = box_target_counts_nonchild[tgt_ibox] start, end = sep_smaller_list.starts[itgt_box:itgt_box+2] - nmp_eval[ilevel, itgt_box] += ntargets * (end-start) + nmp_eval[ilevel, 
sep_smaller_list.nonempty_indices[itgt_box]] = ( + ntargets * (end-start) + ) result["mp_eval"] = summarize_parallel(nmp_eval, ncoeffs_fmm) @@ -772,6 +775,13 @@ def assemble_performance_data(geo_data, uses_pde_expansions, global_qbx_centers = geo_data.global_qbx_centers() qbx_center_to_target_box = geo_data.qbx_center_to_target_box() center_to_targets_starts = geo_data.center_to_tree_targets().starts + qbx_center_to_target_box_source_level = np.empty( + (tree.nlevels,), dtype=object + ) + for src_level in range(tree.nlevels): + qbx_center_to_target_box_source_level[src_level] = ( + geo_data.qbx_center_to_target_box_source_level(src_level) + ) with cl.CommandQueue(geo_data.cl_context) as queue: global_qbx_centers = global_qbx_centers.get( @@ -780,6 +790,10 @@ def assemble_performance_data(geo_data, uses_pde_expansions, queue=queue) center_to_targets_starts = center_to_targets_starts.get( queue=queue) + for src_level in range(tree.nlevels): + qbx_center_to_target_box_source_level[src_level] = ( + qbx_center_to_target_box_source_level[src_level].get(queue=queue) + ) def process_form_qbxl(): ncenters = geo_data.ncenters @@ -856,8 +870,13 @@ def assemble_performance_data(geo_data, uses_pde_expansions, assert tree.nlevels == len(traversal.from_sep_smaller_by_level) for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): + for itgt_center, tgt_icenter in enumerate(global_qbx_centers): - icontaining_tgt_box = qbx_center_to_target_box[tgt_icenter] + icontaining_tgt_box = qbx_center_to_target_box_source_level[ + isrc_level][tgt_icenter] + + if icontaining_tgt_box == -1: + continue start, stop = ( ssn.starts[icontaining_tgt_box], diff --git a/pytential/qbx/fmmlib.py b/pytential/qbx/fmmlib.py index 887b3049b568f0b0a98a2a998aeee81d6ebf4dfe..578dadce208da52b81f1e5014381e0aa04df4a17 100644 --- a/pytential/qbx/fmmlib.py +++ b/pytential/qbx/fmmlib.py @@ -99,6 +99,11 @@ class ToHostTransferredGeoDataWrapper(object): def qbx_center_to_target_box(self): return 
self.geo_data.qbx_center_to_target_box().get(queue=self.queue) + @memoize_method + def qbx_center_to_target_box_source_level(self, source_level): + return self.geo_data.qbx_center_to_target_box_source_level( + source_level).get(queue=self.queue) + @memoize_method def non_qbx_box_target_lists(self): return self.geo_data.non_qbx_box_target_lists().get(queue=self.queue) @@ -347,23 +352,27 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): qbx_exps = self.qbx_local_expansion_zeros() geo_data = self.geo_data - qbx_center_to_target_box = geo_data.qbx_center_to_target_box() qbx_centers = geo_data.centers() centers = self.tree.box_centers ngqbx_centers = len(geo_data.global_qbx_centers()) + traversal = geo_data.traversal() if ngqbx_centers == 0: return qbx_exps mploc = self.get_translation_routine("%ddmploc", vec_suffix="_imany") - for isrc_level, ssn in enumerate( - geo_data.traversal().from_sep_smaller_by_level): + for isrc_level, ssn in enumerate(traversal.from_sep_smaller_by_level): source_level_start_ibox, source_mpoles_view = \ self.multipole_expansions_view(multipole_exps, isrc_level) tgt_icenter_vec = geo_data.global_qbx_centers() - icontaining_tgt_box_vec = qbx_center_to_target_box[tgt_icenter_vec] + qbx_center_to_target_box_source_level = ( + geo_data.qbx_center_to_target_box_source_level(isrc_level) + ) + icontaining_tgt_box_vec = qbx_center_to_target_box_source_level[ + tgt_icenter_vec + ] rscale2 = geo_data.expansion_radii()[geo_data.global_qbx_centers()] @@ -372,9 +381,13 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): kwargs["radius"] = (0.5 * geo_data.expansion_radii()[geo_data.global_qbx_centers()]) - nsrc_boxes_per_gqbx_center = ( - ssn.starts[icontaining_tgt_box_vec+1] - - ssn.starts[icontaining_tgt_box_vec]) + nsrc_boxes_per_gqbx_center = np.zeros(icontaining_tgt_box_vec.shape, + dtype=traversal.tree.box_id_dtype) + mask = (icontaining_tgt_box_vec != -1) + nsrc_boxes_per_gqbx_center[mask] = ( + 
ssn.starts[icontaining_tgt_box_vec[mask] + 1] - + ssn.starts[icontaining_tgt_box_vec[mask]] + ) nsrc_boxes = np.sum(nsrc_boxes_per_gqbx_center) src_boxes_starts = np.empty(ngqbx_centers+1, dtype=np.int32) @@ -387,7 +400,9 @@ class QBXFMMLibExpansionWrangler(FMMLibExpansionWrangler): src_ibox = np.empty(nsrc_boxes, dtype=np.int32) for itgt_center, tgt_icenter in enumerate( geo_data.global_qbx_centers()): - icontaining_tgt_box = qbx_center_to_target_box[tgt_icenter] + icontaining_tgt_box = qbx_center_to_target_box_source_level[ + tgt_icenter + ] src_ibox[ src_boxes_starts[itgt_center]: src_boxes_starts[itgt_center+1]] = ( diff --git a/pytential/qbx/geometry.py b/pytential/qbx/geometry.py index 7cc8e8ea739f38e20aa154af2891e7caa676ad07..f9cc10e07e0ea0ec84dbb545bcde644d10b547e0 100644 --- a/pytential/qbx/geometry.py +++ b/pytential/qbx/geometry.py @@ -614,6 +614,36 @@ class QBXFMMGeometryData(object): return qbx_center_to_target_box.with_queue(None) + @memoize_method + def qbx_center_to_target_box_source_level(self, source_level): + """Return an array for mapping qbx centers to indices into + interaction lists as found in + ``traversal.from_sep_smaller_by_level[source_level]``. + -1 if no such interaction list exists on *source_level*. 
+ """ + traversal = self.traversal() + sep_smaller = traversal.from_sep_smaller_by_level[source_level] + qbx_center_to_target_box = self.qbx_center_to_target_box() + + with cl.CommandQueue(self.cl_context) as queue: + target_box_to_target_box_source_level = cl.array.empty( + queue, len(traversal.target_boxes), + dtype=traversal.tree.box_id_dtype + ) + target_box_to_target_box_source_level.fill(-1) + target_box_to_target_box_source_level[sep_smaller.nonempty_indices] = ( + cl.array.arange(queue, sep_smaller.num_nonempty_lists, + dtype=traversal.tree.box_id_dtype) + ) + + qbx_center_to_target_box_source_level = ( + target_box_to_target_box_source_level[ + qbx_center_to_target_box + ] + ) + + return qbx_center_to_target_box_source_level.with_queue(None) + @memoize_method def global_qbx_flags(self): """Return an array of :class:`numpy.int8` of length diff --git a/pytential/qbx/interactions.py b/pytential/qbx/interactions.py index 0cca9f170eb5ecc13bbf8166673cc13318b74150..dad4db1f2988f566fcdd9c2d07ead3c8d455e525 100644 --- a/pytential/qbx/interactions.py +++ b/pytential/qbx/interactions.py @@ -160,38 +160,42 @@ class M2QBXL(E2EBase): ], [""" for icenter - <> icontaining_tgt_box = qbx_center_to_target_box[icenter] + <> icontaining_tgt_box = \ + qbx_center_to_target_box_source_level[icenter] - <> tgt_center[idim] = qbx_centers[idim, icenter] \ - {id=fetch_tgt_center} - <> tgt_rscale = qbx_expansion_radii[icenter] + if icontaining_tgt_box != -1 + <> tgt_center[idim] = qbx_centers[idim, icenter] \ + {id=fetch_tgt_center} + <> tgt_rscale = qbx_expansion_radii[icenter] - <> isrc_start = src_box_starts[icontaining_tgt_box] - <> isrc_stop = src_box_starts[icontaining_tgt_box+1] + <> isrc_start = src_box_starts[icontaining_tgt_box] + <> isrc_stop = src_box_starts[icontaining_tgt_box+1] - for isrc_box - <> src_ibox = src_box_lists[isrc_box] \ - {id=read_src_ibox} - <> src_center[idim] = centers[idim, src_ibox] {dup=idim} - <> d[idim] = tgt_center[idim] - src_center[idim] 
{dup=idim} - """] + [""" + for isrc_box + <> src_ibox = src_box_lists[isrc_box] \ + {id=read_src_ibox} + <> src_center[idim] = centers[idim, src_ibox] {dup=idim} + <> d[idim] = tgt_center[idim] - src_center[idim] \ + {dup=idim} + """] + [""" - <> src_coeff{i} = \ - src_expansions[src_ibox - src_base_ibox, {i}] \ - {{dep=read_src_ibox}} + <> src_coeff{i} = \ + src_expansions[src_ibox - src_base_ibox, {i}] \ + {{dep=read_src_ibox}} - """.format(i=i) for i in range(ncoeff_src)] + [ + """.format(i=i) for i in range(ncoeff_src)] + [ - ] + self.get_translation_loopy_insns() + [""" + ] + self.get_translation_loopy_insns() + [""" + end + """] + [""" + qbx_expansions[icenter, {i}] = \ + qbx_expansions[icenter, {i}] + \ + simul_reduce(sum, isrc_box, coeff{i}) \ + {{id_prefix=write_expn}} + """.format(i=i) + for i in range(ncoeff_tgt)] + [""" end - """] + [""" - qbx_expansions[icenter, {i}] = qbx_expansions[icenter, {i}] + \ - simul_reduce(sum, isrc_box, coeff{i}) \ - {{id_prefix=write_expn}} - """.format(i=i) - for i in range(ncoeff_tgt)] + [""" - end """], [ diff --git a/pytential/version.py b/pytential/version.py index 0118173ddf0d458d695cc2501d7f1ee7c0569c34..426eafaf79ab790587a3d57c2370c4ac689d30f0 100644 --- a/pytential/version.py +++ b/pytential/version.py @@ -5,4 +5,4 @@ VERSION_TEXT = ".".join(str(i) for i in VERSION) # branch name, so as to avoid conflicts with the master branch. Make sure # to reset this to the next number up with "master" before merging into # master. -PYTENTIAL_KERNEL_VERSION = ("master", 9) +PYTENTIAL_KERNEL_VERSION = ("master", 10)