diff --git a/boxtree/pyfmmlib_integration.py b/boxtree/pyfmmlib_integration.py index 33e9dec7d087cecb0610a35e9ef08a6daebc631d..dfff104bf5576cd3d5ce207a483893a30fd1066d 100644 --- a/boxtree/pyfmmlib_integration.py +++ b/boxtree/pyfmmlib_integration.py @@ -396,6 +396,13 @@ class FMMLibExpansionWrangler(object): def _get_targets(self, pslice): return self._get_single_targets_array()[:, pslice] + @memoize_method + def _get_single_box_centers_array(self): + return np.array([ + self.tree.box_centers[idim] + for idim in range(self.dim) + ], order="F") + # }}} @log_process(logger) @@ -698,48 +705,74 @@ class FMMLibExpansionWrangler(object): target_or_target_parent_boxes, starts, lists, src_weights): local_exps = self.local_expansion_zeros() - formta = self.get_routine("%ddformta" + self.dp_suffix) + formta = self.get_routine("%ddformta" + self.dp_suffix, suffix="_imany") + + sources = self._get_single_sources_array() + # sources_starts / sources_lists is a CSR list mapping box centers to + # lists of starting indices into the sources array. To get the starting + # source indices we have to look at box_source_starts. + sources_offsets = self.tree.box_source_starts[lists] + + # nsources_starts / nsources_lists is a CSR list mapping box centers to + # lists of indices into nsources, each of which represents a source + # count. + nsources = self.tree.box_source_counts_nonchild + nsources_offsets = lists + + # centers is indexed into by values of centers_offsets, which is a list + # mapping box indices to box center indices. + centers = self._get_single_box_centers_array() + + source_kwargs = self.get_source_kwargs(src_weights, slice(None)) for lev in range(self.tree.nlevels): lev_start, lev_stop = \ level_start_target_or_target_parent_box_nrs[lev:lev+2] + if lev_start == lev_stop: continue - target_level_start_ibox, target_local_exps_view = \ + target_box_start, target_local_exps_view = \ self.local_expansions_view(local_exps, lev) - rscale = self.level_to_rscale(lev) - - for itgt_box, tgt_ibox in enumerate( - target_or_target_parent_boxes[lev_start:lev_stop]): - start, end = starts[lev_start+itgt_box:lev_start+itgt_box+2] - - contrib = 0 - - for src_ibox in lists[start:end]: - src_pslice = self._get_source_slice(src_ibox) - tgt_center = self.tree.box_centers[:, tgt_ibox] - - if src_pslice.stop - src_pslice.start == 0: - continue + centers_offsets = target_or_target_parent_boxes[lev_start:lev_stop] - kwargs = {} - kwargs.update(self.kernel_kwargs) - kwargs.update(self.get_source_kwargs(src_weights, src_pslice)) - - ier, mpole = formta( - rscale=rscale, - source=self._get_sources(src_pslice), - center=tgt_center, - nterms=self.level_nterms[lev], - **kwargs) - if ier: - raise RuntimeError("formta failed") + rscale = self.level_to_rscale(lev) - contrib = contrib + mpole.T + sources_starts = starts[lev_start:1 + lev_stop] + nsources_starts = sources_starts - target_local_exps_view[tgt_ibox-target_level_start_ibox] = contrib + kwargs = {} + kwargs.update(self.kernel_kwargs) + for key, val in source_kwargs.items(): + kwargs[key] = val + # Add CSR lists mapping box centers to lists of starting positions + # in the array of source strengths. + # Since the source strengths have the same order as the sources, + # these lists are the same as those for starting position in the + # sources array. + kwargs[key + "_starts"] = sources_starts + kwargs[key + "_offsets"] = sources_offsets + + ier, expn = formta( + rscale=rscale, + sources=sources, + sources_offsets=sources_offsets, + sources_starts=sources_starts, + nsources=nsources, + nsources_starts=nsources_starts, + nsources_offsets=nsources_offsets, + centers=centers, + centers_offsets=centers_offsets, + nterms=self.level_nterms[lev], + **kwargs) + + if ier.any(): + raise RuntimeError("formta failed") + + target_local_exps_view[ + target_or_target_parent_boxes[lev_start:lev_stop] + - target_box_start] = expn.T return local_exps diff --git a/requirements.txt b/requirements.txt index cd0a243269021d7cd8117275513a9d31339a6f58..c3174a43652827d4e18af7211b4fb9c595515c23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy git+https://github.com/inducer/pyopencl git+https://github.com/inducer/islpy git+https://github.com/inducer/loopy -git+https://github.com/inducer/pyfmmlib +git+https://gitlab.tiker.net/inducer/pyfmmlib # only for reference values for the fmmlib test # (unable to use--circular dep)