diff --git a/sumpy/assignment_collection.py b/sumpy/assignment_collection.py index 54a825cb1a1eaae0310ae1395ffe1817a8ac3ecc..df4502aba08b7064c30a297465a02e991d927e73 100644 --- a/sumpy/assignment_collection.py +++ b/sumpy/assignment_collection.py @@ -22,7 +22,6 @@ THE SOFTWARE. import sumpy.symbolic as sym -from collections import OrderedDict import logging logger = logging.getLogger(__name__) @@ -96,9 +95,14 @@ class SymbolicAssignmentCollection: by this class, but not expressions using names defined in this collection. """ - def __init__(self): + def __init__(self, assignments=None): + """ + :arg assignments: mapping from *var_name* to expression + """ + + if assignments is None: + assignments = {} - assignments = OrderedDict() self.assignments = assignments self.reversed_assignments = {v: k for (k, v) in assignments.items()} @@ -187,21 +191,18 @@ class SymbolicAssignmentCollection: #from sumpy.symbolic import checked_cse from sumpy.cse import cse - new_assignments_list, new_exprs = cse(assign_exprs + extra_exprs, + new_assignments, new_exprs = cse(assign_exprs + extra_exprs, symbols=self.symbol_generator) new_assign_exprs = new_exprs[:len(assign_exprs)] new_extra_exprs = new_exprs[len(assign_exprs):] - new_assignments = OrderedDict() - for name, value in new_assignments_list: - assert isinstance(name, sym.Symbol) - new_assignments[name.name] = value - for name, new_expr in zip(assign_names, new_assign_exprs): - new_assignments[name] = new_expr + self.assignments[name] = new_expr - self.assignments = new_assignments + for name, value in new_assignments: + assert isinstance(name, sym.Symbol) + self.add_assignment(name.name, value) for name, new_expr in zip(assign_names, new_assign_exprs): # We want the assignment collection to be ordered correctly diff --git a/sumpy/cse.py b/sumpy/cse.py index d93cb503d76b433ee0724b7054dc9ad2d84f501b..0da0eadc7f1c0afb4c29dafe1569f19b5c36000d 100644 --- a/sumpy/cse.py +++ b/sumpy/cse.py @@ -143,6 +143,7 @@ class FuncArgTracker: for func_i, func in enumerate(funcs): func_argset = set() + for func_arg in func.args: arg_number = self.get_or_add_value_number(func_arg) func_argset.add(arg_number) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 1865833f4caf228824fdec37ca57f9f8dee46d9a..adc12b452b80b1caefd088cfd32401ef86a882ee 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -81,7 +81,10 @@ class E2EBase(KernelCacheWrapper): self.tgt_expansion = tgt_expansion self.name = name or self.default_name self.device = device - self.use_fft = self.tgt_expansion.use_fft + try: + self.use_fft = self.tgt_expansion.use_fft + except AttributeError: + self.use_fft = False if src_expansion.dim != tgt_expansion.dim: raise ValueError("source and target expansions must have " @@ -223,7 +226,8 @@ class E2EFromCSR(E2EBase): loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) - loopy_knl = lp.set_options(loopy_knl, enforce_variable_access_ordered="no_check") + loopy_knl = lp.set_options(loopy_knl, + enforce_variable_access_ordered="no_check") return loopy_knl @@ -250,152 +254,6 @@ class E2EFromCSR(E2EBase): **kwargs) -class E2EFromCSRTranslationInvariant(E2EFromCSR): - """Implements translation from a "compressed sparse row"-like source box - list for translation invariant Kernels. - """ - default_name = "e2e_from_csr_translation_invariant" - - def get_translation_loopy_insns(self): - from sumpy.symbolic import make_sym_vector - dvec = make_sym_vector("d", self.dim) - - src_coeff_exprs = [sym.Symbol("src_coeff%d" % i) - for i in range(len(self.src_expansion))] - src_rscale = sym.Symbol("src_rscale") - - tgt_rscale = sym.Symbol("tgt_rscale") - - nprecomputed_exprs = \ - self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) - - precomputed_exprs = [sym.Symbol("precomputed_expr%d" % i) - for i in range(nprecomputed_exprs)] - - from sumpy.assignment_collection import SymbolicAssignmentCollection - sac = SymbolicAssignmentCollection() - tgt_coeff_names = [ - sac.assign_unique("coeff%d" % i, coeff_i) - for i, coeff_i in enumerate( - self.tgt_expansion.translate_from( - self.src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale, sac, - precomputed_exprs=precomputed_exprs))] - - sac.run_global_cse() - - from sumpy.codegen import to_loopy_insns - return to_loopy_insns( - sac.assignments.items(), - vector_names=set(["d"]), - pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], - retain_names=tgt_coeff_names, - complex_dtype=np.complex128 # FIXME - ) - - def get_kernel(self): - ncoeff_src = len(self.src_expansion) - ncoeff_tgt = len(self.tgt_expansion) - nprecomputed_exprs = \ - self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) - - # To clarify terminology: - # - # isrc_box -> The index in a list of (in this case, source) boxes - # src_ibox -> The (global) box number for the (in this case, source) box - # - # (same for itgt_box, tgt_ibox) - - from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[itgt_box]: 0<=itgt_box<ntgt_boxes}", - "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}", - "{[idim]: 0<=idim<dim}", - ], - [""" - for itgt_box - <> tgt_ibox = target_boxes[itgt_box] - - <> tgt_center[idim] = centers[idim, tgt_ibox] \ - - <> isrc_start = src_box_starts[itgt_box] - <> isrc_stop = src_box_starts[itgt_box+1] - - for isrc_box - <> src_ibox = src_box_lists[isrc_box] \ - {id=read_src_ibox} - - <> src_center[idim] = centers[idim, src_ibox] {dup=idim} - <> d[idim] = tgt_center[idim] - src_center[idim] \ - {dup=idim} - <> translation_class = \ - m2l_translation_classes_lists[isrc_box] - <> translation_class_rel = translation_class - \ - translation_classes_level_start - """] + [""" - <> precomputed_expr{idx} = \ - m2l_precomputed_exprs[translation_class_rel, {idx}] - """.format(idx=idx) for idx in range( - nprecomputed_exprs)] + [""" - <> src_coeff{coeffidx} = \ - src_expansions[src_ibox - src_base_ibox, {coeffidx}] \ - {{dep=read_src_ibox}} - """.format(coeffidx=i) for i in range(ncoeff_src)] + [ - - ] + self.get_translation_loopy_insns() + [""" - end - - """] + [""" - tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \ - simul_reduce(sum, isrc_box, coeff{coeffidx}) \ - {{id_prefix=write_expn}} - """.format(coeffidx=i) for i in range(ncoeff_tgt)] + [""" - end - """], - [ - lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), - lp.ValueArg("src_rscale,tgt_rscale", None), - lp.GlobalArg("src_box_starts, src_box_lists", - None, shape=None, strides=(1,), offset=lp.auto), - lp.ValueArg("aligned_nboxes,tgt_base_ibox,src_base_ibox", - np.int32), - lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes", - np.int32), - lp.ValueArg("translation_classes_level_start", - np.int32), - lp.GlobalArg("src_expansions", None, - shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto), - lp.GlobalArg("tgt_expansions", None, - shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto), - lp.ValueArg("ntranslation_classes, ntranslation_classes_lists", - np.int32), - lp.GlobalArg("m2l_translation_classes_lists", np.int32, - shape=("ntranslation_classes_lists"), strides=(1,), - offset=lp.auto), - lp.GlobalArg("m2l_precomputed_exprs", None, - shape=("ntranslation_classes", nprecomputed_exprs), - offset=lp.auto), - "..." - ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), - name=self.name, - assumptions="ntgt_boxes>=1", - silenced_warnings="write_race(write_expn*)", - default_offset=lp.auto, - fixed_parameters=dict(dim=self.dim, - nprecomputed_exprs=nprecomputed_exprs), - lang_version=MOST_RECENT_LANGUAGE_VERSION - ) - - for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: - loopy_knl = knl.prepare_loopy_kernel(loopy_knl) - - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) - - return loopy_knl - - class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): """Implements precomputing the translation classes dependent derivatives. @@ -549,7 +407,7 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): from sumpy.codegen import to_loopy_insns return to_loopy_insns( - six.iteritems(sac.assignments), + sac.assignments.items(), vector_names=set(["d"]), pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], retain_names=tgt_coeff_names, @@ -578,10 +436,10 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): if self.use_fft: tgt_coeff_post_exprs = self.tgt_expansion.m2l_postprocess_exprs( self.src_expansion, tgt_coeff_exprs, src_rscale, tgt_rscale, - sac=sac, use_fft=self.use_fft) + sac=sac) if result_dtype in (np.float32, np.float64): real_func = sym.Function("real") - tgt_coeff_post_exprs = [real_func(expr) for expr in \ + tgt_coeff_post_exprs = [real_func(expr) for expr in tgt_coeff_post_exprs] else: tgt_coeff_post_exprs = tgt_coeff_exprs @@ -595,7 +453,7 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): from sumpy.codegen import to_loopy_insns insns = to_loopy_insns( - six.iteritems(sac.assignments), + sac.assignments.items(), vector_names=set(["d"]), pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], retain_names=tgt_coeff_post_names, @@ -603,14 +461,12 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): ) return insns - def get_kernel(self, result_dtype): ncoeff_src = len(self.src_expansion) ncoeff_tgt = len(self.tgt_expansion) ncoeff_tgt_post = len(self.tgt_expansion) nprecomputed_exprs = \ - self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion, - use_fft=self.use_fft) + self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) if self.use_fft: ncoeff_src = nprecomputed_exprs @@ -706,12 +562,13 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): lang_version=MOST_RECENT_LANGUAGE_VERSION ) - for expn in [self.src_expansion, self.tgt_expansion]: + for expn in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) - loopy_knl = lp.set_options(loopy_knl, enforce_variable_access_ordered="no_check") + loopy_knl = lp.set_options(loopy_knl, + enforce_variable_access_ordered="no_check") return loopy_knl def get_optimized_kernel(self, result_dtype): @@ -725,7 +582,6 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): pickle.dump(knl, f) return knl - def __call__(self, queue, **kwargs): """ :arg src_expansions: @@ -753,116 +609,6 @@ class E2EFromCSRTranslationInvariant(E2EFromCSR): tgt_expansions=tgt_expansions, **kwargs) -class E2EFromCSRTranslationClassesPrecompute(E2EFromCSR): - """Implements precomputing the translation classes dependent - derivatives. - """ - default_name = "e2e_from_csr_translation_classes_precompute" - - def get_translation_loopy_insns(self, result_dtype): - from sumpy.symbolic import make_sym_vector - dvec = make_sym_vector("d", self.dim) - - src_rscale = sym.Symbol("src_rscale") - tgt_rscale = sym.Symbol("tgt_rscale") - - from sumpy.assignment_collection import SymbolicAssignmentCollection - sac = SymbolicAssignmentCollection() - tgt_coeff_names = [ - sac.assign_unique("precomputed_expr%d" % i, coeff_i) - for i, coeff_i in enumerate( - self.tgt_expansion.m2l_global_precompute_exprs( - self.src_expansion, src_rscale, dvec, tgt_rscale, - sac=sac))] - - sac.run_global_cse() - - from sumpy.codegen import to_loopy_insns - return to_loopy_insns( - six.iteritems(sac.assignments), - vector_names=set(["d"]), - pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], - retain_names=tgt_coeff_names, - complex_dtype=to_complex_dtype(result_dtype), - ) - - def get_kernel(self, result_dtype): - nprecomputed_exprs = \ - self.tgt_expansion.m2l_global_precompute_nexpr(self.src_expansion) - from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[itr_class]: 0<=itr_class<ntranslation_classes}", - "{[idim]: 0<=idim<dim}", - ], - [""" - for itr_class - <> d[idim] = m2l_translation_vectors[idim, \ - itr_class + translation_classes_level_start] {dup=idim} - - """] + self.get_translation_loopy_insns(result_dtype) + [""" - m2l_precomputed_exprs[itr_class, {idx}] = precomputed_expr{idx} - """.format(idx=i) for i in range(nprecomputed_exprs)] + [""" - end - """], - [ - lp.ValueArg("src_rscale", None), - lp.GlobalArg("m2l_precomputed_exprs", None, - shape=("ntranslation_classes", nprecomputed_exprs), - offset=lp.auto), - lp.GlobalArg("m2l_translation_vectors", None, - shape=("dim", "ntranslation_vectors")), - lp.ValueArg("ntranslation_classes", np.int32), - lp.ValueArg("ntranslation_vectors", np.int32), - lp.ValueArg("translation_classes_level_start", np.int32), - "..." - ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), - name=self.name, - assumptions="ntranslation_classes>=1", - default_offset=lp.auto, - fixed_parameters=dict(dim=self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION - ) - - for expn in [self.src_expansion, self.tgt_expansion]: - loopy_knl = expn.prepare_loopy_kernel(loopy_knl) - - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr")) - loopy_knl = lp.set_options(loopy_knl, enforce_variable_access_ordered="no_check") - - return loopy_knl - - def get_optimized_kernel(self, result_dtype): - # FIXME - knl = self.get_kernel(result_dtype=result_dtype) - knl = lp.split_iname(knl, "itr_class", 16, outer_tag="g.0") - - return knl - - def __call__(self, queue, **kwargs): - """ - :arg src_rscale: - :arg translation_classes_level_start: - :arg ntranslation_classes: - :arg m2l_precomputed_exprs: - :arg m2l_translation_vectors: - """ - m2l_translation_vectors = kwargs.pop("m2l_translation_vectors") - # "1" may be passed for rscale, which won't have its type - # meaningfully inferred. Make the type of rscale explicit. - src_rscale = m2l_translation_vectors.dtype.type(kwargs.pop("src_rscale")) - - m2l_precomputed_exprs = kwargs.pop("m2l_precomputed_exprs") - result_dtype = m2l_precomputed_exprs.dtype - knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) - - return knl(queue, - src_rscale=src_rscale, - m2l_translation_vectors=m2l_translation_vectors, - m2l_precomputed_exprs=m2l_precomputed_exprs, - **kwargs) - class E2EFromCSRWithFFTPreprocess(E2EFromCSR): """Implements preprocessing the multipole expansions @@ -890,7 +636,7 @@ class E2EFromCSRWithFFTPreprocess(E2EFromCSR): from sumpy.codegen import to_loopy_insns return to_loopy_insns( - six.iteritems(sac.assignments), + sac.assignments.items(), vector_names=set(["d"]), pymbolic_expr_maps=[self.tgt_expansion.get_code_transformer()], retain_names=pp_src_coeff_names, @@ -932,10 +678,11 @@ class E2EFromCSRWithFFTPreprocess(E2EFromCSR): lang_version=MOST_RECENT_LANGUAGE_VERSION ) - for expn in [self.src_expansion, self.tgt_expansion]: + for expn in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.set_options(loopy_knl, enforce_variable_access_ordered="no_check") + loopy_knl = lp.set_options(loopy_knl, + enforce_variable_access_ordered="no_check") return loopy_knl def get_optimized_kernel(self, result_dtype): @@ -1047,7 +794,8 @@ class E2EFromChildren(E2EBase): loopy_knl = knl.prepare_loopy_kernel(loopy_knl) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - loopy_knl = lp.set_options(loopy_knl, enforce_variable_access_ordered="no_check") + loopy_knl = lp.set_options(loopy_knl, + enforce_variable_access_ordered="no_check") return loopy_knl diff --git a/sumpy/expansion/__init__.py b/sumpy/expansion/__init__.py index c8e4400b3d84a72b5c3da3865bf8583ca2ae40cc..79bc9cb8e79139f4b2470d4b88c69018992ca7f7 100644 --- a/sumpy/expansion/__init__.py +++ b/sumpy/expansion/__init__.py @@ -53,7 +53,6 @@ class ExpansionBase: .. automethod:: translate_from .. automethod:: __eq__ .. automethod:: __ne__ - .. automethod:: get_kernel_derivative_taker """ init_arg_names = ("kernel", "order", "use_rscale") @@ -115,8 +114,8 @@ class ExpansionBase: :arg avec: vector from source to center. :arg bvec: vector from center to target. Not usually necessary, except for line-Taylor expansion. - :arg sac: A SymbolicAssignmentCollction used for storing - temporary expressions. + :arg sac: a symbolic assignment collection where temporary + expressions are stored. :returns: a list of :mod:`sympy` expressions representing the coefficients of the expansion. @@ -145,9 +144,6 @@ class ExpansionBase: def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): """ - :arg sac: A SymbolicAssignmentCollction used for storing - temporary expressions. - :return: a :mod:`sympy` expression corresponding to the evaluated expansion with the coefficients in *coeffs*. @@ -189,6 +185,7 @@ class ExpansionBase: return type(self)(**new_kwargs) + # }}} @@ -207,11 +204,11 @@ class ExpansionTermsWrangler: raise NotImplementedError def get_full_kernel_derivatives_from_stored(self, stored_kernel_derivatives, - rscale): + rscale, sac=None): raise NotImplementedError def get_stored_mpole_coefficients_from_full(self, full_mpole_coefficients, - rscale): + rscale, sac=None): raise NotImplementedError @memoize_method @@ -324,7 +321,7 @@ class FullExpansionTermsWrangler(ExpansionTermsWrangler): ExpansionTermsWrangler.get_full_coefficient_identifiers) def get_full_kernel_derivatives_from_stored(self, stored_kernel_derivatives, - rscale): + rscale, sac=None): return stored_kernel_derivatives get_stored_mpole_coefficients_from_full = ( @@ -690,11 +687,6 @@ class LaplaceConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): def __init__(self, kernel, order, use_rscale): self.expansion_terms_wrangler_key = (order, kernel.dim) - def get_kernel_derivative_taker(self, dvec, rscale, sac): - from sumpy.tools import LaplaceDerivativeTaker - return LaplaceDerivativeTaker(self.kernel.get_expression(dvec), dvec, - rscale, sac) - class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): @@ -706,11 +698,6 @@ class HelmholtzConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): helmholtz_k_name = kernel.get_base_kernel().helmholtz_k_name self.expansion_terms_wrangler_key = (order, kernel.dim, helmholtz_k_name) - def get_kernel_derivative_taker(self, dvec, rscale, sac): - from sumpy.tools import HelmholtzYukawaDerivativeTaker - return HelmholtzYukawaDerivativeTaker(self.kernel.get_expression(dvec), dvec, - rscale, sac) - class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): @@ -721,6 +708,7 @@ class BiharmonicConformingVolumeTaylorExpansion(VolumeTaylorExpansionBase): def __init__(self, kernel, order, use_rscale): self.expansion_terms_wrangler_key = (order, kernel.dim) + # }}} diff --git a/sumpy/expansion/local.py b/sumpy/expansion/local.py index bffeca35a214e4ce2b196e577ad79ced20dfffc9..fc1b9ae5b33c83f2e91ce427afb8edf8a49f9340 100644 --- a/sumpy/expansion/local.py +++ b/sumpy/expansion/local.py @@ -243,7 +243,6 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): toeplitz_matrix_ident_to_index = dict((ident, i) for i, ident in enumerate(toeplitz_matrix_coeffs)) ->>>>>>> 4b95905766d950f99b371222b93ff36b1b12358b # Create a expansion terms wrangler for derivatives up to order # (tgt order)+(src order) including a corresponding reduction matrix @@ -284,7 +283,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): derivatives_full = [0]*len(toeplitz_matrix_coeffs) for expr, mi in zip(vector, needed_vector_terms): derivatives_full[toeplitz_matrix_ident_to_index[mi]] = expr - return self._fft(list(reversed(derivatives_full)), sac=sac) + return fft(list(reversed(derivatives_full)), sac=sac) return vector @@ -304,7 +303,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): add_to_sac(sac, coeff) if self.use_fft: - return self._fft(toeplitz_first_row, sac=sac) + return fft(toeplitz_first_row, sac=sac) else: return toeplitz_first_row @@ -372,7 +371,7 @@ class VolumeTaylorLocalExpansionBase(LocalExpansionBase): derivatives_full[toeplitz_matrix_ident_to_index[mi]] = expr toeplitz_first_row = self.m2l_preprocess_exprs(src_expansion, - src_coeff_exprs, sac) + src_coeff_exprs, sac, src_rscale) # Do the matvec if 0: @@ -593,8 +592,6 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): if not self.use_rscale: rscale = 1 - if knl is None: - knl = self.kernel from sumpy.symbolic import sym_real_norm_2 bessel_j = sym.Function("bessel_j") @@ -635,6 +632,8 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): if self.use_fft: order = self.order first, last = precomputed_exprs[:2*order], precomputed_exprs[2*order:] + #first, last = precomputed_exprs[:2*order+1], precomputed_exprs[2*order+1:] + #return fft(list(reversed(first))+list(reversed(last)), sac) return fft(list(last)+list(first), sac) return precomputed_exprs @@ -659,14 +658,13 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): tgt_rscale, sac): if self.use_fft: - n = 2 * self.order + 1 m2l_result = fft(m2l_result, inverse=True, sac=sac) m2l_result = m2l_result[:2*self.order+1] # Filter out the dummy rows and scale them for target result = [] for j in self.get_coefficient_identifiers(): - result.append(m2l_result[j + self.order] \ + result.append(m2l_result[j + self.order] * tgt_rscale**(abs(j)) * sym.Integer(-1)**j) return result @@ -700,7 +698,7 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): if isinstance(src_expansion, self.mpole_expn_class): if precomputed_exprs is None: derivatives = self.m2l_global_precompute_exprs(src_expansion, - src_rscale, dvec, tgt_rscale) + src_rscale, dvec, tgt_rscale, sac=sac) else: derivatives = precomputed_exprs @@ -712,17 +710,17 @@ class _FourierBesselLocalExpansion(LocalExpansionBase): translated_coeffs.append(a * b) return translated_coeffs - src_coeff_exprs = self.m2l_preprocess_exprs(src_expansion, src_coeff_exprs, sac, - src_rscale) + src_coeff_exprs = self.m2l_preprocess_exprs(src_expansion, + src_coeff_exprs, sac, src_rscale) for j in self.get_coefficient_identifiers(): - translated_coeff.append( + translated_coeffs.append( sum(derivatives[m + j + 2*self.order] * src_coeff_exprs[src_expansion.get_storage_index(m)] for m in src_expansion.get_coefficient_identifiers())) - translated_coeffs = self.m2l_postprocess_exprs(src_expansion, translated_coeffs, src_rscale, - tgt_rscale, sac) + translated_coeffs = self.m2l_postprocess_exprs(src_expansion, + translated_coeffs, src_rscale, tgt_rscale, sac) return translated_coeffs raise RuntimeError("do not know how to translate %s to %s" @@ -737,6 +735,7 @@ class H2DLocalExpansion(_FourierBesselLocalExpansion): and kernel.dim == 2) super().__init__(kernel, order, use_rscale, use_fft=use_fft) + print(use_fft) from sumpy.expansion.multipole import H2DMultipoleExpansion self.mpole_expn_class = H2DMultipoleExpansion diff --git a/sumpy/expansion/multipole.py b/sumpy/expansion/multipole.py index 240385e711cbae9ed228dd26f9e9e091ddf1764f..b9b1a07700b1019d18ea06b2613cd9acca4f251c 100644 --- a/sumpy/expansion/multipole.py +++ b/sumpy/expansion/multipole.py @@ -413,8 +413,6 @@ class _HankelBased2DMultipoleExpansion(MultipoleExpansionBase): def evaluate(self, kernel, coeffs, bvec, rscale, sac=None): if not self.use_rscale: rscale = 1 - if knl is None: - knl = self.kernel from sumpy.symbolic import sym_real_norm_2 hankel_1 = sym.Function("hankel_1") diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 4611814b2e1a599e3979e3019fe42d4e80d85791..96e223333248868b2df1c4a9cdb5af0a94f46405 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -99,6 +99,7 @@ class SumpyExpansionWranglerCodeContainer: @memoize_method def local_expansion(self, order): + print(self.use_fft) return self.local_expansion_factory(order, self.use_rscale, use_fft=self.use_fft) @@ -132,19 +133,19 @@ class SumpyExpansionWranglerCodeContainer: def m2l_optimized(self, src_order, tgt_order): return E2EFromCSRTranslationInvariant(self.cl_context, self.multipole_expansion(src_order), - self.local_expansion(tgt_order), use_fft=self.use_fft) + self.local_expansion(tgt_order)) @memoize_method def m2l_optimized_precompute_kernel(self, src_order, tgt_order): return E2EFromCSRTranslationClassesPrecompute(self.cl_context, self.multipole_expansion(src_order), - self.local_expansion(tgt_order), use_fft=self.use_fft) + self.local_expansion(tgt_order)) @memoize_method def m2l_preprocess_kernel(self, src_order, tgt_order): return E2EFromCSRWithFFTPreprocess(self.cl_context, self.multipole_expansion(src_order), - self.local_expansion(tgt_order), use_fft=self.use_fft) + self.local_expansion(tgt_order)) @memoize_method def l2l(self, src_order, tgt_order): @@ -178,8 +179,7 @@ class SumpyExpansionWranglerCodeContainer: translation_classes_data=None): return SumpyExpansionWrangler(self, queue, tree, dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs, self_extra_kwargs, - translation_classes_data=translation_classes_data, - use_fft=self.use_fft) + translation_classes_data=translation_classes_data) # }}} @@ -366,7 +366,7 @@ class SumpyExpansionWrangler: "unoptimized. Supply a translation_classes_data argument " "to the wrangler for optimized List 2.", SumpyTranslationClassesDataNotSuppliedWarning, - stacklevel=2) + stacklevel=2) self.supports_optimized_m2l = False else: self.supports_optimized_m2l = True @@ -374,6 +374,7 @@ class SumpyExpansionWrangler: self.supports_optimized_m2l = False self.translation_classes_data = translation_classes_data + self.use_fft = self.code.use_fft # {{{ data vector utilities @@ -463,9 +464,10 @@ class SumpyExpansionWrangler: @memoize_method def pp_multipole_expansions_level_starts(self): def order_to_size(order): - mpole_expn = self.code.multipole_expansion_factory(order) - local_expn = self.code.local_expansion_factory(order) - return local_expn.m2l_global_precompute_nexpr(mpole_expn) + mpole_expn = self.code.multipole_expansion(order) + local_expn = self.code.local_expansion(order) + res = local_expn.m2l_global_precompute_nexpr(mpole_expn) + return res return self._expansions_level_starts(order_to_size, level_starts=self.tree.level_start_box_nrs) @@ -746,6 +748,9 @@ class SumpyExpansionWrangler: ) events.append(evt) mpole_exps = pp_mpole_exps + mpole_exps_view_func = self.pp_multipole_expansions_view + else: + mpole_exps_view_func = self.multipole_expansions_view local_exps = self.local_expansion_zeros() @@ -759,7 +764,7 @@ class SumpyExpansionWrangler: m2l = self.m2l_class(order, order) source_level_start_ibox, source_mpoles_view = \ - self.m2l_mpole_expansions_view(mpole_exps, lev) + mpole_exps_view_func(mpole_exps, lev) target_level_start_ibox, target_local_exps_view = \ self.local_expansions_view(local_exps, lev) diff --git a/sumpy/kernel.py b/sumpy/kernel.py index 00699ac13cddddeb77ab7b81e351b1ac28d6689c..494a6107248004883cd70c9e79dc58474c8d74ea 100644 --- a/sumpy/kernel.py +++ b/sumpy/kernel.py @@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ + import loopy as lp import numpy as np from pymbolic.mapper import IdentityMapper, CSECachingMapperMixin @@ -370,8 +371,6 @@ class ExpressionKernel(Kernel): key_builder.rec(key_hash, value) mapper_method = "map_expression_kernel" - # TODO: Allow kernels that are not translation invariant - is_translation_invariant = True def get_derivative_taker(self, dvec, rscale, sac): """Return a :class:`sumpy.tools.ExprDerivativeTaker` instance that supports @@ -424,7 +423,6 @@ class LaplaceKernel(ExpressionKernel): return "LapKnl%dD" % self.dim mapper_method = "map_laplace_kernel" - is_translation_invariant = True def get_derivative_taker(self, dvec, rscale, sac): """Return a :class:`sumpy.tools.ExprDerivativeTaker` instance that supports @@ -468,7 +466,6 @@ class BiharmonicKernel(ExpressionKernel): return "BiharmKnl%dD" % self.dim mapper_method = "map_biharmonic_kernel" - is_translation_invariant = True def get_derivative_taker(self, dvec, rscale, sac): """Return a :class:`sumpy.tools.ExprDerivativeTaker` instance that supports @@ -547,7 +544,6 @@ class HelmholtzKernel(ExpressionKernel): )] mapper_method = "map_helmholtz_kernel" - is_translation_invariant = True def get_derivative_taker(self, dvec, rscale, sac): """Return a :class:`sumpy.tools.ExprDerivativeTaker` instance that supports @@ -629,7 +625,6 @@ class YukawaKernel(ExpressionKernel): )] mapper_method = "map_yukawa_kernel" - is_translation_invariant = True def get_derivative_taker(self, dvec, rscale, sac): """Return a :class:`sumpy.tools.ExprDerivativeTaker` instance that supports @@ -704,7 +699,6 @@ class StokesletKernel(ExpressionKernel): )] mapper_method = "map_stokeslet_kernel" - is_translation_invariant = True class StressletKernel(ExpressionKernel): diff --git a/sumpy/tools.py b/sumpy/tools.py index 79eb333de04920583c260a49ab69bda56f312200..60672ce0b741c121e635ceb1362721363bb64d2b 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -71,14 +71,10 @@ __doc__ = """ from pytools import memoize_method, memoize_in from pytools.tag import Tag, tag_dataclass -import math import numbers import numpy as np import sumpy.symbolic as sym -import numbers -import math -from collections import namedtuple import pyopencl as cl import pyopencl.array # noqa @@ -1165,109 +1161,6 @@ def find_linear_relationship(matrix): # }}} -def matvec_toeplitz_upper_triangular(first_row, vector): - n = len(first_row) - assert len(vector) == n - output = [0]*n - for row in range(n): - terms = tuple(first_row[col-row]*vector[col] for col in range(row, n)) - output[row] = sym.Add(*terms) - return output - - -# {{{ FFT - -def _complex_tuple_mul(a, b, sac=None): - """ - Multiply the two complex numbers represented as a tuple - for real and imaginary parts - """ - return (add_to_sac(sac, (a[0]*b[0])-(a[1]*b[1])), - add_to_sac(sac, (a[0]*b[1])+(a[1]*b[0]))) - - -def _binary_reverse(n, bits): - # Returns the reverse of the number n in binary form with bits - # number of bits - b = bin(n)[2:].rjust(bits, "0") - return int(b[::-1], 2) - - -def fft(seq, inverse=False, sac=None): - """ - Return the discrete fourier transform of the sequence seq. - seq should be a python iterable with tuples of length 2 - corresponding to the real part and imaginary part. - """ - - a = seq - n = len(a) - if n < 2: - return a - - b = n.bit_length() - 1 - if n & (n - 1): # not a power of 2 - b += 1 - n = 2**b - - a += [(0, 0)]*(n - len(a)) - for i in range(1, n): - j = _binary_reverse(i, b) - if i < j: - a[i], a[j] = a[j], a[i] - ang = 2*math.pi/n - # Rewrite cosines and sines using cosines of angle in the first quadrant - # This is to reduce duplicate of floating point numbers with 1 ULP difference - # and also make sure quantities like cos(pi/2) - sin(pi/2) produces 0 exactly. - w = [(math.cos(ang*i), math.cos(ang*(n/4.0 - i))) for i in range((n + 3)//4)] - w[0] = (1, 0) - w += [(-math.cos(ang*(n/2 - i)), math.cos(ang*(i - n/4.0))) for - i in range((n + 3)//4, n//2)] - if n % 4 == 0: - w[n//4] = (0, 1) - if inverse: - w = [(x[0], -x[1]) for x in w] - h = 2 - while h <= n: - hf, ut = h // 2, n // h - for i in range(0, n, h): - for j in range(hf): - u, v = a[i + j], _complex_tuple_mul(a[i + j + hf], w[ut * j], - sac=None) - a[i + j] = (u[0] + v[0], u[1] + v[1]) - a[i + j + hf] = (u[0] - v[0], u[1] - v[1]) - h *= 2 - - if inverse: - a = [(x[0]/n, x[1]/n) for x in a] - - return a - - -def fft_toeplitz_upper_triangular(first_row, x, sac=None): - """ - Returns the matvec of the Toeplitz matrix given by - the first row and the vector x using a Fourier transform - """ - assert len(first_row) == len(x) - n = len(first_row) - v = list(first_row) - v += [0]*(n-1) - - x = list(reversed(x)) - x += [0]*(n-1) - - v_fft = fft([(a, 0) for a in v], sac=sac) - x_fft = fft([(a, 0) for a in x], sac=sac) - res_fft = [_complex_tuple_mul(a, b, sac=sac) for a, b in zip(v_fft, x_fft)] - res = fft(res_fft, inverse=True, sac=sac) - return [a for a, _ in reversed(res[:n])] - -# }}} - -# }}} - - # {{{ FFT def fft(seq, inverse=False, sac=None): @@ -1277,13 +1170,13 @@ def fft(seq, inverse=False, sac=None): corresponding to the real part and imaginary part. """ - from pymbolic.algorithm import fft as _fft, ifft as _ifft + from pymbolic.algorithm import fft as _fft, ifft as _ifft def wrap(expr): if isinstance(expr, np.ndarray): res = [wrap(a) for a in expr] return np.array(res, dtype=object).reshape(expr.shape) - return add_to_sac(sac, expr, "temp_fft") + return add_to_sac(sac, expr) if inverse: return _ifft(list(seq), wrap_intermediate=wrap).tolist() @@ -1311,10 +1204,6 @@ def fft_toeplitz_upper_triangular(first_row, x, sac=None): return list(reversed(res[:n])) -def fft_toeplitz_upper_triangular_lwork(n): - return n - - def matvec_toeplitz_upper_triangular(first_row, vector): n = len(first_row) assert len(vector) == n diff --git a/test/test_fmm.py b/test/test_fmm.py index a4f0d5ec01d67238c7e3d0239c92bc7b7c7e1107..09587b30733e85a61aa3a38d891d65030c408254 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -54,7 +54,7 @@ else: @pytest.mark.parametrize( - "knl, local_expn_class, mpole_expn_class, optimized_m2l, use_fft", [ + "knl, local_expn_class, mpole_expn_class, optimized_m2l, use_fft", [ (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, False, False), (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, @@ -78,11 +78,13 @@ else: (HelmholtzKernel(2), HelmholtzConformingVolumeTaylorLocalExpansion, HelmholtzConformingVolumeTaylorMultipoleExpansion, False, False), (HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion, False, False), + (HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion, True, False), + (HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion, True, True), (HelmholtzKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, False, False), (HelmholtzKernel(3), HelmholtzConformingVolumeTaylorLocalExpansion, HelmholtzConformingVolumeTaylorMultipoleExpansion, False, False), - (YukawaKernel(2), Y2DLocalExpansion, Y2DMultipoleExpansion, False, True), + (YukawaKernel(2), Y2DLocalExpansion, Y2DMultipoleExpansion, False, False), ]) def test_sumpy_fmm(ctx_factory, knl, local_expn_class, mpole_expn_class, optimized_m2l, use_fft): @@ -360,7 +362,7 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory): wcc = SumpyExpansionWranglerCodeContainer( ctx, partial(mpole_expn_class, knl), - partial(local_expn_class, knl, use_fft=use_fft), + partial(local_expn_class, knl), target_kernels) wrangler = wcc.get_wrangler(queue, tree, dtype, @@ -512,7 +514,8 @@ def test_sumpy_axis_source_derivative(ctx_factory): # You can test individual routines by typing -# $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)' +# $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context, LaplaceKernel(2), +# VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, False, False)' if __name__ == "__main__": if len(sys.argv) > 1: diff --git a/test/test_kernels.py b/test/test_kernels.py index 3aa01bfc0dd9b3f2fce3c87d8b10224d5e7de9b4..7efa44f4ebcc038151b752b529f6518042c1fa6f 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -44,7 +44,6 @@ from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative, DirectionalSourceDerivative, BiharmonicKernel, StokesletKernel) import sumpy.symbolic as sym from pytools.convergence import PConvergenceVerifier -import sumpy.symbolic as sym import logging logger = logging.getLogger(__name__) @@ -456,72 +455,6 @@ def test_p2e2p(ctx_factory, base_knl, expn_class, order, with_source_derivative) assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack -# {{{ test toeplitz - -def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rscale, - dvec, tgt_rscale): - - if not tgt_expansion.use_rscale: - src_rscale = 1 - tgt_rscale = 1 - - from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase - if not isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase): - return 1 - - # We know the general form of the multipole expansion is: - # - # coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ... - # - # To get the local expansion coefficients, we take derivatives of - # the multipole expansion. - taker = src_expansion.get_kernel_derivative_taker(dvec, src_rscale, sac=None) - - from sumpy.tools import add_mi - - result = [] - for deriv in tgt_expansion.get_coefficient_identifiers(): - local_result = [] - for coeff, term in zip( - src_coeff_exprs, - src_expansion.get_coefficient_identifiers()): - - kernel_deriv = taker.diff(add_mi(deriv, term)) / src_rscale**sum(deriv) - - local_result.append( - coeff * kernel_deriv * tgt_rscale**sum(deriv)) - result.append(sym.Add(*local_result)) - return result - - -def test_m2l_toeplitz(ctx_getter): - dim = 3 - knl = LaplaceKernel(dim) - local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion - mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion - - local_expn = local_expn_class(knl, order=5) - mpole_expn = mpole_expn_class(knl, order=5) - - dvec = sym.make_sym_vector("d", dim) - src_coeff_exprs = list(1 + np.random.randn(len(mpole_expn))) - src_rscale = 2.0 - tgt_rscale = 1.0 - - expected_output = _m2l_translate_simple(local_expn, mpole_expn, src_coeff_exprs, - src_rscale, dvec, tgt_rscale) - actual_output = local_expn.translate_from(mpole_expn, src_coeff_exprs, - src_rscale, dvec, tgt_rscale, sac=None) - - replace_dict = dict((d, np.random.rand(1)[0]) for d in dvec) - for sym_a, sym_b in zip(expected_output, actual_output): - num_a = sym_a.xreplace(replace_dict) - num_b = sym_b.xreplace(replace_dict) - assert abs(num_a - num_b)/abs(num_a) < 1e-10 - - -# }}} - @pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class", [ (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), (LaplaceKernel(2), LaplaceConformingVolumeTaylorLocalExpansion,