diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9d960a6f4eb920f920fb7398c1816e447e97e05e..6ee07ae45d90f7fa66cf6d8ca7e219d5cde2017b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -53,6 +53,19 @@ Python 3.5 POCL: except: - tags +Python 3.6 POCL: + script: + - export PY_EXE=python3.6 + - export PYOPENCL_TEST=portable + - export EXTRA_INSTALL="numpy mako" + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh + - ". ./build-and-test-py-project.sh" + tags: + - python3.6 + - pocl + except: + - tags + Documentation: script: - EXTRA_INSTALL="numpy mako" diff --git a/examples/sym-exp-complexity.py b/examples/sym-exp-complexity.py new file mode 100644 index 0000000000000000000000000000000000000000..bde21c42abdd2cf006903a968945c9ffe280cd26 --- /dev/null +++ b/examples/sym-exp-complexity.py @@ -0,0 +1,88 @@ +import numpy as np +import pyopencl as cl +import loopy as lp +from sumpy.kernel import LaplaceKernel, HelmholtzKernel +from sumpy.expansion.local import ( + LaplaceConformingVolumeTaylorLocalExpansion, + HelmholtzConformingVolumeTaylorLocalExpansion, + ) +from sumpy.expansion.multipole import ( + LaplaceConformingVolumeTaylorMultipoleExpansion, + HelmholtzConformingVolumeTaylorMultipoleExpansion, + ) +from sumpy.e2e import E2EFromCSR + + +def find_flops(): + ctx = cl.create_some_context() + + if 0: + knl = LaplaceKernel(2) + m_expn_cls = LaplaceConformingVolumeTaylorMultipoleExpansion + l_expn_cls = LaplaceConformingVolumeTaylorLocalExpansion + flop_type = np.float64 + else: + knl = HelmholtzKernel(2) + m_expn_cls = HelmholtzConformingVolumeTaylorMultipoleExpansion + l_expn_cls = HelmholtzConformingVolumeTaylorLocalExpansion + flop_type = np.complex128 + + orders = list(range(1, 11, 1)) + flop_counts = [] + for order in orders: + print(order) + m_expn = m_expn_cls(knl, order) + l_expn = l_expn_cls(knl, order) + m2l = E2EFromCSR(ctx, m_expn, l_expn) + + loopy_knl = m2l.get_kernel() + loopy_knl = lp.add_and_infer_dtypes( + loopy_knl, + { + "target_boxes,src_box_lists,src_box_starts": np.int32, + "centers,src_expansions": np.float64, + }) + + flops = lp.get_op_map(loopy_knl).filter_by(dtype=[flop_type]).sum() + flop_counts.append( + flops.eval_with_dict( + dict(isrc_start=0, isrc_stop=1, ntgt_boxes=1))) + + print(orders) + print(flop_counts) + + +def plot_flops(): + if 0: + case = "3D Laplace M2L" + orders = [1, 2, 3, 4, 5, 6, 7, 8, 9] + flops = [62, 300, 914, 2221, 4567, 8405, 14172, 22538, 34113] + filename = "laplace-m2l-complexity-3d.pdf" + + elif 0: + case = "2D Laplace M2L" + orders = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20] + flops = [36, 99, 193, 319, 476, 665, 889, 1143, 1429, 1747, 2097, 2479, 2893, + 3339, 3817, 4327, 4869, 5443, 6049, 6687] + filename = "laplace-m2l-complexity-2d.pdf" + elif 1: + case = "2D Helmholtz M2L" + orders = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + flops = [45, 194, 474, 931, 1650, 2632, 3925, 5591, 7706, 10272] + filename = "helmholtz-m2l-complexity-2d.pdf" + + import matplotlib.pyplot as plt + plt.rc("font", size=16) + plt.title(case) + plt.ylabel("Flop count") + plt.xlabel("Expansion order") + plt.loglog(orders, flops, "o-") + plt.grid() + plt.tight_layout() + plt.savefig(filename) + + +if __name__ == "__main__": + #find_flops() + plot_flops() diff --git a/sumpy/cse.py b/sumpy/cse.py index 350a2c8d97d6ef4729657c85248d412e4128f6a1..ad44fc41c8516dfc90587ce61cab6a3e5070e6ea 100644 --- a/sumpy/cse.py +++ b/sumpy/cse.py @@ -183,11 +183,32 @@ class FuncArgTracker(object): from collections import defaultdict count_map = defaultdict(lambda: 0) - for arg in argset: - for func_i in self.arg_to_funcset[arg]: + # Sorted by size to make best use of the performance hack below. + funcsets = sorted((self.arg_to_funcset[arg] for arg in argset), key=len) + + for funcset in funcsets[:-threshold+1]: + for func_i in funcset: if func_i >= min_func_i: count_map[func_i] += 1 + for i, funcset in enumerate(funcsets[-threshold+1:]): + # When looking at the tail end of the funcsets list, items below + # this threshold in the count_map don't have to be considered + # because they can't possibly be in the output. + count_map_threshold = i + 1 + + # We pick the smaller of the two containers to iterate over to + # reduce the number of items we have to look at. + (smaller_funcs_container, + larger_funcs_container) = sorted([funcset, count_map], key=len) + + for func_i in smaller_funcs_container: + if count_map[func_i] < count_map_threshold: + continue + + if func_i in larger_funcs_container: + count_map[func_i] += 1 + return dict( (k, v) for k, v in count_map.items() if v >= threshold) @@ -258,14 +279,14 @@ def match_common_args(func_class, funcs, opt_subs): from sumpy.tools import OrderedSet for i in range(len(funcs)): - common_arg_candidates = arg_tracker.get_common_arg_candidates( + common_arg_candidates_counts = arg_tracker.get_common_arg_candidates( arg_tracker.func_to_argset[i], i + 1, threshold=2) # Sort the candidates in order of match size. # This makes us try combining smaller matches first. common_arg_candidates = OrderedSet(sorted( - common_arg_candidates.keys(), - key=lambda k: (common_arg_candidates[k], k))) + common_arg_candidates_counts.keys(), + key=lambda k: (common_arg_candidates_counts[k], k))) while common_arg_candidates: j = common_arg_candidates.pop(last=False) diff --git a/test/test_kernels.py b/test/test_kernels.py index c6d93225cf32fc0d9910a376c57c4796434f0413..628e2893f3c67a18296badc0fe477dca67fcd4f6 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -1,7 +1,4 @@ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function -from six.moves import range +from __future__ import division, absolute_import, print_function __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -25,6 +22,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +from six.moves import range + import numpy as np import numpy.linalg as la import sys