From 583a4b99c2e8e3ccfd86416cebecb1913f54a273 Mon Sep 17 00:00:00 2001
From: Isuru Fernando
Date: Tue, 23 May 2023 11:08:12 -0500
Subject: [PATCH] fp_contract(fast) for pocl CUDA

---
Reviewer notes. Everything between the "---" line above and the first
"diff --git" line is commentary and is not part of the applied change.

What the patch does:

* sumpy/codegen.py gains two module-level functions.

  _fp_contract_fast_preamble(preamble_info) is a generator that yields a
  single (tag, code) pair: the tag "fp_contract_fast_pocl" and the line
  "#pragma clang fp contract(fast)".  Its argument is not used.

  register_optimization_preambles(loopy_knl, device) returns the kernel it
  was given.  It first passes the kernel through
  lp.register_preamble_generators with the generator above, but only when
  all three conditions hold:
    - loopy_knl.target is an instance of lp.PyOpenCLTarget,
    - device.platform.name equals "Portable Computing Language",
    - device.type has the cl.device_type.GPU bit set.
  In every other case the kernel is returned unchanged.  pyopencl is
  imported inside the function, and only after the target check passes.

* sumpy/p2p.py calls register_optimization_preambles(knl, self.device)
  immediately before "return knl" in two places: one in the hunk labelled
  class P2PBase and one in the hunk labelled class P2PFromCSR.

NOTE(review): the subject says "pocl CUDA", but the condition matches any
GPU-type device on that platform -- confirm this is the intended scope.

NOTE(review): this copy was rebuilt from a version whose line breaks and
indentation had been collapsed.  Leading whitespace, blank context lines
and the wrapping of the "knl = lp.set_options(...)" context lines are
inferred, and the author's e-mail address is missing from the From:
header.  If the context does not match, try
"git apply --ignore-whitespace"; TODO confirm against the upstream commit.

 sumpy/codegen.py | 14 ++++++++++++++
 sumpy/p2p.py     |  6 ++++++
 2 files changed, 20 insertions(+)

diff --git a/sumpy/codegen.py b/sumpy/codegen.py
index fa13d526..3b19fcb9 100644
--- a/sumpy/codegen.py
+++ b/sumpy/codegen.py
@@ -208,6 +208,20 @@ def register_bessel_callables(loopy_knl):
             Hankel1_01("hank1_01"))
     return loopy_knl
 
+
+def _fp_contract_fast_preamble(preamble_info):
+    yield ("fp_contract_fast_pocl", "#pragma clang fp contract(fast)")
+
+
+def register_optimization_preambles(loopy_knl, device):
+    if isinstance(loopy_knl.target, lp.PyOpenCLTarget):
+        import pyopencl as cl
+        if device.platform.name == "Portable Computing Language" and \
+                (device.type & cl.device_type.GPU):
+            loopy_knl = lp.register_preamble_generators(loopy_knl,
+                [_fp_contract_fast_preamble])
+    return loopy_knl
+
 # }}}
 
 
diff --git a/sumpy/p2p.py b/sumpy/p2p.py
index 4be5916d..d03d8bd1 100644
--- a/sumpy/p2p.py
+++ b/sumpy/p2p.py
@@ -190,6 +190,9 @@ class P2PBase(KernelCacheMixin, KernelComputation):
         knl = lp.set_options(knl,
                 enforce_variable_access_ordered="no_check")
 
+        from sumpy.codegen import register_optimization_preambles
+        knl = register_optimization_preambles(knl, self.device)
+
         return knl
 
 
@@ -714,6 +717,9 @@ class P2PFromCSR(P2PBase):
         knl = lp.set_options(knl,
                 enforce_variable_access_ordered="no_check")
 
+        from sumpy.codegen import register_optimization_preambles
+        knl = register_optimization_preambles(knl, self.device)
+
         return knl
 
     def __call__(self, queue, **kwargs):
-- 
GitLab