Skip to content
Snippets Groups Projects
Commit bc5c2e24 authored by Matt Wala's avatar Matt Wala
Browse files

Improve cache retrieval speeds for execution by using lazy unpickling

of instructions.

This requires caching the generation of invokers (which itself should
also give a speed boost), since invoker generation needs to analyze
the instructions in the kernel.
parent af7c67f1
No related branches found
No related tags found
1 merge request: !242 Reduce cache-to-execution latency by using lazy unpickling of instructions.
......@@ -1277,10 +1277,21 @@ class LoopKernel(ImmutableRecordWithoutPickling):
result = dict(
(key, getattr(self, key))
for key in self.__class__.fields
if hasattr(self, key))
if hasattr(self, key) and key != "instructions")
result.pop("cache_manager", None)
# Make the instructions unpickle lazily, to support faster
# cache retrieval for execution.
from loopy.kernel.instruction import _get_insn_eq_key, _get_insn_hash_key
from loopy.tools import (
LazilyUnpicklingListWithEqAndPersistentHashing as LazyList)
result["instructions"] = LazyList(
self.instructions,
eq_key_getter=_get_insn_eq_key,
persistent_hash_key_getter=_get_insn_hash_key)
# make sure that kernels are pickled with a cached hash key in place
from loopy.tools import LoopyKeyBuilder
LoopyKeyBuilder()(self)
......
......@@ -1388,4 +1388,16 @@ class BarrierInstruction(_DataObliviousInstruction):
# }}}
# {{{ key getters
def _get_insn_eq_key(insn):
    """Return the equality key of *insn*.

    The key is whatever ``insn._key_builder.key()`` produces. This
    function is passed as the ``eq_key_getter`` of the lazily unpickling
    instruction list built when a kernel is pickled.

    :arg insn: an object exposing a ``_key_builder`` attribute
    """
    key_builder = insn._key_builder
    return key_builder.key()
def _get_insn_hash_key(insn):
    """Return the persistent-hash key of *insn*.

    The key is whatever ``insn._key_builder.hash_key()`` produces. This
    function is passed as the ``persistent_hash_key_getter`` of the lazily
    unpickling instruction list built when a kernel is pickled.

    :arg insn: an object exposing a ``_key_builder`` attribute
    """
    key_builder = insn._key_builder
    return key_builder.hash_key()
# }}}
# vim: foldmethod=marker
......@@ -382,8 +382,11 @@ class CKernelExecutor(KernelExecutorBase):
"""
self.compiler = compiler if compiler else CCompiler()
super(CKernelExecutor, self).__init__(kernel,
CExecutionWrapperGenerator())
super(CKernelExecutor, self).__init__(kernel)
def get_invoker_uncached(self, kernel, codegen_result):
    """Build the invoker for *kernel* without consulting any cache.

    :arg kernel: the kernel to build an invoker for
    :arg codegen_result: the code generation result for *kernel*
    :returns: the result of calling a fresh
        :class:`CExecutionWrapperGenerator` instance on
        ``(kernel, codegen_result)``
    """
    # Instantiate the generator before calling it: calling the class
    # itself would hand (kernel, codegen_result) to the constructor
    # instead of invoking the generator. This mirrors the PyOpenCL
    # executor, which also calls an instance.
    generator = CExecutionWrapperGenerator()
    return generator(kernel, codegen_result)
@memoize_method
def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
......@@ -423,7 +426,7 @@ class CKernelExecutor(KernelExecutorBase):
kernel=kernel,
c_kernels=c_kernels,
implemented_data_info=codegen_result.implemented_data_info,
invoker=self.invoker(kernel, codegen_result))
invoker=self.get_invoker(kernel, codegen_result))
# }}}
......
......@@ -625,9 +625,10 @@ class ExecutionWrapperGeneratorBase(object):
:codegen_result: the loopy :class:`CodeGenerationResult` created
by code generation
:returns: py_func, a python function that handles excution of this
kernel
:returns: A python callable that handles execution of this
kernel
"""
options = kernel.options
implemented_data_info = codegen_result.implemented_data_info
......@@ -677,7 +678,7 @@ class ExecutionWrapperGeneratorBase(object):
with open(options.write_wrapper, "w") as outf:
outf.write(output)
return gen.get_function()
return gen.get_picklable_function()
# }}}
......@@ -697,6 +698,11 @@ typed_and_scheduled_cache = WriteOncePersistentDict(
key_builder=LoopyKeyBuilder())
# Cache of generated invokers, consulted and filled by
# KernelExecutorBase.get_invoker. The identifier embeds DATA_MODEL_VERSION,
# so entries are namespaced per data-model version.
invoker_cache = WriteOncePersistentDict(
    "loopy-invoker-cache-v1-" + DATA_MODEL_VERSION,
    key_builder=LoopyKeyBuilder(),
)
# {{{ kernel executor
class KernelExecutorBase(object):
......@@ -707,7 +713,7 @@ class KernelExecutorBase(object):
.. automethod:: __call__
"""
def __init__(self, kernel, invoker):
def __init__(self, kernel):
"""
:arg kernel: a loopy.LoopKernel
"""
......@@ -723,8 +729,6 @@ class KernelExecutorBase(object):
arg.dtype is None
for arg in kernel.args)
self.invoker = invoker
def get_typed_and_scheduled_kernel_uncached(self, arg_to_dtype_set):
from loopy.kernel.tools import add_dtypes
......@@ -833,6 +837,29 @@ class KernelExecutorBase(object):
code = generate_code_v2(kernel)
return code.device_code()
def get_invoker_uncached(self, kernel, *args):
    """Build the invoker for *kernel* without consulting the cache.

    This base implementation always raises; :meth:`get_invoker` calls
    this method on a cache miss, so executors are expected to override it.

    :arg kernel: the kernel to build an invoker for
    :arg args: extra positional arguments forwarded by :meth:`get_invoker`
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
def get_invoker(self, kernel, *args):
    """Return the invoker for *kernel*, using ``invoker_cache`` if enabled.

    When ``loopy.CACHING_ENABLED`` is true, the cache is consulted first
    and a freshly built invoker is stored back into it. Otherwise the
    invoker is always built via :meth:`get_invoker_uncached`.

    :arg kernel: the kernel to build an invoker for; also part of the
        cache key
    :arg args: extra positional arguments forwarded unchanged to
        :meth:`get_invoker_uncached`
    :returns: the cached or newly built invoker
    """
    from loopy import CACHING_ENABLED

    # The executor class name is part of the key, so different executor
    # classes do not share entries for the same kernel.
    # NOTE(review): *args is not part of the key -- presumably it is fully
    # determined by *kernel*; confirm against callers.
    cache_key = (self.__class__.__name__, kernel)

    if CACHING_ENABLED:
        try:
            return invoker_cache[cache_key]
        except KeyError:
            pass

    # Pass the name as a logging argument so the message is only formatted
    # when debug logging is actually enabled.
    logger.debug("%s: invoker cache miss", kernel.name)

    invoker = self.get_invoker_uncached(kernel, *args)

    if CACHING_ENABLED:
        invoker_cache.store_if_not_present(cache_key, invoker)

    return invoker
# }}}
# {{{ call and info generator
......
......@@ -261,8 +261,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase):
specific arguments.
"""
super(PyOpenCLKernelExecutor, self).__init__(
kernel, invoker=PyOpenCLExecutionWrapperGenerator())
super(PyOpenCLKernelExecutor, self).__init__(kernel)
self.context = context
......@@ -270,6 +269,10 @@ class PyOpenCLKernelExecutor(KernelExecutorBase):
if isinstance(kernel.target, PyOpenCLTarget):
self.kernel = kernel.copy(target=PyOpenCLTarget(context.devices[0]))
def get_invoker_uncached(self, kernel, codegen_result):
    """Build the invoker for *kernel* without consulting any cache.

    :arg kernel: the kernel to build an invoker for
    :arg codegen_result: the code generation result for *kernel*
    :returns: the result of calling a fresh
        :class:`PyOpenCLExecutionWrapperGenerator` instance on
        ``(kernel, codegen_result)``
    """
    return PyOpenCLExecutionWrapperGenerator()(kernel, codegen_result)
@memoize_method
def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)
......@@ -309,7 +312,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase):
kernel=kernel,
cl_kernels=cl_kernels,
implemented_data_info=codegen_result.implemented_data_info,
invoker=self.invoker(kernel, codegen_result))
invoker=self.get_invoker(kernel, codegen_result))
def __call__(self, queue, **kwargs):
"""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment