From b8b0766570ee42211d5ea74ff8550daff0dee1c9 Mon Sep 17 00:00:00 2001 From: Yichao Yu <yyc1992@gmail.com> Date: Thu, 5 Jun 2014 18:50:28 -0400 Subject: [PATCH] always_inline --- src/c_wrapper/bitlog.cpp | 6 +++-- src/c_wrapper/error.h | 30 ++++++++++++------------ src/c_wrapper/function.h | 12 ++++++++++ src/c_wrapper/pyhelper.h | 4 ++-- src/c_wrapper/utils.h | 48 ++++++++++++++++++++------------------- src/c_wrapper/wrap_cl.cpp | 45 ++++++++++++++++++------------------ 6 files changed, 82 insertions(+), 63 deletions(-) create mode 100644 src/c_wrapper/function.h diff --git a/src/c_wrapper/bitlog.cpp b/src/c_wrapper/bitlog.cpp index da0cb1de..418eb4d8 100644 --- a/src/c_wrapper/bitlog.cpp +++ b/src/c_wrapper/bitlog.cpp @@ -1,4 +1,6 @@ #include "wrap_cl.h" +#include "function.h" + #include <climits> #include <stdint.h> @@ -22,7 +24,7 @@ static const char log_table_8[] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }; -static inline unsigned +static PYOPENCL_INLINE unsigned bitlog2_16(uint16_t v) { if (unsigned long t = v >> 8) { @@ -32,7 +34,7 @@ bitlog2_16(uint16_t v) } } -static inline unsigned +static PYOPENCL_INLINE unsigned bitlog2_32(uint32_t v) { if (uint16_t t = v >> 16) { diff --git a/src/c_wrapper/error.h b/src/c_wrapper/error.h index 6ef88845..c31742fd 100644 --- a/src/c_wrapper/error.h +++ b/src/c_wrapper/error.h @@ -1,5 +1,7 @@ #include "wrap_cl.h" #include "pyhelper.h" +#include "function.h" + #include <string.h> #include <stdexcept> #include <iostream> @@ -15,7 +17,7 @@ namespace pyopencl { #ifdef PYOPENCL_TRACE template<typename FirstType, typename... ArgTypes> -static inline void +static PYOPENCL_INLINE void _print_args(std::ostream &stm, FirstType &&arg1, ArgTypes&&... args) { stm << arg1 << "; "; @@ -23,20 +25,20 @@ _print_args(std::ostream &stm, FirstType &&arg1, ArgTypes&&... args) } template<typename FirstType> -static inline void +static PYOPENCL_INLINE void _print_args(std::ostream &stm, FirstType &&arg1) { stm << arg1 << "; "; } -static inline void +static PYOPENCL_INLINE void print_call_trace(const char *name) { std::cerr << name << std::endl; } template<typename... ArgTypes> -static inline void +static PYOPENCL_INLINE void print_call_trace(const char *name, ArgTypes&&... args) { std::cerr << name << "("; @@ -47,7 +49,7 @@ print_call_trace(const char *name, ArgTypes&&... args) #else template<typename... ArgTypes> -static inline void +static PYOPENCL_INLINE void print_call_trace(ArgTypes&&...) { } @@ -67,19 +69,19 @@ public: { std::cout << rout <<";" << msg<< ";" << c << std::endl; } - inline const char* + PYOPENCL_INLINE const char* routine() const { return m_routine; } - inline cl_int + PYOPENCL_INLINE cl_int code() const { return m_code; } - inline bool + PYOPENCL_INLINE bool is_out_of_memory() const { return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE || @@ -93,7 +95,7 @@ public: // {{{ tracing and error reporting template<typename... ArgTypes2, typename... ArgTypes> -static inline void +static PYOPENCL_INLINE void call_guarded(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { print_call_trace(name); @@ -104,7 +106,7 @@ call_guarded(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) } template<typename T, typename... ArgTypes, typename... ArgTypes2> -PYOPENCL_USE_RESULT static inline T +PYOPENCL_USE_RESULT static PYOPENCL_INLINE T call_guarded(T (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { print_call_trace(name); @@ -119,7 +121,7 @@ call_guarded(T (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) pyopencl::call_guarded(func, #func, args) template<typename... ArgTypes, typename... ArgTypes2> -static inline void +static PYOPENCL_INLINE void call_guarded_cleanup(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { @@ -135,7 +137,7 @@ call_guarded_cleanup(cl_int (*func)(ArgTypes...), const char *name, #define pyopencl_call_guarded_cleanup(func, args...) \ pyopencl::call_guarded_cleanup(func, #func, args) -PYOPENCL_USE_RESULT static inline ::error* +PYOPENCL_USE_RESULT static PYOPENCL_INLINE ::error* c_handle_error(std::function<void()> func) { try { @@ -158,7 +160,7 @@ c_handle_error(std::function<void()> func) } template<typename T> -static inline T +static PYOPENCL_INLINE T retry_mem_error(std::function<T()> func) { try { @@ -174,7 +176,7 @@ retry_mem_error(std::function<T()> func) // }}} template<typename T, typename CLType, typename... ArgTypes> -PYOPENCL_USE_RESULT static inline T* +PYOPENCL_USE_RESULT static PYOPENCL_INLINE T* convert_obj(cl_int (*clRelease)(CLType), const char *name, CLType cl_obj, ArgTypes&&... args) { diff --git a/src/c_wrapper/function.h b/src/c_wrapper/function.h new file mode 100644 index 00000000..cb3b3328 --- /dev/null +++ b/src/c_wrapper/function.h @@ -0,0 +1,12 @@ +#include <functional> + +#ifndef __PYOPENCL_FUNCTION_H +#define __PYOPENCL_FUNCTION_H + +#if defined __GNUC__ && __GNUC__ > 3 +#define PYOPENCL_INLINE inline __attribute__((__always_inline__)) +#else +#define PYOPENCL_INLINE inline +#endif + +#endif diff --git a/src/c_wrapper/pyhelper.h b/src/c_wrapper/pyhelper.h index c98e8654..f336cbae 100644 --- a/src/c_wrapper/pyhelper.h +++ b/src/c_wrapper/pyhelper.h @@ -2,7 +2,7 @@ #define __PYOPENCL_PYHELPER_H #include "wrap_cl.h" -#include <functional> +#include "function.h" namespace pyopencl { @@ -13,7 +13,7 @@ template<typename Ret, typename... Args> class WrapFunc<Ret(Args...)> { typedef Ret (*_FuncType)(Args...); _FuncType m_func; - static inline _FuncType + static PYOPENCL_INLINE _FuncType check_func(_FuncType f) { return f ? f : ([] (Args...) {return Ret();}); diff --git a/src/c_wrapper/utils.h b/src/c_wrapper/utils.h index 23ae22df..4a027a87 100644 --- a/src/c_wrapper/utils.h +++ b/src/c_wrapper/utils.h @@ -1,5 +1,7 @@ #include "wrap_cl.h" #include "error.h" +#include "function.h" + #include <string> #include <sstream> #include <string.h> @@ -9,7 +11,7 @@ #define __PYOPENCL_UTILS_H #define PYOPENCL_DEF_GET_CLASS_T(name) \ - static inline class_t \ + static PYOPENCL_INLINE class_t \ get_class_t() \ { \ return CLASS_##name; \ @@ -17,7 +19,7 @@ template<class T> -PYOPENCL_USE_RESULT static inline std::string +PYOPENCL_USE_RESULT static PYOPENCL_INLINE std::string tostring(const T& v) { std::ostringstream ostr; @@ -43,22 +45,22 @@ public: m_len(len) { } - inline size_t + PYOPENCL_INLINE size_t len() const { return m_len; } - inline T& + PYOPENCL_INLINE T& operator[](int i) { return this->get()[i]; } - inline const T& + PYOPENCL_INLINE const T& operator[](int i) const { return this->get()[i]; } - inline void + PYOPENCL_INLINE void resize(size_t len) { if (len == m_len) @@ -91,7 +93,7 @@ public: }; template<typename T> -static inline cl_bool +static PYOPENCL_INLINE cl_bool cast_bool(const T &v) { return v ? CL_TRUE : CL_FALSE; @@ -119,7 +121,7 @@ public: typedef CLType cl_type; clobj(CLType obj, bool=false) : m_obj(obj) {} - inline const CLType& + PYOPENCL_INLINE const CLType& data() const { return m_obj; @@ -139,7 +141,7 @@ clobj_from_int_ptr(intptr_t ptr) } template<typename T, typename T2> -PYOPENCL_USE_RESULT static inline pyopencl_buf<typename T::cl_type> +PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf<typename T::cl_type> buf_from_class(const T2 *buf2, size_t len) { pyopencl_buf<typename T::cl_type> buf(len); @@ -150,21 +152,21 @@ buf_from_class(const T2 *buf2, size_t len) } template<typename T, typename T2> -PYOPENCL_USE_RESULT static inline pyopencl_buf<typename T::cl_type> +PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf<typename T::cl_type> buf_from_class(const pyopencl_buf<T2> &&buf) { return buf_from_class(buf.get(), buf.len()); } template<typename T, typename T2> -PYOPENCL_USE_RESULT static inline pyopencl_buf<typename T::cl_type> +PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf<typename T::cl_type> buf_from_class(const pyopencl_buf<T2> &buf) { return buf_from_class(buf.get(), buf.len()); } template<typename T, typename T2, typename... ArgTypes> -PYOPENCL_USE_RESULT static inline pyopencl_buf<clbase*> +PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf<clbase*> buf_to_base(const T2 *buf2, size_t len, ArgTypes&&... args) { pyopencl_buf<clbase*> buf(len); @@ -185,7 +187,7 @@ buf_to_base(const T2 *buf2, size_t len, ArgTypes&&... args) } template<typename T, typename T2, typename... ArgTypes> -PYOPENCL_USE_RESULT static inline pyopencl_buf<clbase*> +PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf<clbase*> buf_to_base(const pyopencl_buf<T2> &&buf2, ArgTypes&&... args) { return buf_to_base<T>(buf2.get(), buf2.len(), @@ -193,7 +195,7 @@ buf_to_base(const pyopencl_buf<T2> &&buf2, ArgTypes&&... args) } template<typename T, typename T2, typename... ArgTypes> -PYOPENCL_USE_RESULT static inline pyopencl_buf<clbase*> +PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf<clbase*> buf_to_base(const pyopencl_buf<T2> &buf2, ArgTypes&&... args) { return buf_to_base<T>(buf2.get(), buf2.len(), @@ -201,7 +203,7 @@ buf_to_base(const pyopencl_buf<T2> &buf2, ArgTypes&&... args) } // FIXME -PYOPENCL_USE_RESULT static inline char* +PYOPENCL_USE_RESULT static PYOPENCL_INLINE char* _copy_str(const std::string& str) { return strdup(str.c_str()); @@ -210,7 +212,7 @@ _copy_str(const std::string& str) // {{{ GetInfo helpers template<typename T, typename... ArgTypes, typename... ArgTypes2> -PYOPENCL_USE_RESULT static inline pyopencl_buf<T> +PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf<T> get_vec_info(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { @@ -224,7 +226,7 @@ get_vec_info(cl_int (*func)(ArgTypes...), const char *name, pyopencl::get_vec_info<type>(clGet##what##Info, "clGet" #what "Info", args) template<typename T> -PYOPENCL_USE_RESULT static inline generic_info +PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info convert_array_info(const char *tname, pyopencl_buf<T> &buf) { generic_info info; @@ -237,7 +239,7 @@ convert_array_info(const char *tname, pyopencl_buf<T> &buf) } template<typename T> -PYOPENCL_USE_RESULT static inline generic_info +PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info convert_array_info(const char *tname, pyopencl_buf<T> &&_buf) { pyopencl_buf<T> &buf = _buf; @@ -250,7 +252,7 @@ convert_array_info(const char *tname, pyopencl_buf<T> &&_buf) pyopencl_convert_array_info(type, pyopencl_get_vec_info(type, what, args)) template<typename T, typename Cls> -PYOPENCL_USE_RESULT static inline generic_info +PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info convert_opaque_array_info(pyopencl_buf<T> &buf) { generic_info info; @@ -262,7 +264,7 @@ convert_opaque_array_info(pyopencl_buf<T> &buf) } template<typename T, typename Cls> -PYOPENCL_USE_RESULT static inline generic_info +PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info convert_opaque_array_info(pyopencl_buf<T> &&_buf) { pyopencl_buf<T> &buf = _buf; @@ -274,7 +276,7 @@ convert_opaque_array_info(pyopencl_buf<T> &&_buf) template<typename CLType, typename Cls, typename... ArgTypes, typename... ArgTypes2> -PYOPENCL_USE_RESULT static inline generic_info +PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info get_opaque_info(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { @@ -297,7 +299,7 @@ get_opaque_info(cl_int (*func)(ArgTypes...), const char *name, "clGet" #what "Info", args) template<typename... ArgTypes, typename... ArgTypes2> -PYOPENCL_USE_RESULT static inline generic_info +PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info get_str_info(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { @@ -317,7 +319,7 @@ get_str_info(cl_int (*func)(ArgTypes...), const char *name, pyopencl::get_str_info(clGet##what##Info, "clGet" #what "Info", args) template<typename T, typename... ArgTypes, typename... ArgTypes2> -PYOPENCL_USE_RESULT static inline generic_info +PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info get_int_info(cl_int (*func)(ArgTypes...), const char *name, const char *tpname, ArgTypes2&&... args) { diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp index 24525b44..b348b99f 100644 --- a/src/c_wrapper/wrap_cl.cpp +++ b/src/c_wrapper/wrap_cl.cpp @@ -2,6 +2,7 @@ #include "utils.h" #include "async.h" #include "pyhelper.h" +#include "function.h" #include <stdlib.h> @@ -9,7 +10,7 @@ #if PYOPENCL_CL_VERSION >= 0x1020 template<typename T> -PYOPENCL_USE_RESULT static inline T +PYOPENCL_USE_RESULT static PYOPENCL_INLINE T pyopencl_get_ext_fun(cl_platform_id plat, const char *name, const char *err) { T func = (T)clGetExtensionFunctionAddressForPlatform(plat, name); @@ -20,7 +21,7 @@ pyopencl_get_ext_fun(cl_platform_id plat, const char *name, const char *err) } #else template<typename T> -PYOPENCL_USE_RESULT static inline T +PYOPENCL_USE_RESULT static PYOPENCL_INLINE T pyopencl_get_ext_fun(cl_platform_id, const char *name, const char *err) { T func = (T)clGetExtensionFunctionAddress(name); @@ -65,7 +66,7 @@ public: }; -PYOPENCL_USE_RESULT inline pyopencl_buf<cl_device_id> +PYOPENCL_USE_RESULT PYOPENCL_INLINE pyopencl_buf<cl_device_id> platform::get_devices(cl_device_type devtype) const { cl_uint num_devices = 0; @@ -534,7 +535,7 @@ public: throw clerror("Context.get_info", CL_INVALID_VALUE); } } - PYOPENCL_USE_RESULT inline generic_info + PYOPENCL_USE_RESULT PYOPENCL_INLINE generic_info get_supported_image_formats(cl_mem_flags flags, cl_mem_object_type image_type) const { @@ -753,7 +754,7 @@ public: } #endif }; -static inline event* +static PYOPENCL_INLINE event* new_event(cl_event evt) { return pyopencl_convert_obj(event, clReleaseEvent, evt); @@ -790,7 +791,7 @@ public: py::deref(ward); } }; -static inline event* +static PYOPENCL_INLINE event* new_nanny_event(cl_event evt, void *ward) { return pyopencl_convert_obj(nanny_event, clReleaseEvent, evt, ward); @@ -901,7 +902,7 @@ public: }; // #if PYOPENCL_CL_VERSION >= 0x1020 -// inline +// PYOPENCL_INLINE // event *enqueue_migrate_mem_objects( // command_queue &cq, // py::object py_mem_objects, @@ -928,7 +929,7 @@ public: // #endif // #ifdef cl_ext_migrate_memobject -// inline +// PYOPENCL_INLINE // event *enqueue_migrate_mem_object_ext( // command_queue &cq, // py::object py_mem_objects, @@ -1030,7 +1031,7 @@ public: throw clerror("Image.get_image_info", CL_INVALID_VALUE); } } - inline type_t + PYOPENCL_INLINE type_t get_fill_type() { switch (m_format.image_channel_data_type) { @@ -1047,7 +1048,7 @@ public: } } }; -static inline image* +static PYOPENCL_INLINE image* new_image(cl_mem mem, void *buff, const cl_image_format *fmt) { return pyopencl_convert_obj(image, clReleaseMemObject, mem, buff, fmt); @@ -1057,7 +1058,7 @@ new_image(cl_mem mem, void *buff, const cl_image_format *fmt) // #if PYOPENCL_CL_VERSION >= 0x1020 -// inline +// PYOPENCL_INLINE // image *create_image_from_desc( // context const &ctx, // cl_mem_flags flags, @@ -1115,7 +1116,7 @@ new_image(cl_mem mem, void *buff, const cl_image_format *fmt) // {{{ image transfers - // inline + // PYOPENCL_INLINE // event *enqueue_copy_image_to_buffer( // command_queue &cq, // memory_object_holder &src, @@ -1141,7 +1142,7 @@ new_image(cl_mem mem, void *buff, const cl_image_format *fmt) // PYOPENCL_RETURN_NEW_EVENT(evt); // } - // inline + // PYOPENCL_INLINE // event *enqueue_copy_buffer_to_image( // command_queue &cq, // memory_object_holder &src, @@ -1176,7 +1177,7 @@ new_image(cl_mem mem, void *buff, const cl_image_format *fmt) #ifdef HAVE_GL #ifdef __APPLE__ -static inline cl_context_properties +static PYOPENCL_INLINE cl_context_properties get_apple_cgl_share_group() { CGLContextObj kCGLContext = CGLGetCurrentContext(); @@ -1241,7 +1242,7 @@ create_from_gl_texture(context &ctx, cl_mem_flags flags, GLenum texture_target, } // TODO: - // inline + // PYOPENCL_INLINE // py::tuple get_gl_object_info(memory_object_holder const &mem) // { // cl_gl_object_type otype; @@ -1254,7 +1255,7 @@ typedef cl_int (*clEnqueueGLObjectFunc)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*); -PYOPENCL_USE_RESULT static inline event* +PYOPENCL_USE_RESULT static PYOPENCL_INLINE event* enqueue_gl_objects(clEnqueueGLObjectFunc func, const char *name, command_queue *cq, const clobj_t *mem_objects, uint32_t num_mem_objects, const clobj_t *wait_for, @@ -1278,7 +1279,7 @@ enqueue_gl_objects(clEnqueueGLObjectFunc func, const char *name, // #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) - // inline + // PYOPENCL_INLINE // py::object get_gl_context_info_khr( // py::object py_properties, // cl_gl_context_info param_name, @@ -1370,7 +1371,7 @@ enqueue_gl_objects(clEnqueueGLObjectFunc func, const char *name, // {{{ buffer class buffer; -static inline buffer *new_buffer(cl_mem mem, void *buff=0); +static PYOPENCL_INLINE buffer *new_buffer(cl_mem mem, void *buff=0); class buffer : public memory_object { public: @@ -1421,7 +1422,7 @@ public: // } #endif }; -PYOPENCL_USE_RESULT static inline buffer* +PYOPENCL_USE_RESULT static PYOPENCL_INLINE buffer* new_buffer(cl_mem mem, void *buff) { return pyopencl_convert_obj(buffer, clReleaseMemObject, mem, buff); @@ -1686,7 +1687,7 @@ public: // } // #endif }; -PYOPENCL_USE_RESULT static inline program* +PYOPENCL_USE_RESULT static PYOPENCL_INLINE program* new_program(cl_program prog, program_kind_type progkind=KND_UNKNOWN) { return pyopencl_convert_obj(program, clReleaseProgram, prog, progkind); @@ -2570,7 +2571,7 @@ enqueue_map_image(clobj_t *_evt, clobj_t *map, clobj_t _queue, clobj_t _mem, } #if PYOPENCL_CL_VERSION >= 0x1020 - // inline + // PYOPENCL_INLINE // event *enqueue_fill_image( // command_queue &cq, // memory_object_holder &mem, @@ -2611,7 +2612,7 @@ clobj__int_ptr(clobj_t obj) return obj->intptr(); } -static inline clobj_t +static PYOPENCL_INLINE clobj_t _from_int_ptr(intptr_t ptr, class_t class_) { switch(class_) { -- GitLab