diff --git a/src/c_wrapper/clhelper.h b/src/c_wrapper/clhelper.h
index 593e88bbc5d40f299a1b79fa3f393167a5f6c1d7..2c7f66b9db2272e2b5637861a0e7f05a909e02ab 100644
--- a/src/c_wrapper/clhelper.h
+++ b/src/c_wrapper/clhelper.h
@@ -190,6 +190,17 @@ public:
             call_guarded_cleanup(m_release, m_name, m_clobj);
         }
     }
+    template<bool out>
+    PYOPENCL_INLINE void
+    print(std::ostream &stm)
+    {
+        // TODO: <out> picks the form: *m_ret with m_clobj, or &m_clobj.
+        if (out) {
+            stm << *m_ret << "<" << m_clobj << ">";
+        } else {
+            stm << &m_clobj;
+        }
+    }
 };
 
 template<typename CLObj, typename... T>
@@ -236,4 +247,12 @@ get_ext_fun(const char *name, const char *err)
 
 }
 
+// Prints both fields of a cl_image_format as "name: value", comma-separated.
+static PYOPENCL_INLINE std::ostream&
+operator<<(std::ostream &stm, const cl_image_format &fmt)
+{
+    return stm << "channel_order: " << fmt.image_channel_order
+               << ", channel_data_type: " << fmt.image_channel_data_type;
+}
+
 #endif
diff --git a/src/c_wrapper/clobj.h b/src/c_wrapper/clobj.h
index 66be5a5604e4097e4f591c3da3f3d86015c7050e..1591359e586cd3e9cb0fc4336eadd75d9471596f 100644
--- a/src/c_wrapper/clobj.h
+++ b/src/c_wrapper/clobj.h
@@ -47,6 +47,14 @@ public:
     }
 };
 
+template<typename CLObj>
+static PYOPENCL_INLINE void
+_print_clobj(std::ostream &stm, CLObj *obj)
+{
+    // TODO: for now writes the pointer obj, then obj->data() inside "<" ">".
+    stm << obj << "<" << obj->data() << ">";
+}
+
 template<typename CLObj>
 class CLArg<CLObj,
             typename std::enable_if<
@@ -55,6 +63,7 @@ class CLArg<CLObj,
 private:
     CLObj &m_obj;
 public:
+    constexpr static bool is_out = false;
     CLArg(CLObj &obj) : m_obj(obj)
     {
     }
@@ -63,6 +72,12 @@ public:
     {
         return m_obj.data();
     }
+    template<bool>  // flag unnamed and unused: both modes print the same
+    PYOPENCL_INLINE void
+    print(std::ostream &stm)
+    {
+        _print_clobj(stm, &m_obj);
+    }
 };
 
 template<typename CLObj>
@@ -73,6 +88,7 @@ class CLArg<CLObj*,
 private:
     CLObj *m_obj;
 public:
+    constexpr static bool is_out = false;
     CLArg(CLObj *obj) : m_obj(obj)
     {
     }
@@ -81,6 +97,12 @@ public:
     {
         return m_obj->data();
     }
+    template<bool>  // flag unnamed and unused: both modes print the same
+    PYOPENCL_INLINE void
+    print(std::ostream &stm)
+    {
+        _print_clobj(stm, m_obj);
+    }
 };
 
 template<typename CLObj>
diff --git a/src/c_wrapper/error.h b/src/c_wrapper/error.h
index 0bc52ed89741832f485c0bbb0c6521a0c9ac829b..19c5b62c77dab0473154dcb83ea491b88b6ad5ef 100644
--- a/src/c_wrapper/error.h
+++ b/src/c_wrapper/error.h
@@ -15,40 +15,6 @@
 
 namespace pyopencl {
 
-template<typename FirstType, typename... ArgTypes>
-static PYOPENCL_INLINE void
-_print_args(std::ostream &stm, FirstType &&arg1, ArgTypes&&... args)
-{
-    stm << arg1 << "; ";
-    _print_args(stm, std::forward<ArgTypes>(args)...);
-}
-
-template<typename FirstType>
-static PYOPENCL_INLINE void
-_print_args(std::ostream &stm, FirstType &&arg1)
-{
-    stm << arg1 << "; ";
-}
-
-static PYOPENCL_INLINE void
-print_call_trace(const char *name)
-{
-    if (!DEBUG_ON)
-        return;
-    std::cerr << name << std::endl;
-}
-
-template<typename... ArgTypes>
-static PYOPENCL_INLINE void
-print_call_trace(const char *name, ArgTypes&&... args)
-{
-    if (!DEBUG_ON)
-        return;
-    std::cerr << name << "(";
-    _print_args(std::cerr, args...);
-    std::cerr << ")" << std::endl;
-}
-
 // {{{ error
 
 class clerror : public std::runtime_error {
@@ -131,22 +97,55 @@ struct __CLCleanup<T, decltype((void)(std::declval<T>().cleanup()))> {
     }
 };
 
+template<typename T, class = void>
+struct __CLPrintOut {  // primary template: call() writes nothing
+    static PYOPENCL_INLINE void
+    call(T, std::ostream&)
+    {
+    }
+};
+
+template<typename T>
+struct __CLPrintOut<T, typename std::enable_if<  // T may be a reference type
+                           std::remove_reference<T>::type::is_out>::type> {
+    static PYOPENCL_INLINE void call(T v, std::ostream &stm)
+    {
+        v.template print<true>(stm);
+        stm << ", ";
+    }
+};
+
+template<typename T, class = void>
+struct __CLPrint {  // writes v.print<false>(stm) followed by ", "
+    static PYOPENCL_INLINE void
+    call(T v, std::ostream &stm)
+    {
+        v.template print<false>(stm);
+        stm << ", ";
+    }
+};
+
 template<template<typename...> class Caller, size_t n, typename T>
 struct __CLCall {
+    template<typename... Ts>  // NOTE(review): ts is forwarded to two calls
     static PYOPENCL_INLINE void
-    call(T &&t)
+    call(T &&t, Ts&&... ts)
     {
-        __CLCall<Caller, n - 1, T>::call(std::forward<T>(t));
-        Caller<decltype(std::get<n>(t))>::call(std::get<n>(t));
+        __CLCall<Caller, n - 1, T>::call(std::forward<T>(t),
+                                         std::forward<Ts>(ts)...);
+        Caller<decltype(std::get<n>(t))>::call(std::get<n>(t),
+                                               std::forward<Ts>(ts)...);
     }
 };
 
 template<template<typename...> class Caller, typename T>
 struct __CLCall<Caller, 0, T> {
+    template<typename... Ts>  // ts: extra arguments handed to Caller::call
     static PYOPENCL_INLINE void
-    call(T &&t)
+    call(T &&t, Ts&&... ts)
     {
-        Caller<decltype(std::get<0>(t))>::call(std::get<0>(t));
+        Caller<decltype(std::get<0>(t))>::call(std::get<0>(t),
+                                               std::forward<Ts>(ts)...);
     }
 };
 
@@ -156,11 +155,19 @@ public:
     using ArgPack<CLArg, Types...>::ArgPack;
     template<typename Func>
     PYOPENCL_INLINE auto
-    clcall(Func func)
+    clcall(Func func, const char *name)
         -> decltype(this->template call<__CLArgGetter>(func))
     {
-        auto res = this->template call<__CLArgGetter>(func);
         typename CLArgPack::tuple_base *that = this;
+        if (DEBUG_ON) {
+            // Trace "name(arg, arg, ) = " before the call is made.
+            std::cerr << name << "(";
+            __CLCall<__CLPrint, sizeof...(Types) - 1,
+                     decltype(*that)>::call(*that, std::cerr);
+            // TODO print results
+            std::cerr << ") = " << std::endl;
+        }
+        auto res = this->template call<__CLArgGetter>(func);
         __CLCall<__CLFinish, sizeof...(Types) - 1,
                  decltype(*that)>::call(*that);
         __CLCall<__CLCleanup, sizeof...(Types) - 1,
@@ -181,9 +188,8 @@ template<typename... ArgTypes2, typename... ArgTypes>
 static PYOPENCL_INLINE void
 call_guarded(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args)
 {
-    print_call_trace(name);
     auto argpack = make_clargpack(std::forward<ArgTypes2>(args)...);
-    cl_int status_code = argpack.clcall(func);
+    cl_int status_code = argpack.clcall(func, name);
     if (status_code != CL_SUCCESS) {
         throw clerror(name, status_code);
     }
@@ -193,11 +199,12 @@ template<typename T, typename... ArgTypes, typename... ArgTypes2>
 PYOPENCL_USE_RESULT static PYOPENCL_INLINE T
 call_guarded(T (*func)(ArgTypes...), const char *name, ArgTypes2&&... args)
 {
-    print_call_trace(name);
     cl_int status_code = CL_SUCCESS;
-    auto argpack = make_clargpack(std::forward<ArgTypes2>(args)...,
-                                  &status_code);
-    T res = argpack.clcall(func);
+    // This magically turns off a weird gcc warning of uninitialized variable.
+    auto p = &status_code;
+    auto &_p = p;
+    auto argpack = make_clargpack(std::forward<ArgTypes2>(args)..., _p);
+    T res = argpack.clcall(func, name);
     if (status_code != CL_SUCCESS) {
         throw clerror(name, status_code);
     }
@@ -211,9 +218,8 @@ static PYOPENCL_INLINE void
 call_guarded_cleanup(cl_int (*func)(ArgTypes...), const char *name,
                      ArgTypes2&&... args)
 {
-    print_call_trace(name);
     auto argpack = make_clargpack(std::forward<ArgTypes2>(args)...);
-    cl_int status_code = argpack.clcall(func);
+    cl_int status_code = argpack.clcall(func, name);
     if (status_code != CL_SUCCESS) {
         std::cerr
             << ("PyOpenCL WARNING: a clean-up operation failed "
diff --git a/src/c_wrapper/function.h b/src/c_wrapper/function.h
index 5d07ca695f40f18edeb49032ad8ca934b0ef3cdb..b7fe1011165e31ca08e58ddbae0ce31a6688efa4 100644
--- a/src/c_wrapper/function.h
+++ b/src/c_wrapper/function.h
@@ -67,11 +67,6 @@ private:
     {
         return t;
     }
-    static PYOPENCL_INLINE std::tuple<>
-    ensure_tuple()
-    {
-        return std::tuple<>();
-    }
 
     template<typename T>
     using ArgConvert = Convert<_ArgType<T> >;
diff --git a/src/c_wrapper/utils.h b/src/c_wrapper/utils.h
index f96e61997eb3335bf8dcdc8ec1ea4fae2ee47f90..5979a3aef392a8c5ca4f0201f8014ee55aaee98d 100644
--- a/src/c_wrapper/utils.h
+++ b/src/c_wrapper/utils.h
@@ -29,11 +29,36 @@ tostring(const T& v)
 
 namespace pyopencl {
 
+// TODO: default printer; streams the argument with operator<<.
+template<typename T, bool, class = void>
+struct CLGenericArgPrinter {
+    static PYOPENCL_INLINE void
+    print(std::ostream &stm, T &arg)
+    {
+        stm << arg;
+    }
+};
+
+template<bool out>
+struct CLGenericArgPrinter<std::nullptr_t, out, void> {
+    static PYOPENCL_INLINE void
+    print(std::ostream &stm, std::nullptr_t&)
+    {
+        stm << static_cast<void*>(nullptr);  // streamed as a null void*
+    }
+};
+
+template<typename T, class = void>
+struct CLGenericArgOut {  // primary template: not an output argument
+    constexpr static bool value = false;
+};
+
 template<typename T, class = void>
 class CLArg {
 private:
     T &m_arg;
 public:
+    constexpr static bool is_out = CLGenericArgOut<T>::value;
     CLArg(T &arg) noexcept
         : m_arg(arg)
     {}
@@ -45,6 +70,12 @@ public:
     {
         return m_arg;
     }
+    template<bool out>  // out is passed on to CLGenericArgPrinter
+    PYOPENCL_INLINE void
+    print(std::ostream &stm)
+    {
+        CLGenericArgPrinter<T, out>::print(stm, m_arg);
+    }
 };
 
 enum class ArgType {
@@ -66,6 +97,7 @@ protected:
     }
 public:
     typedef T type;
+    constexpr static size_t ele_size = sizeof(T);
     constexpr static ArgType arg_type = AT;
     ArgBuffer(T *buf, size_t l) noexcept
         : m_buf(buf), m_len(l)
@@ -143,6 +175,38 @@ struct _ArgBufferConverter<Buff, typename std::enable_if<
     }
 };
 
+// Prints a buffer argument: optionally "[e0, e1, ...] <", then the byte size
+// (ArgType::SizeOf) or element count (ArgType::Length), then the pointer.
+template<typename Buff>
+static PYOPENCL_INLINE void
+_print_buf(std::ostream &stm, Buff &&buff, ArgType arg_type, bool content)
+{
+    typedef decltype(buff.len()) len_t;
+    len_t len = buff.len();
+    typedef typename std::remove_reference<Buff>::type _Buff;
+    size_t ele_size = _Buff::ele_size;
+    if (content) {
+        stm << "[";
+        for (len_t i = 0;i < len;i++) {
+            stm << buff.get()[i];
+            if (i != len - 1) {
+                stm << ", ";
+            }
+        }
+        stm << "] <";
+    }
+    // No fall-through: SizeOf must not also print the element count.
+    if (arg_type == ArgType::SizeOf) {
+        stm << ele_size * len << ", ";
+    } else if (arg_type == ArgType::Length) {
+        stm << len << ", ";
+    }
+    stm << static_cast<const void*>(buff.get());  // never as a C string
+    if (content) {
+        stm << ">";
+    }
+}
+
 template<typename Buff>
 class CLArg<Buff, typename std::enable_if<std::is_base_of<
                                               ArgBuffer<typename Buff::type,
@@ -151,6 +215,8 @@ class CLArg<Buff, typename std::enable_if<std::is_base_of<
 private:
     Buff &m_buff;
 public:
+    constexpr static bool is_out = !(std::is_const<Buff>::value ||
+                                     std::is_const<typename Buff::type>::value);
     CLArg(Buff &buff) noexcept
         : m_buff(buff)
     {}
@@ -163,6 +229,12 @@ public:
     {
         return _ArgBufferConverter<Buff>::convert(m_buff);
     }
+    template<bool out>  // elements are omitted only when !out && is_out
+    PYOPENCL_INLINE void
+    print(std::ostream &stm)
+    {
+        _print_buf(stm, m_buff, Buff::arg_type, out || !is_out);
+    }
 };
 
 template<typename T, size_t n, ArgType AT=ArgType::None>
@@ -192,6 +264,7 @@ private:
     bool m_need_cleanup;
     T &m_arg;
 public:
+    constexpr static bool is_out = true;
     CLArg(T &arg)
         : m_finished(false), m_need_cleanup(true), m_arg(arg)
     {
@@ -223,6 +296,12 @@ public:
             m_arg.cleanup(m_finished);
         }
     }
+    template<bool out>  // delegates to the wrapped argument's print<out>
+    PYOPENCL_INLINE void
+    print(std::ostream &stm)
+    {
+        m_arg.template print<out>(stm);
+    }
 };
 
 template<typename T>
@@ -236,6 +315,7 @@ template<typename T>
 class pyopencl_buf : public std::unique_ptr<T, _D<T> > {
     size_t m_len;
 public:
+    constexpr static size_t ele_size = sizeof(T);
     pyopencl_buf(size_t len=1) :
         std::unique_ptr<T, _D<T> >((T*)(len ? malloc(sizeof(T) * len) :
                                         nullptr)),
@@ -275,6 +355,9 @@ class CLArg<Buff, typename std::enable_if<
 private:
     Buff &m_buff;
 public:
+    constexpr static bool is_out =
+        !(std::is_const<Buff>::value ||
+          std::is_const<typename Buff::element_type>::value);
     CLArg(Buff &buff) noexcept
         : m_buff(buff)
     {}
@@ -287,6 +370,12 @@ public:
     {
         return std::make_tuple(m_buff.len(), m_buff.get());
     }
+    template<bool out>  // elements are omitted only when !out && is_out
+    PYOPENCL_INLINE void
+    print(std::ostream &stm)
+    {
+        _print_buf(stm, m_buff, ArgType::Length, out || !is_out);
+    }
 };
 
 template<typename T>