/*******************************************************************************
 * Copyright (c) 2008-2010 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

/*! \file
 *
 *   \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 17)    
 *   \author Benedict R. Gaster and Laurent Morichetti
 *   
 *   Additions and fixes from Brian Cole, March 3rd 2010.
 *   
 *   \version 1.0
 *   \date $Date: 2010-04-23 10:16:50 -0500 (Fri, 23 Apr 2010) $
 *
 */

/*! \mainpage
 * \section intro Introduction
 * For many large applications C++ is the language of choice and so it seems
 * reasonable to define C++ bindings for OpenCL.
 *
 * This document describes the C++ binding interface for OpenCL 1.0 (rev 48)
 * and OpenCL 1.1 (rev 17).
 *
 * The interface is contained within a single C++ header file \em cl.hpp and all
 * definitions are contained within the namespace \em cl. There is no additional
 * requirement to include \em cl.h and to use either the C++ or original C
 * bindings it is enough to simply include \em cl.hpp.
 *
 * The bindings themselves are lightweight and correspond closely to the
 * underlying C API. Using the C++ bindings introduces no additional execution
 * overhead.
 *
 * \section example Example
 *
 * The following example shows a general use case for the C++
 * bindings, including support for the optional exception feature and
 * also the supplied vector and string classes; see the following sections
 * for descriptions of these features.
 *
 * \code
 * #define __CL_ENABLE_EXCEPTIONS
 * #define __NO_STD_VECTOR
 * #define __NO_STD_STRING
 * 
 * #if defined(__APPLE__) || defined(__MACOSX)
 * #include <OpenCL/cl.hpp>
 * #else
 * #include <CL/cl.hpp>
 * #endif
 * #include <cstdio>
 * #include <cstdlib>
 * #include <iostream>
 * 
 *  const char * helloStr  = "__kernel void "
 *                           "hello(void) "
 *                           "{ "
 *                           "  "
 *                           "} ";
 * 
 *  int
 *  main(void)
 *  {
 *     cl_int err = CL_SUCCESS;
 *     try {
 *       cl::Context context(CL_DEVICE_TYPE_CPU, 0, NULL, NULL, &err); 
 * 
 *       cl::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
 * 
 *       cl::Program::Sources source(1,
 *           std::make_pair(helloStr,strlen(helloStr)));
 *       cl::Program program_ = cl::Program(context, source);
 *       program_.build(devices);
 * 
 *       cl::Kernel kernel(program_, "hello", &err);
 * 
 *       cl::CommandQueue queue(context, devices[0], 0, &err);
 *       cl::KernelFunctor func = kernel.bind(
 *          queue,
 *          cl::NDRange(4, 4),
 *          cl::NDRange(2, 2));
 * 
 *       func().wait();
 *     }
 *     catch (cl::Error err) {
 *        std::cerr 
 *           << "ERROR: "
 *           << err.what()
 *           << "("
 *           << err.err()
 *           << ")"
 *           << std::endl;
 *     }
 * 
 *    return EXIT_SUCCESS;
 *  }
 * 
 * \endcode
 *
 * \section exceptions Exceptions
 * The use of C++ exceptions can provide a structured approach to error
 * handling within large applications. The OpenCL C++ bindings exception feature
 * provides the capability of using C++ exceptions to track and handle errors
 * generated by the underlying  OpenCL C API.
 *
 * It is understood that the use of C++ exceptions is not universal and their
 * use should be optional. Furthermore, if exceptions are not used, then the
 * resulting application must compile and work without exception support. By
 * default C++ exceptions are not enabled and the OpenCL error code is returned,
 * or set, as per the original OpenCL C API.
 *
 * Exception usage must be explicitly enabled by defining the preprocessor macro
 * \em __CL_ENABLE_EXCEPTIONS. Once enabled an error, i.e. a value other than
 * \em CL_SUCCESS, originally reported via a return value will be reported by
 * throwing the exception class \em Error. By default the \em what() method of
 * the class \em Error will return a const pointer to a string naming the
 * particular OpenCL C API called that reported the error, e.g.
 * "clGetDeviceInfo", "clGetPlatformInfo", and so on.
 *
 * It is possible to override the default behavior for what() by defining the
 * preprocessor macro \em __CL_USER_OVERRIDE_ERROR_STRINGS and providing
 * string constants for each of the following preprocessor macros:\n
 * \code
 *      __GET_DEVICE_INFO_ERR
 *      __GET_PLATFORM_INFO_ERR
 *      __GET_DEVICE_IDS_ERR
 *      __GET_PLATFORM_IDS_ERR
 *      __GET_CONTEXT_INFO_ERR
 *      __GET_EVENT_INFO_ERR
 *      __GET_EVENT_PROFILE_INFO_ERR
 *      __GET_MEM_OBJECT_INFO_ERR
 *      __GET_IMAGE_INFO_ERR
 *      __GET_SAMPLER_INFO_ERR
 *      __GET_KERNEL_INFO_ERR
 *      __GET_KERNEL_WORK_GROUP_INFO_ERR
 *      __GET_PROGRAM_INFO_ERR
 *      __GET_PROGRAM_BUILD_INFO_ERR
 *      __GET_COMMAND_QUEUE_INFO_ERR
 *      __CREATE_CONTEXT_FROM_TYPE_ERR
 *      __GET_SUPPORTED_IMAGE_FORMATS_ERR
 *      __CREATE_BUFFER_ERR
 *      __CREATE_SUBBUFFER_ERR
 *      __CREATE_GL_BUFFER_ERR
 *      __GET_GL_OBJECT_INFO_ERR
 *      __CREATE_IMAGE2D_ERR
 *      __CREATE_IMAGE3D_ERR
 *      __CREATE_SAMPLER_ERR
 *      __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR
 *      __CREATE_USER_EVENT_ERR
 *      __SET_USER_EVENT_STATUS_ERR
 *      __SET_EVENT_CALLBACK_ERR
 *      __WAIT_FOR_EVENTS_ERR
 *      __CREATE_KERNEL_ERR
 *      __SET_KERNEL_ARGS_ERR
 *      __CREATE_PROGRAM_WITH_SOURCE_ERR
 *      __CREATE_PROGRAM_WITH_BINARY_ERR
 *      __BUILD_PROGRAM_ERR
 *      __CREATE_KERNELS_IN_PROGRAM_ERR
 *      __CREATE_COMMAND_QUEUE_ERR
 *      __SET_COMMAND_QUEUE_PROPERTY_ERR
 *      __ENQUEUE_READ_BUFFER_ERR
 *      __ENQUEUE_READ_BUFFER_RECT_ERR
 *      __ENQUEUE_WRITE_BUFFER_ERR
 *      __ENQUEUE_WRITE_BUFFER_RECT_ERR
 *      __ENQEUE_COPY_BUFFER_ERR
 *      __ENQEUE_COPY_BUFFER_RECT_ERR
 *      __ENQUEUE_READ_IMAGE_ERR
 *      __ENQUEUE_WRITE_IMAGE_ERR
 *      __ENQUEUE_COPY_IMAGE_ERR
 *      __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR
 *      __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR
 *      __ENQUEUE_MAP_BUFFER_ERR
 *      __ENQUEUE_MAP_IMAGE_ERR
 *      __ENQUEUE_UNMAP_MEM_OBJECT_ERR
 *      __ENQUEUE_NDRANGE_KERNEL_ERR
 *      __ENQUEUE_TASK_ERR
 *      __ENQUEUE_NATIVE_KERNEL
 *      __ENQUEUE_MARKER_ERR
 *      __ENQUEUE_WAIT_FOR_EVENTS_ERR
 *      __ENQUEUE_BARRIER_ERR
 *      __ENQUEUE_ACQUIRE_GL_ERR
 *      __ENQUEUE_RELEASE_GL_ERR
 *      __UNLOAD_COMPILER_ERR
 *      __FLUSH_ERR
 *      __FINISH_ERR
 * \endcode
 *
 * \section vectorstring Replacing STL's vector and string classes
 * 
 * While C++'s Standard Template Library provides an excellent
 * resource for quick access to many useful algorithms and containers,
 * it is often not used due to compatibility issues across different
 * toolchains and operating systems. In designing the original set of C++
 * bindings we found it useful to access std::vector and std::string
 * but at the same time realise that it may not be feasible for these to
 * be used within a production system. So, like exceptions, the
 * developer is given the ability to not include anything from the STL
 * while using the C++ bindings, and replacements are provided for both
 * std::vector and std::string, or the developer has the option to supply
 * their own implementations.
 * 
 * By default, to avoid issues with backward compatibility, both std::vector
 * and std::string are used. Either can be overridden by defining, for vectors:
 *
 *    - If __NO_STD_VECTOR is defined and __USE_DEV_VECTOR is not defined, then 
 *    the vector type:
 *
 *      template cl::vector<
 *           typename T, 
 *           unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>;
 *
 *   is used instead of std::vector. The space requirements for
 *   cl::vector are defined statically and default to 10 elements;
 *   this default can be overridden by defining:
 *
 *      #define __MAX_DEFAULT_VECTOR_SIZE N
 *
 *   before including cl.hpp.
 *
 * For strings:
 *
 *   - If __NO_STD_STRING is defined and __USE_DEV_STRING is not
 *   defined, then the string type:
 *
 *     class cl::string;
 *
 *   is used instead of std::string. Unlike cl::vector the size of a
 *   given string is not defined statically but allocated at creation,
 *   however, unlike std::string once created its size cannot change.
 *
 * In the cases where the developer would like to provide their own
 * replacement implementations for either std::vector or std::string,
 * then this can be achieved by defining the following.
 *
 * For vectors the developer must define:
 *
 *    #define __USE_DEV_VECTOR 
 *
 * to tell cl.hpp that std::vector and cl::vector should not be
 * used. When __USE_DEV_VECTOR is defined the user must also provide a mapping
 * from their vector template type which must be parametrized with at
 * least a single type argument and all additional arguments must
 * follow this and have defaults. The mapping is given by defining:
 *
 *    #define VECTOR_CLASS typeName
 *
 * Note, that as C++ does not currently support typedef templates the
 * vector type is given solely by its name through a #define.
 *
 * For strings the developer must define:
 *
 *    #define __USE_DEV_STRING
 *
 * to tell cl.hpp that std::string and cl::string should not be
 * used. When __USE_DEV_STRING is defined the user must also provide a mapping
 * from their string class. The mapping is given by defining:
 *
 *    typedef stringType STRING_CLASS;
 */
#ifndef CL_HPP_
#define CL_HPP_

#ifdef _WIN32
#include <windows.h>
#include <CL/cl.h>
#endif // _WIN32

#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/cl.h>
#include <OpenGL/OpenGL.h>
#include <OpenCL/cl_gl.h>
#else
#include <CL/cl.h>
#include <GL/gl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl.h>
#if defined(USE_DX_INTEROP)
#include <CL/cl_d3d10.h>
#endif
#endif // !__APPLE__

#if !defined(CL_CALLBACK)
#define CL_CALLBACK
#endif //CL_CALLBACK

#include <utility>
#include <cstring>

#if !defined(__NO_STD_VECTOR)
#include <vector>
#endif

#if !defined(__NO_STD_STRING)
#include <string>
#endif 

#if defined(__CL_ENABLE_EXCEPTIONS)
#include <exception>
#endif

#if defined(linux) || defined(__APPLE__) || defined(__MACOSX)
# include <alloca.h>
#else
# include <malloc.h>
#endif // linux

/*! \namespace cl
 *
 * \brief The OpenCL C++ bindings are defined within this namespace.
 *
 */
namespace cl {

/*
 * Resolves the extension entry point `name` on first use.
 *
 * The expansion expects a pointer variable `pfn_<name>` and a pointer type
 * `PFN_<name>` to be visible at the point of use. While `pfn_<name>` is
 * null, it is assigned the result of clGetExtensionFunctionAddress("<name>")
 * cast to `PFN_<name>`; once it is non-null the lookup is not repeated.
 * A failed lookup is not reported: the pointer simply remains null.
 *
 * NOTE: the expansion is a bare `if` statement (not wrapped in
 * do { } while (0)), so take care when using it next to an `else`.
 */
#define __INIT_CL_EXT_FCN_PTR(name)                                    \
    if (!pfn_##name) {                                                 \
        pfn_##name =                                                   \
            (PFN_##name) clGetExtensionFunctionAddress(#name);         \
        if (!pfn_##name) {                                             \
            /* lookup failed: pfn_<name> is left null */               \
        }                                                              \
    }

// Forward declarations, so that these class names can be used further down
// (for example as parameter types in the __PARAM_NAME_INFO_1_0 table)
// ahead of the full class definitions.
class Program;
class Device;
class Context;
class CommandQueue;
class Memory;

#if defined(__CL_ENABLE_EXCEPTIONS)
// NOTE: <exception> is included at file scope together with the other
// standard headers. It must not be included here: this point is inside
// namespace cl, and a first inclusion here would declare the header's
// contents inside cl:: instead of the global namespace.

/*! \class Error
 * \brief Exception class
 *
 * Carries the OpenCL error code and an optional message. The message
 * pointer is stored as given and is not copied, so the pointed-to string
 * must outlive the exception object (string literals, such as the
 * __*_ERR macros below, satisfy this).
 */
class Error : public std::exception
{
private:
    cl_int err_;            //!< OpenCL error code.
    const char * errStr_;   //!< Borrowed message pointer, may be NULL.
public:
    /*! Create a new CL error exception for a given error code
     *  and corresponding message.
     *
     *  \param err     The OpenCL error code.
     *  \param errStr  Optional message; only the pointer is stored.
     */
    Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr)
    {}

    ~Error() throw() {}

    /*! \brief Get error string associated with exception
     *
     * \return The message given at construction, or the literal "empty"
     *         when no message was supplied.
     */
    virtual const char * what() const throw ()
    {
        return (errStr_ != NULL) ? errStr_ : "empty";
    }

    /*! \brief Get error code associated with exception
     *
     *  \return The error code.
     */
    cl_int err(void) const { return err_; }
};

// With exceptions enabled an error string is the stringified API name ...
#define __ERR_STR(x) #x
#else
// ... otherwise no strings are generated at all.
#define __ERR_STR(x) NULL
#endif // __CL_ENABLE_EXCEPTIONS

//! \cond DOXYGEN_DETAIL
// Default error strings: each macro expands, through __ERR_STR, to the name
// of the OpenCL C API entry point whose failure it reports (or to NULL when
// exceptions are disabled). Define __CL_USER_OVERRIDE_ERROR_STRINGS and
// provide every macro yourself to replace them.
//
// The macro names are part of the header's interface and are kept as they
// are (including the historical spellings __ENQEUE_COPY_BUFFER_ERR,
// __ENQEUE_COPY_BUFFER_RECT_ERR and the suffix-less
// __ENQUEUE_NATIVE_KERNEL); only the API names inside __ERR_STR follow the
// OpenCL spelling exactly.
#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
#define __GET_DEVICE_INFO_ERR               __ERR_STR(clGetDeviceInfo)
#define __GET_PLATFORM_INFO_ERR             __ERR_STR(clGetPlatformInfo)
#define __GET_DEVICE_IDS_ERR                __ERR_STR(clGetDeviceIDs)
#define __GET_PLATFORM_IDS_ERR              __ERR_STR(clGetPlatformIDs)
#define __GET_CONTEXT_INFO_ERR              __ERR_STR(clGetContextInfo)
#define __GET_EVENT_INFO_ERR                __ERR_STR(clGetEventInfo)
#define __GET_EVENT_PROFILE_INFO_ERR        __ERR_STR(clGetEventProfileInfo)
#define __GET_MEM_OBJECT_INFO_ERR           __ERR_STR(clGetMemObjectInfo)
#define __GET_IMAGE_INFO_ERR                __ERR_STR(clGetImageInfo)
#define __GET_SAMPLER_INFO_ERR              __ERR_STR(clGetSamplerInfo)
#define __GET_KERNEL_INFO_ERR               __ERR_STR(clGetKernelInfo)
#define __GET_KERNEL_WORK_GROUP_INFO_ERR    __ERR_STR(clGetKernelWorkGroupInfo)
#define __GET_PROGRAM_INFO_ERR              __ERR_STR(clGetProgramInfo)
#define __GET_PROGRAM_BUILD_INFO_ERR        __ERR_STR(clGetProgramBuildInfo)
#define __GET_COMMAND_QUEUE_INFO_ERR        __ERR_STR(clGetCommandQueueInfo)

#define __CREATE_CONTEXT_FROM_TYPE_ERR      __ERR_STR(clCreateContextFromType)
#define __GET_SUPPORTED_IMAGE_FORMATS_ERR   __ERR_STR(clGetSupportedImageFormats)

#define __CREATE_BUFFER_ERR                 __ERR_STR(clCreateBuffer)
#define __CREATE_SUBBUFFER_ERR              __ERR_STR(clCreateSubBuffer)
#define __CREATE_GL_BUFFER_ERR              __ERR_STR(clCreateFromGLBuffer)
#define __GET_GL_OBJECT_INFO_ERR            __ERR_STR(clGetGLObjectInfo)
#define __CREATE_IMAGE2D_ERR                __ERR_STR(clCreateImage2D)
#define __CREATE_IMAGE3D_ERR                __ERR_STR(clCreateImage3D)
#define __CREATE_SAMPLER_ERR                __ERR_STR(clCreateSampler)
#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback)

#define __CREATE_USER_EVENT_ERR             __ERR_STR(clCreateUserEvent)
#define __SET_USER_EVENT_STATUS_ERR         __ERR_STR(clSetUserEventStatus)
#define __SET_EVENT_CALLBACK_ERR            __ERR_STR(clSetEventCallback)
#define __WAIT_FOR_EVENTS_ERR               __ERR_STR(clWaitForEvents)

#define __CREATE_KERNEL_ERR                 __ERR_STR(clCreateKernel)
#define __SET_KERNEL_ARGS_ERR               __ERR_STR(clSetKernelArg)
#define __CREATE_PROGRAM_WITH_SOURCE_ERR    __ERR_STR(clCreateProgramWithSource)
#define __CREATE_PROGRAM_WITH_BINARY_ERR    __ERR_STR(clCreateProgramWithBinary)
#define __BUILD_PROGRAM_ERR                 __ERR_STR(clBuildProgram)
#define __CREATE_KERNELS_IN_PROGRAM_ERR     __ERR_STR(clCreateKernelsInProgram)

#define __CREATE_COMMAND_QUEUE_ERR          __ERR_STR(clCreateCommandQueue)
#define __SET_COMMAND_QUEUE_PROPERTY_ERR    __ERR_STR(clSetCommandQueueProperty)
#define __ENQUEUE_READ_BUFFER_ERR           __ERR_STR(clEnqueueReadBuffer)
#define __ENQUEUE_READ_BUFFER_RECT_ERR      __ERR_STR(clEnqueueReadBufferRect)
#define __ENQUEUE_WRITE_BUFFER_ERR          __ERR_STR(clEnqueueWriteBuffer)
#define __ENQUEUE_WRITE_BUFFER_RECT_ERR     __ERR_STR(clEnqueueWriteBufferRect)
#define __ENQEUE_COPY_BUFFER_ERR            __ERR_STR(clEnqueueCopyBuffer)
#define __ENQEUE_COPY_BUFFER_RECT_ERR       __ERR_STR(clEnqueueCopyBufferRect)
#define __ENQUEUE_READ_IMAGE_ERR            __ERR_STR(clEnqueueReadImage)
#define __ENQUEUE_WRITE_IMAGE_ERR           __ERR_STR(clEnqueueWriteImage)
#define __ENQUEUE_COPY_IMAGE_ERR            __ERR_STR(clEnqueueCopyImage)
#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR  __ERR_STR(clEnqueueCopyImageToBuffer)
#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR  __ERR_STR(clEnqueueCopyBufferToImage)
#define __ENQUEUE_MAP_BUFFER_ERR            __ERR_STR(clEnqueueMapBuffer)
#define __ENQUEUE_MAP_IMAGE_ERR             __ERR_STR(clEnqueueMapImage)
#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR      __ERR_STR(clEnqueueUnmapMemObject)
#define __ENQUEUE_NDRANGE_KERNEL_ERR        __ERR_STR(clEnqueueNDRangeKernel)
#define __ENQUEUE_TASK_ERR                  __ERR_STR(clEnqueueTask)
#define __ENQUEUE_NATIVE_KERNEL             __ERR_STR(clEnqueueNativeKernel)
#define __ENQUEUE_MARKER_ERR                __ERR_STR(clEnqueueMarker)
#define __ENQUEUE_WAIT_FOR_EVENTS_ERR       __ERR_STR(clEnqueueWaitForEvents)
#define __ENQUEUE_BARRIER_ERR               __ERR_STR(clEnqueueBarrier)

#define __ENQUEUE_ACQUIRE_GL_ERR            __ERR_STR(clEnqueueAcquireGLObjects)
#define __ENQUEUE_RELEASE_GL_ERR            __ERR_STR(clEnqueueReleaseGLObjects)

#define __UNLOAD_COMPILER_ERR               __ERR_STR(clUnloadCompiler)

#define __FLUSH_ERR                         __ERR_STR(clFlush)
#define __FINISH_ERR                        __ERR_STR(clFinish)

#endif // __CL_USER_OVERRIDE_ERROR_STRINGS
//! \endcond

/*! \class string
 * \brief Simple string class, that provides a limited subset of std::string
 * functionality but avoids many of the issues that come with that class.
 *
 * A non-empty string owns a heap buffer of size() + 1 bytes that is always
 * NUL-terminated. A default-constructed string owns no buffer; c_str()
 * then returns "". The contents cannot be modified in place, only replaced
 * as a whole through assignment.
 */
class string
{
private:
    ::size_t size_;   //!< Number of characters, excluding the terminator.
    char * str_;      //!< Owned buffer, or NULL.

    /*! \brief Copy \a size bytes from \a src into a fresh buffer and
     *  NUL-terminate it.
     *
     *  \return The new buffer, to be released with delete[], or NULL if
     *  the allocation produced no storage.
     */
    static char * duplicate(const char * src, ::size_t size)
    {
        char * copy = new char[size + 1];
        if (copy != NULL) {
            if (size > 0) {
                memcpy(copy, src, size * sizeof(char));
            }
            copy[size] = '\0';
        }
        return copy;
    }

public:
    //! Create an empty string.
    string(void) : size_(0), str_(NULL)
    {
    }

    /*! \brief Create a string from the first \a size bytes of \a str.
     *
     *  \param str   Source bytes; need not be NUL-terminated. A NULL
     *               pointer yields an empty string.
     *  \param size  Number of bytes to copy.
     */
    string(const char * str, ::size_t size) :
        size_(0),
        str_(NULL)
    {
        if (str != NULL) {
            str_ = duplicate(str, size);
            if (str_ != NULL) {
                size_ = size;
            }
        }
    }

    /*! \brief Create a string from a NUL-terminated C string.
     *
     *  \param str  Source string. A NULL pointer yields an empty string.
     */
    string(const char * str) :
        size_(0),
        str_(NULL)
    {
        if (str != NULL) {
            const ::size_t length = ::strlen(str);
            str_ = duplicate(str, length);
            if (str_ != NULL) {
                size_ = length;
            }
        }
    }

    /*! \brief Replace the contents with a copy of \a rhs.
     *
     *  The copy is made before the previous buffer is released, and the
     *  previous buffer is always released, so repeated assignment does not
     *  leak.
     */
    string& operator=(const string& rhs)
    {
        if (this == &rhs) {
            return *this;
        }

        char * copy = NULL;
        ::size_t copySize = 0;
        if (rhs.size_ != 0 && rhs.str_ != NULL) {
            copy = duplicate(rhs.str_, rhs.size_);
            if (copy != NULL) {
                copySize = rhs.size_;
            }
        }

        delete[] str_;   // delete[] on NULL is a no-op
        str_  = copy;
        size_ = copySize;

        return *this;
    }

    /*! \brief Copy constructor.
     *
     *  The members are initialised first because operator= releases the
     *  buffer currently held.
     */
    string(const string& rhs) :
        size_(0),
        str_(NULL)
    {
        *this = rhs;
    }

    ~string()
    {
        delete[] str_;
    }

    //! Number of characters, excluding the terminating NUL.
    ::size_t size(void) const   { return size_; }
    //! Same as size().
    ::size_t length(void) const { return size(); }

    //! NUL-terminated contents; "" when no buffer is held.
    const char * c_str(void) const { return (str_) ? str_ : "";}
};

// Select the string type used throughout the bindings:
//   - neither __USE_DEV_STRING nor __NO_STD_STRING defined -> std::string
//   - only __NO_STD_STRING defined                         -> cl::string
//   - __USE_DEV_STRING defined -> nothing is declared here; STRING_CLASS
//     must then be supplied by the user.
// <string> has already been included at file scope under the same
// condition, so the #include below does not pull new declarations into
// this namespace.
#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING)
#include <string>
typedef std::string STRING_CLASS;
#elif !defined(__USE_DEV_STRING) 
typedef cl::string STRING_CLASS;
#endif

// Select the vector template in the same way. VECTOR_CLASS is a macro
// rather than a typedef because it names a template, not a type. With
// __USE_DEV_VECTOR defined, VECTOR_CLASS must be supplied by the user.
#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
#include <vector>
#define VECTOR_CLASS std::vector
#elif !defined(__USE_DEV_VECTOR) 
#define VECTOR_CLASS cl::vector 
#endif

// Default capacity of cl::vector; may be overridden before inclusion.
#if !defined(__MAX_DEFAULT_VECTOR_SIZE)
#define __MAX_DEFAULT_VECTOR_SIZE 10
#endif

/*! \class vector
 * \brief Fixed-capacity vector implementation that mirrors a subset of
 * the std::vector interface.
 *
 * The elements live in an in-object array of N objects of type T, all of
 * which are default-constructed when the vector is created and destroyed
 * when it is destroyed. T must therefore be default-constructible and
 * assignable. Only the first size() elements are considered part of the
 * sequence; adding an element assigns to the next slot, removing one
 * assigns a default-constructed T to its slot so that whatever the removed
 * value held is released.
 *
 * The array is the first data member, and the conversion operators expose
 * it as a plain T pointer.
 *
 * Differences from std::vector worth knowing:
 *   - push_back() on a full vector is silently ignored;
 *   - operator== compares the raw bytes of the elements in use.
 */
template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>
class vector
{
private:
    T data_[N];           //!< Element storage; must stay the first member.
    unsigned int size_;   //!< Number of elements in use, never above N.
public:
    //! Create an empty vector.
    vector() : 
        size_(0)
    {}

    ~vector() {}

    //! Number of elements currently stored.
    unsigned int size(void) const
    {
        return size_;
    }

    //! Remove all elements, resetting each used slot to T().
    void clear()
    {
        while (size_ > 0) {
            pop_back();
        }
    }

    /*! \brief Append a copy of \a x.
     *
     *  Does nothing when the vector already holds N elements.
     */
    void push_back (const T& x)
    { 
        if (size_ < N) {
            data_[size_] = x;
            size_++;
        }
    }

    /*! \brief Remove the last element; no effect on an empty vector.
     *
     *  The slot is overwritten with T() rather than destroyed in place:
     *  it is an ordinary array element whose destructor runs again when
     *  the vector itself is destroyed.
     */
    void pop_back(void)
    {
        if (size_ > 0) {
            size_--;
            data_[size_] = T();
        }
    }
  
    //! Copy constructor; copies element by element using T's assignment.
    vector(const vector<T, N>& vec) : 
        size_(vec.size_)
    {
        for (unsigned int i = 0; i < size_; i++) {
            data_[i] = vec.data_[i];
        }
    } 

    /*! \brief Create a vector holding \a size copies of \a val.
     *
     *  At most N copies are stored.
     */
    vector(unsigned int size, const T& val = T()) :
        size_(0)
    {
        for (unsigned int i = 0; i < size; i++) {
            push_back(val);
        }
    }

    //! Copy assignment; copies element by element using T's assignment.
    vector<T, N>& operator=(const vector<T, N>& rhs)
    {
        if (this == &rhs) {
            return *this;
        }

        // Reset slots that will no longer be in use.
        while (size_ > rhs.size_) {
            pop_back();
        }

        for (unsigned int i = 0; i < rhs.size_; i++) {
            data_[i] = rhs.data_[i];
        }
        size_ = rhs.size_;
    
        return *this;
    }

    /*! \brief Bytewise comparison of the elements in use.
     *
     *  Two vectors are equal when they have the same size and their
     *  elements have identical object representations (memcmp); T's own
     *  operator==, if any, is not consulted.
     */
    bool operator==(const vector<T,N> &vec) const
    {
        if (size_ != vec.size_) {
            return false;
        }

        if (size_ == 0) {
            return true;
        }

        return memcmp(&data_[0], &vec.data_[0], size_ * sizeof(T)) == 0;
    }
  
    //! Access to the underlying array.
    operator T* ()             { return data_; }
    operator const T* () const { return data_; }
   
    //! True when no elements are stored.
    bool empty (void) const
    {
        return size_ == 0;
    }
  
    //! Largest number of elements the vector can hold (N).
    unsigned int max_size (void) const
    {
        return N;
    }

    //! Number of elements storage is reserved for; always N.
    unsigned int capacity () const
    {
        return N;
    }

    //! Unchecked element access.
    T& operator[](int index)
    {
        return data_[index];
    }
  
    //! Unchecked element access.
    const T& operator[](int index) const
    {
        return data_[index];
    }
  
    /*! \brief Replace the contents with the range [start, end).
     *
     *  The range is walked with operator< and operator++, so I is expected
     *  to be a pointer or a comparable iterator. The range must not refer
     *  to elements of this vector. Elements beyond the capacity are
     *  dropped.
     */
    template<class I>
    void assign(I start, I end)
    {
        clear();   
        while(start < end) {
            push_back(*start);
            start++;
        }
    }

    /*! \class iterator
     * \brief Iterator class for vectors
     *
     * Refers to the vector it was obtained from (it does not copy it), so
     * it must not be used after that vector has been destroyed.
     */
    class iterator
    {
    private:
        vector<T,N> * vec_;   //!< Vector iterated over, NULL if unbound.
        int index_;           //!< Current position within *vec_.
    public:
        //! Create an iterator that is not bound to any vector.
        iterator(void) : 
            vec_(NULL),
            index_(-1)
        {
        }

        ~iterator(void) {}

        //! Iterator to the first element of \a vec.
        static iterator begin(vector<T,N> &vec)
        {
            iterator i;
            i.vec_ = &vec;
            i.index_ = 0;
            return i;
        }

        //! Iterator one past the last element of \a vec.
        static iterator end(vector<T,N> &vec)
        {
            iterator i;
            i.vec_ = &vec;
            i.index_ = (int) vec.size();
            return i;
        }
    
        //! Equal when both refer to the same position of the same vector.
        bool operator==(const iterator& i) const
        {
            return ((vec_ == i.vec_) && 
                    (index_ == i.index_));
        }

        bool operator!=(const iterator& i) const
        {
            return (!(*this==i));
        }

        //! Pre-increment.
        iterator& operator++()
        {
            ++index_;
            return *this;
        }

        //! Post-increment; the int argument only selects this overload.
        iterator operator++(int)
        {
            iterator previous(*this);
            ++index_;
            return previous;
        }

        //! Pre-decrement.
        iterator& operator--()
        {
            --index_;
            return *this;
        }

        //! Post-decrement; the int argument only selects this overload.
        iterator operator--(int)
        {
            iterator previous(*this);
            --index_;
            return previous;
        }

        //! The element at the current position.
        T& operator *() const
        {
            return (*vec_)[index_];
        }
    };

    iterator begin(void)
    {
        return iterator::begin(*this);
    }

    iterator end(void)
    {
        return iterator::end(*this);
    }

    //! First element; the vector must not be empty.
    T& front(void)
    {
        return data_[0];
    }

    //! Last element; the vector must not be empty.
    T& back(void)
    {
        return data_[size_ - 1];
    }

    //! First element; the vector must not be empty.
    const T& front(void) const
    {
        return data_[0];
    }

    //! Last element; the vector must not be empty.
    const T& back(void) const
    {
        return data_[size_ - 1];
    }
};  
    
/*!
 * \brief Fixed-length array of ::size_t values.
 *
 * Used to interface between C++ and OpenCL C calls that require arrays of
 * size_t values whose size is known statically. It is a cl::vector of
 * ::size_t with capacity N and adds no members of its own.
 */
template <int N>
struct size_t
    : public cl::vector< ::size_t, N>
{
};

namespace detail {

// Generic GetInfoHelper: used for parameter types that are returned as a
// single fixed-size value. The query functor is invoked once, with the
// size of T as the buffer size and `param` as the destination; the
// functor's result code is returned unchanged.
template <typename Functor, typename T>
struct GetInfoHelper
{
    static cl_int
    get(Functor f, cl_uint name, T* param)
    {
        const ::size_t valueSize = sizeof(T);
        return f(name, valueSize, param, NULL);
    }
};

// GetInfoHelper specialization for VECTOR_CLASS results.
//
// The functor is called twice: first with an empty buffer to learn how many
// bytes the value needs, then with a buffer of that size obtained from
// alloca(). On success the buffer is treated as an array of
// (bytes / sizeof(T)) elements of T and handed to param->assign(). The
// first non-success code from either call is returned and `param` is left
// untouched in that case.
template <typename Func, typename T>
struct GetInfoHelper<Func, VECTOR_CLASS<T> >
{
    static cl_int get(Func f, cl_uint name, VECTOR_CLASS<T>* param)
    {
        ::size_t bytes;
        cl_int status = f(name, 0, NULL, &bytes);

        if (status == CL_SUCCESS) {
            // alloca() storage is released when this function returns, so
            // the buffer has to be consumed before leaving it.
            T* buffer = (T*) alloca(bytes);
            status = f(name, bytes, buffer, NULL);

            if (status == CL_SUCCESS) {
                param->assign(buffer, buffer + bytes / sizeof(T));
            }
        }

        return status;
    }
};

// Specialized GetInfoHelper for STRING_CLASS params
//
// The functor is called twice: first with an empty buffer to learn how many
// bytes the value needs, then with a stack buffer (alloca) to fetch it. The
// buffer is then assigned to *param as a C string. The first non-success
// code from either call is returned and *param is left untouched in that
// case.
template <typename Func>
struct GetInfoHelper<Func, STRING_CLASS>
{
    static cl_int get(Func f, cl_uint name, STRING_CLASS* param)
    {
        ::size_t required;
        cl_int err = f(name, 0, NULL, &required);
        if (err != CL_SUCCESS) {
            return err;
        }

        // One byte more than reported is reserved and set to NUL below, so
        // the conversion to STRING_CLASS never reads past the buffer, even
        // when the query reports zero bytes or returns data without a
        // terminator.
        char* value = (char*) alloca(required + 1);
        err = f(name, required, value, NULL);
        if (err != CL_SUCCESS) {
            return err;
        }
        value[required] = '\0';

        *param = value;
        return CL_SUCCESS;
    }
};

/*
 * Table of the OpenCL 1.0 information queries known to the bindings.
 *
 * Every entry has the form F(info_family, parameter_name, result_type);
 * the table is expanded by passing a three-argument macro as F. The
 * result type is the C++ type a query for that parameter is read into, so
 * it has to match the type the OpenCL specification lists for the
 * parameter.
 */
#define __PARAM_NAME_INFO_1_0(F) \
    F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \
    F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \
    F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \
    F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \
    F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \
    \
    F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \
    F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \
    F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \
    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \
    F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \
    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \
    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \
    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \
    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \
    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \
    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \
    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \
    F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \
    F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \
    F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \
    F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \
    F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \
    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \
    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \
    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \
    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \
    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \
    F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \
    F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \
    F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \
    F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \
    F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \
    F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \
    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \
    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint) \
    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \
    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \
    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \
    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \
    F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \
    F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \
    F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \
    F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \
    F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \
    F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \
    F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \
    F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \
    F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \
    F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \
    F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \
    F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \
    F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \
    F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \
    F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \
    F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \
    \
    F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \
    F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS<Device>) \
    F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS<cl_context_properties>) \
    \
    F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \
    F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \
    F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \
    F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \
    \
    F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \
    F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \
    F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \
    F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \
    \
    F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \
    F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \
    F(cl_mem_info, CL_MEM_SIZE, ::size_t) \
    F(cl_mem_info, CL_MEM_HOST_PTR, void*) \
    F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \
    F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \
    F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \
    \
    F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \
    F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \
    F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \
    F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \
    F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \
    F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \
    F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \
    \
    F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \
    F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \
    F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \
    F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \
    F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \
    \
    F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \
    F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \
    F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \
    F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<cl_device_id>) \
    F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \
    F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \
    F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS<char *>) \
    \
    F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \
    F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \
    F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \
    \
    F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \
    F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \
    F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \
    F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \
    F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \
    \
    F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \
    F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \
    F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \
    \
    F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \
    F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \
    F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \
    F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties)

#if defined(CL_VERSION_1_1)
// X-macro table of the query parameters that are registered only when
// CL_VERSION_1_1 is defined. Every row has the form
//     F(info enum type, parameter name, C++ result type)
// and the caller supplies F; further down the file the table is expanded with
// __DECLARE_PARAM_TRAITS to produce one detail::param_traits specialization
// per row.
#define __PARAM_NAME_INFO_1_1(F) \
	F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\
    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \
    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint)      \
    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint)     \
    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint)       \
    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \
    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \
    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \
    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \
    F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \
    F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \
    F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \
    \
    F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \
    F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \
    \
    F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \
    F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \
    \
    F(cl_event_info, CL_EVENT_CONTEXT, cl::Context)
#endif // CL_VERSION_1_1

// Primary template that maps an (info enum tag, parameter name) pair to the
// C++ type of the queried value. It has no members; only the specializations
// generated below provide `value` and `param_type`, so referring to
// param_type for a pair that was never registered does not compile.
template <typename enum_type, cl_int Name>
struct param_traits {};

// Generator applied to each row of the parameter tables. For one row it emits
//   - a forward declaration of the tag struct `token`, and
//   - the param_traits specialization for (detail::token, param_name) with
//       value      : the parameter name constant,
//       param_type : the C++ type T used to hold the query result.
#define __DECLARE_PARAM_TRAITS(token, param_name, T) \
struct token;                                        \
template<>                                           \
struct param_traits<detail:: token,param_name>       \
{                                                    \
    enum { value = param_name };                     \
    typedef T param_type;                            \
};

// Expand the tables: the 1.0 table always, the 1.1 table only when
// CL_VERSION_1_1 is defined.
__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS);
#if defined(CL_VERSION_1_1)
__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS);
#endif // CL_VERSION_1_1

// The generator is not needed after the tables have been expanded.
#undef __DECLARE_PARAM_TRAITS

// Convenience functions

// Queries parameter `name` through the callable `f` and stores the result in
// `param`. The work is delegated to GetInfoHelper<Func, T>::get and its
// status code is returned unchanged.
template <typename Func, typename T>
inline cl_int getInfo(Func f, cl_uint name, T* param)
{
    typedef GetInfoHelper<Func, T> Helper;
    return Helper::get(f, name, param);
}

// Adapter that prepends one stored argument to an info query call, so that
// the query can be invoked with the (param, size, value, size_ret) signature.
// The struct must remain an aggregate with members in this order because it
// is brace-initialized as { f, arg0 }.
template <typename Func, typename Arg0>
struct GetInfoFunctor0
{
    Func f_;            // the underlying query function
    const Arg0& arg0_;  // argument passed ahead of `param`

    // Forwards to f_(arg0_, param, size, value, size_ret).
    cl_int operator ()(
        cl_uint param,
        ::size_t size,
        void* value,
        ::size_t* size_ret)
    {
        return f_(arg0_, param, size, value, size_ret);
    }
};

// Adapter that prepends two stored arguments to an info query call, so that
// the query can be invoked with the (param, size, value, size_ret) signature.
// The struct must remain an aggregate with members in this order because it
// is brace-initialized as { f, arg0, arg1 }.
template <typename Func, typename Arg0, typename Arg1>
struct GetInfoFunctor1
{
    Func f_;            // the underlying query function
    const Arg0& arg0_;  // first argument passed ahead of `param`
    const Arg1& arg1_;  // second argument passed ahead of `param`

    // Forwards to f_(arg0_, arg1_, param, size, value, size_ret).
    cl_int operator ()(
        cl_uint param,
        ::size_t size,
        void* value,
        ::size_t* size_ret)
    {
        return f_(arg0_, arg1_, param, size, value, size_ret);
    }
};

// Queries parameter `name` of the object `arg0` through `f`, storing the
// result in `param`. `f` and `arg0` are bundled into a GetInfoFunctor0, which
// is then handed to GetInfoHelper; the helper's status code is returned.
template <typename Func, typename Arg0, typename T>
inline cl_int getInfo(Func f, const Arg0& arg0, cl_uint name, T* param)
{
    typedef GetInfoFunctor0<Func, Arg0> Functor;

    Functor bound = { f, arg0 };
    return GetInfoHelper<Functor, T>::get(bound, name, param);
}

// Queries parameter `name` for the pair (`arg0`, `arg1`) through `f`, storing
// the result in `param`. `f` and both arguments are bundled into a
// GetInfoFunctor1, which is then handed to GetInfoHelper; the helper's status
// code is returned.
template <typename Func, typename Arg0, typename Arg1, typename T>
inline cl_int getInfo(
    Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param)
{
    typedef GetInfoFunctor1<Func, Arg0, Arg1> Functor;

    Functor bound = { f, arg0, arg1 };
    return GetInfoHelper<Functor, T>::get(bound, name, param);
}

// Primary template of the retain/release dispatcher. It has no members; the
// static retain() and release() functions are supplied by the per-handle
// specializations.
template <typename T>
struct ReferenceHandler
{
};

// Handler for cl_device_id. Neither operation calls into the OpenCL runtime;
// both simply report CL_INVALID_DEVICE.
template <>
struct ReferenceHandler<cl_device_id>
{
    // cl_device_id does not have retain().
    static cl_int retain(cl_device_id)
    {
        return CL_INVALID_DEVICE;
    }

    // cl_device_id does not have release().
    static cl_int release(cl_device_id)
    {
        return CL_INVALID_DEVICE;
    }
};

// Handler for cl_platform_id. Neither operation calls into the OpenCL
// runtime; both simply report CL_INVALID_PLATFORM.
template <>
struct ReferenceHandler<cl_platform_id>
{
    // cl_platform_id does not have retain().
    static cl_int retain(cl_platform_id)
    {
        return CL_INVALID_PLATFORM;
    }

    // cl_platform_id does not have release().
    static cl_int release(cl_platform_id)
    {
        return CL_INVALID_PLATFORM;
    }
};

// Handler for cl_context: forwards to clRetainContext / clReleaseContext.
template <>
struct ReferenceHandler<cl_context>
{
    static cl_int retain(cl_context context)
    {
        return ::clRetainContext(context);
    }

    static cl_int release(cl_context context)
    {
        return ::clReleaseContext(context);
    }
};

// Handler for cl_command_queue: forwards to clRetainCommandQueue /
// clReleaseCommandQueue.
template <>
struct ReferenceHandler<cl_command_queue>
{
    static cl_int retain(cl_command_queue queue)
    {
        return ::clRetainCommandQueue(queue);
    }

    static cl_int release(cl_command_queue queue)
    {
        return ::clReleaseCommandQueue(queue);
    }
};

// Handler for cl_mem: forwards to clRetainMemObject / clReleaseMemObject.
template <>
struct ReferenceHandler<cl_mem>
{
    static cl_int retain(cl_mem memory)
    {
        return ::clRetainMemObject(memory);
    }

    static cl_int release(cl_mem memory)
    {
        return ::clReleaseMemObject(memory);
    }
};

// Handler for cl_sampler: forwards to clRetainSampler / clReleaseSampler.
template <>
struct ReferenceHandler<cl_sampler>
{
    static cl_int retain(cl_sampler sampler)
    {
        return ::clRetainSampler(sampler);
    }

    static cl_int release(cl_sampler sampler)
    {
        return ::clReleaseSampler(sampler);
    }
};

// Handler for cl_program: forwards to clRetainProgram / clReleaseProgram.
template <>
struct ReferenceHandler<cl_program>
{
    static cl_int retain(cl_program program)
    {
        return ::clRetainProgram(program);
    }

    static cl_int release(cl_program program)
    {
        return ::clReleaseProgram(program);
    }
};

// Handler for cl_kernel: forwards to clRetainKernel / clReleaseKernel.
template <>
struct ReferenceHandler<cl_kernel>
{
    static cl_int retain(cl_kernel kernel)
    {
        return ::clRetainKernel(kernel);
    }

    static cl_int release(cl_kernel kernel)
    {
        return ::clReleaseKernel(kernel);
    }
};

// Handler for cl_event: forwards to clRetainEvent / clReleaseEvent.
template <>
struct ReferenceHandler<cl_event>
{
    static cl_int retain(cl_event event)
    {
        return ::clRetainEvent(event);
    }

    static cl_int release(cl_event event)
    {
        return ::clReleaseEvent(event);
    }
};

// Common base of the handle wrapper classes. It stores one raw OpenCL handle
// of type T and keeps the handle's reference count in step with the lifetime
// of the wrapper: copying retains, destruction and re-assignment release.
// The actual retain/release calls are dispatched through
// ReferenceHandler<T>; their status codes are ignored here.
template <typename T>
class Wrapper
{
protected:
    typedef T cl_type;
    cl_type object_;    // wrapped handle, NULL when the wrapper is empty

public:
    // Creates an empty wrapper.
    Wrapper() : object_(NULL) { }

    // Drops the reference held on the wrapped handle, if any.
    ~Wrapper()
    {
        if (object_ != NULL) { release(); }
    }

    // Shares the handle held by rhs and takes an additional reference on it.
    Wrapper(const Wrapper<cl_type>& rhs) : object_(rhs.object_)
    {
        if (object_ != NULL) { retain(); }
    }

    // Replaces the wrapped handle with the one held by rhs.
    //
    // The incoming handle is retained BEFORE the current one is released.
    // With the opposite order, assigning a wrapper to itself (or to another
    // wrapper of the same handle) while holding the last reference would
    // release the object first and then retain a handle that is no longer
    // valid.
    Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
    {
        if (rhs.object_ != NULL) { rhs.retain(); }
        if (object_ != NULL) { release(); }
        object_ = rhs.object_;
        return *this;
    }

    // Read access to the raw handle.
    cl_type operator ()() const { return object_; }

    // Mutable access to the raw handle. Writing through this reference does
    // not retain or release anything.
    cl_type& operator ()() { return object_; }

protected:

    // Takes one reference on the wrapped handle.
    cl_int retain() const
    {
        return ReferenceHandler<cl_type>::retain(object_);
    }

    // Gives up one reference on the wrapped handle.
    cl_int release() const
    {
        return ReferenceHandler<cl_type>::release(object_);
    }
};

// Central status-code handler used by the wrapper classes.
//
//   err    - status code to check.
//   errStr - optional text describing the failing call (may be NULL).
//
// Returns err unchanged. When __CL_ENABLE_EXCEPTIONS is defined, any value
// other than CL_SUCCESS is reported by throwing Error(err, errStr) instead of
// returning.
#if defined(__CL_ENABLE_EXCEPTIONS)
// No dynamic exception specification: `throw(Error)` is deprecated since
// C++11 and is not valid C++17, and the body only ever throws Error anyway.
static inline cl_int errHandler (
    cl_int err,
    const char * errStr = NULL)
{
    if (err != CL_SUCCESS) {
        throw Error(err, errStr);
    }
    return err;
}
#else
static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
{
    (void) errStr; // only used when exceptions are enabled
    return err;
}
#endif // __CL_ENABLE_EXCEPTIONS

} // namespace detail
//! \endcond

/*! \stuct ImageFormat
 * \brief ImageFormat interface fro cl_image_format.
 */
struct ImageFormat : public cl_image_format
{
	ImageFormat(){}

    /*! \brief Create an image format.
     *
     * \param order
     * \param type
     *
     */
    ImageFormat(cl_channel_order order, cl_channel_type type)
    {
        image_channel_order = order;
        image_channel_data_type = type;
    }

	/*!
     * \brief Assignment operator
     *
     * \param rhs the imageformat object on rhs of the assignment.
    */
    ImageFormat& operator = (const ImageFormat& rhs)
    {
        if (this != &rhs) {
			this->image_channel_data_type = rhs.image_channel_data_type;
			this->image_channel_order     = rhs.image_channel_order;
        }
        return *this;
    }
};

/*! \class Device
 * \brief Device interface for cl_device_id.
 */
class Device : public detail::Wrapper<cl_device_id>
{
public:
    //! Construct a new device from a device ID.
    Device(cl_device_id device) : detail::Wrapper<cl_type>()
    {
        object_ = device;
    }

    //! Default constructor; device is not valid at this point.
    Device() : detail::Wrapper<cl_type>() { }

    /*!
     * \brief Construct a new device from a valid device.
     *
     * \param device The device object used for creation.
     */
    Device(const Device& device) : detail::Wrapper<cl_type>(device) { }

    /*!
     * \brief Assign a device to device.
     *
     * \param rhs the device object on the rhs of the assignment.
     */
    Device& operator = (const Device& rhs)
    {
        if (this == &rhs) {
            return *this;
        }
        detail::Wrapper<cl_type>::operator=(rhs);
        return *this;
    }

    /*!
     * \brief Get specific information about an OpenCL device.
     *
     * \param name is an enum that identifies the device information being
     *        queried.
     * \param param is a pointer to the memory location where the value for
     *        \a name will be returned. If it is NULL, it is ignored.
     *
     * \retval CL_INVALID_DEVICE if device is not valid.
     * \retval CL_INVALID_VALUE if name is not one of the supported values.
     * \retval CL_SUCCESS if the function is executed successfully.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getInfo(cl_device_info name, T* param) const
    {
        const cl_int status =
            detail::getInfo(&::clGetDeviceInfo, object_, name, param);
        return detail::errHandler(status, __GET_DEVICE_INFO_ERR);
    }

    /*!
     * \brief Get specific information about an OpenCL device.
     *
     * \param name is an enum that identifies the device information being
     *        queried. As this value is a template parameter, a compile
     *        error is generated if it is not a value member of
     *        cl_device_info.
     *
     * \param err pointer to the memory location where the error value will
     * be returned. If it is not NULL (NULL being the default), one of the
     * following values is stored:
     *      - CL_INVALID_DEVICE if device is not valid.
     *      - CL_INVALID_VALUE if name is not one of the supported values.
     *      - CL_SUCCESS if the function is executed successfully.
     *
     * \return the value queried for \em name.
     *
     * \note In the case that exceptions are enabled and an error value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_device_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typedef typename detail::param_traits<
            detail::cl_device_info, name>::param_type result_type;

        result_type value;
        const cl_int status = getInfo(name, &value);
        if (err != NULL) {
            *err = status;
        }
        return value;
    }
};

/*! \class Platform
 *  \brief Platform interface.
 */
class Platform : public detail::Wrapper<cl_platform_id>
{
public:
    // NOTE(review): only the declaration is visible here; presumably this
    // yields an empty platform - confirm that a definition exists.
    static const Platform null();

    //! Construct a new platform from a platform ID.
    Platform(cl_platform_id platform) { object_ = platform; }

    //! Default constructor; platform is not valid at this point.
    Platform() : detail::Wrapper<cl_type>()  { }

    /*!
     * \brief Construct a new platform from a valid platform.
     *
     * \param platform The platform object used for creation.
     */
    Platform(const Platform& platform) : detail::Wrapper<cl_type>(platform) { }

    /*!
     * \brief Assign a platform to platform.
     *
     * \param rhs the platform object on the rhs of the assignment.
     */
    Platform& operator = (const Platform& rhs)
    {
        if (this != &rhs) {
            detail::Wrapper<cl_type>::operator=(rhs);
        }
        return *this;
    }

    /*!
     * \brief Get specific information about the OpenCL platform.
     *
     * \param name is an enum that identifies the platform information being
     *        queried.
     * \param param is a pointer to the memory location where the value for
     *        \a name will be returned. If it is NULL, it is ignored.
     *
     * \retval CL_INVALID_VALUE if \a name is not one of the supported
     *         values or if the size in bytes of the result buffer
     *         is < size of return type and the buffer is not NULL.
     * \retval CL_SUCCESS if the function is executed successfully.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const
    {
        return detail::errHandler(
            detail::getInfo(&::clGetPlatformInfo, object_, name, param),
            __GET_PLATFORM_INFO_ERR);
    }

    /*!
     * \brief Get specific information about the OpenCL Platform.
     *
     * \param name is an enum that identifies the platform information being
     *        queried. As this value is a template parameter, a compile
     *        error is generated if it is not a value member of
     *        cl_platform_info.
     *
     * \param err pointer to the memory location where the error value will
     * be returned. If it is not NULL (NULL being the default), one of the
     * following values is stored:
     *      - CL_INVALID_VALUE if name is not one of the supported values.
     *      - CL_SUCCESS if the function is executed successfully.
     *
     * \return the value queried for \em name.
     *
     * \note In the case that exceptions are enabled and an error value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_platform_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typename detail::param_traits<
            detail::cl_platform_info, name>::param_type param;
        cl_int result = getInfo(name, &param);
        if (err != NULL) {
            *err = result;
        }
        return param;
    }

    /*! \brief Get the list of available devices.
     *
     *  \param type is a bitfield that identifies the type of OpenCL device.
     *  It can be used to query specific OpenCL devices or all OpenCL
     *  devices available.
     *
     *  \param devices returns a vector of OpenCL devices found. The cl::Device
     *  values returned in devices can be used to identify a specific OpenCL
     *  device. If \a devices is NULL, this argument is ignored: only the
     *  device count query is performed and its status is returned.
     *
     *  \return One of the following values:
     *    - CL_INVALID_DEVICE_TYPE if \a type is not a valid value.
     *    - CL_DEVICE_NOT_FOUND if no OpenCL devices that matched \a type
     *      were found.
     *    - CL_SUCCESS if the function is executed successfully.
     *
     *  The application can query specific capabilities of the OpenCL device(s)
     *  returned by getDevices. This can be used by the application to
     *  determine which device(s) to use.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    cl_int getDevices(
        cl_device_type type,
        VECTOR_CLASS<Device>* devices) const
    {
        // First call: ask only for the number of matching devices.
        cl_uint n = 0;
        cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
        }

        // Nowhere to store the result: honour the documented "ignored"
        // contract instead of dereferencing a NULL pointer below.
        if (devices == NULL) {
            return CL_SUCCESS;
        }

        // Second call: fetch the IDs into a scratch array obtained with
        // alloca().
        cl_device_id* ids = static_cast<cl_device_id*>(
            alloca(n * sizeof(cl_device_id)));
        err = ::clGetDeviceIDs(object_, type, n, ids, NULL);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
        }

        // Each raw ID is converted to a Device by the vector's assign().
        devices->assign(&ids[0], &ids[n]);
        return CL_SUCCESS;
    }

#if defined(USE_DX_INTEROP)
    /*! \brief Get the list of available D3D10 devices.
     *
     *  \param d3d_device_source forwarded unchanged to
     *  clGetDeviceIDsFromD3D10KHR.
     *
     *  \param d3d_object forwarded unchanged to clGetDeviceIDsFromD3D10KHR.
     *
     *  \param d3d_device_set forwarded unchanged to
     *  clGetDeviceIDsFromD3D10KHR.
     *
     *  \param devices returns a vector of OpenCL D3D10 devices found. The
     *  cl::Device values returned in devices can be used to identify a
     *  specific OpenCL device. If \a devices is NULL, this argument is
     *  ignored: only the device count query is performed and its status is
     *  returned.
     *
     *  \return One of the following values:
     *    - CL_SUCCESS if the function is executed successfully.
     *
     *  The application can query specific capabilities of the OpenCL device(s)
     *  returned by getDevices. This can be used by the application to
     *  determine which device(s) to use.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    cl_int getDevices(
        cl_d3d10_device_source_khr d3d_device_source,
        void *                     d3d_object,
        cl_d3d10_device_set_khr    d3d_device_set,
        VECTOR_CLASS<Device>* devices) const
    {
        typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)(
            cl_platform_id platform,
            cl_d3d10_device_source_khr d3d_device_source,
            void * d3d_object,
            cl_d3d10_device_set_khr d3d_device_set,
            cl_uint num_entries,
            cl_device_id * devices,
            cl_uint* num_devices);

        // The extension entry point is stored in a function-local static;
        // __INIT_CL_EXT_FCN_PTR is expected to fill it in.
        static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL;
        __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR);

        // First call: ask only for the number of matching devices.
        cl_uint n = 0;
        cl_int err = pfn_clGetDeviceIDsFromD3D10KHR(
            object_,
            d3d_device_source,
            d3d_object,
            d3d_device_set,
            0,
            NULL,
            &n);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
        }

        // Nowhere to store the result: honour the documented "ignored"
        // contract instead of dereferencing a NULL pointer below.
        if (devices == NULL) {
            return CL_SUCCESS;
        }

        // Second call: fetch the IDs into a scratch array obtained with
        // alloca().
        cl_device_id* ids = static_cast<cl_device_id*>(
            alloca(n * sizeof(cl_device_id)));
        err = pfn_clGetDeviceIDsFromD3D10KHR(
            object_,
            d3d_device_source,
            d3d_object,
            d3d_device_set,
            n,
            ids,
            NULL);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
        }

        devices->assign(&ids[0], &ids[n]);
        return CL_SUCCESS;
    }
#endif

    /*! \brief Get the list of available platforms.
     *
     *  \param platforms returns a vector with one cl::Platform per platform
     *  ID reported by clGetPlatformIDs. If \a platforms is NULL, only the
     *  platform count query is performed and its status is returned.
     *
     *  \return CL_SUCCESS if the function is executed successfully,
     *  otherwise the status reported by clGetPlatformIDs.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    static cl_int get(
        VECTOR_CLASS<Platform>* platforms)
    {
        // First call: ask only for the number of platforms.
        cl_uint n = 0;
        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
        }

        // Avoid dereferencing a NULL output vector below.
        if (platforms == NULL) {
            return CL_SUCCESS;
        }

        // Second call: fetch the IDs into a scratch array obtained with
        // alloca().
        cl_platform_id* ids = static_cast<cl_platform_id*>(
            alloca(n * sizeof(cl_platform_id)));
        err = ::clGetPlatformIDs(n, ids, NULL);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
        }

        platforms->assign(&ids[0], &ids[n]);
        return CL_SUCCESS;
    }
};

/*! \brief Allow the runtime to release the resources allocated by the OpenCL
 *  compiler.
 *
 *  This is a hint from the application and does not guarantee that the compiler
 *  will not be used in the future or that the compiler will actually be
 *  unloaded by the implementation.
 *
 *  \return The status reported by clUnloadCompiler; this call currently
 *  always returns CL_SUCCESS.
 *
 */
static inline cl_int
UnloadCompiler()
{
    const cl_int status = ::clUnloadCompiler();
    return status;
}

/*! \class Context
 * \brief Context interface for cl_context.
 */
class Context : public detail::Wrapper<cl_context>
{
public:
    /*! \brief Construct an OpenCL context.
     *
     *  An OpenCL context is created with one or more devices. Contexts are used by
     *  the OpenCL runtime for managing objects such as command-queues, memory,
     *  program and kernel objects and for executing kernels on one or more devices
     *  specified in the context.
     *
     *  \param devices is a list of unique devices returned by
     *  Platform::getDevices. If more than one device is specified in devices,
     *  a selection criteria may be applied to determine if the list of devices
     *  specified can be used together to create a context. An empty list is
     *  passed to clCreateContext as a NULL device pointer with a count of
     *  zero.
     *
     *  \param properties is reserved and must be zero, which is its default
     *  value.
     *
     *  \param notifyFptr is a callback function that can be registered by the
     *  application. This callback function will be used by the runtime to
     *  report information on errors that occur in this context. This callback
     *  function may be called asynchronously by the runtime. If \a notifyFptr
     *  is NULL, its default value, no callback function is registered.
     *
     *  \param data will be passed as the user data argument when
     *  \a notifyFptr is called. \a data can be NULL, which is the default value.
     *
     *  \param err will return an appropriate error code.
     *  If \a err is NULL, its default value, no error code is returned.
     *
     *  On success the wrapped context is valid and \a err is set to
     *  CL_SUCCESS; otherwise one of the following error values is stored in
     *  \a err:
     *    - CL_INVALID_VALUE if \a properties is not zero.
     *    - CL_INVALID_DEVICE if \a devices contains an invalid device.
     *    - CL_INVALID_DEVICE_LIST if more than one device is specified in
     *      \a devices and the list of devices specified cannot be used together
     *      to create a context.
     *    - CL_DEVICE_NOT_AVAILABLE if a device in \a devices is currently not
     *      available even though the device was returned by
     *      Platform::getDevices.
     *    - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *      required by the runtime.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    Context(
        const VECTOR_CLASS<Device>& devices,
        cl_context_properties* properties = NULL,
        void (CL_CALLBACK * notifyFptr)(
            const char *,
            const void *,
            ::size_t,
            void *) = NULL,
        void* data = NULL,
        cl_int* err = NULL)
    {
        cl_int error;

        // Copy the raw handles into a scratch array rather than casting the
        // address of the first Device: that avoids calling front() on an
        // empty vector and does not depend on Device having the same memory
        // layout as cl_device_id.
        const ::size_t numDevices = devices.size();
        cl_device_id* deviceIDs = NULL;
        if (numDevices > 0) {
            deviceIDs = static_cast<cl_device_id*>(
                alloca(numDevices * sizeof(cl_device_id)));
            for (::size_t i = 0; i < numDevices; ++i) {
                deviceIDs[i] = devices[i]();
            }
        }

        object_ = ::clCreateContext(
            properties, (cl_uint) numDevices,
            deviceIDs,
            notifyFptr, data, &error);

        // NOTE(review): this reuses the error string of the FromType
        // constructor; a dedicated string for clCreateContext is not visible
        // from here.
        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    /*! \brief Create an OpenCL context from a device type that identifies the
     *  specific device(s) to use.
     *
     *  \param type is a bit-field that identifies the type of device.
     *
     *  \param properties is reserved and must be zero.
     *
     *  \param notifyFptr described in previous definition of Context
     *  constructor.
     *
     *  \param data described in previous definition of Context
     *  constructor.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, its default value, no error code is returned.
     *
     *  On success the wrapped context is valid and \a err is set to
     *  CL_SUCCESS; otherwise one of the following error values is stored in
     *  \a err:
     *    - CL_INVALID_VALUE if \a properties is not zero.
     *    - CL_INVALID_DEVICE_TYPE if \a type is not a valid value.
     *    - CL_DEVICE_NOT_AVAILABLE if no devices that match \a type
     *      are currently available.
     *    - CL_DEVICE_NOT_FOUND if no devices that match \a type were found.
     *    - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *      required by the runtime.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    Context(
        cl_device_type type,
        cl_context_properties* properties = NULL,
        void (CL_CALLBACK * notifyFptr)(
            const char *,
            const void *,
            ::size_t,
            void *) = NULL,
        void* data = NULL,
        cl_int* err = NULL)
    {
        cl_int error;
        object_ = ::clCreateContextFromType(
            properties, type, notifyFptr, data, &error);

        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; context is not valid at this point.
    Context() : detail::Wrapper<cl_type>() { }

    /*!
     * \brief Construct a new context from a valid context.
     *
     * \param context The context object used for creation.
     */
    Context(const Context& context) : detail::Wrapper<cl_type>(context) { }

    /*!
     * \brief Assign a context to context.
     *
     * \param rhs the context object on the rhs of the assignment.
     */
    Context& operator = (const Context& rhs)
    {
        if (this != &rhs) {
            detail::Wrapper<cl_type>::operator=(rhs);
        }
        return *this;
    }

    /*! \brief Query information about a context.
     *
     *  \param name is an enum that specifies the information to query.
     *
     *  \param param is a pointer to memory where the appropriate result being
     *  queried is returned. If \a param is NULL, it is ignored.
     *
     *  \return One of the following values:
     *    - CL_INVALID_CONTEXT if context is not a valid context.
     *    - CL_INVALID_VALUE if \a name is not one of the supported
     *      values.
     *    - CL_SUCCESS if the function is executed successfully.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getInfo(cl_context_info name, T* param) const
    {
        return detail::errHandler(
            detail::getInfo(&::clGetContextInfo, object_, name, param),
            __GET_CONTEXT_INFO_ERR);
    }

    /*! \brief Query information about a context.
     *
     *  \param name is an enum that specifies the information to query. As
     *  this value is a template parameter, a compile error is generated if
     *  it is not a value member of cl_context_info.
     *
     *  \param err pointer to the memory location where the error value will
     *  be returned. If it is not NULL (NULL being the default), one of the
     *  following values is stored:
     *    - CL_INVALID_CONTEXT if context is not a valid context.
     *    - CL_INVALID_VALUE if \a name is not one of the supported
     *      values.
     *    - CL_SUCCESS if the function is executed successfully.
     *
     * \return the value queried for \em name.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_context_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typename detail::param_traits<
            detail::cl_context_info, name>::param_type param;
        cl_int result = getInfo(name, &param);
        if (err != NULL) {
            *err = result;
        }
        return param;
    }

    /*! \brief Query information about supported image formats.
     *
     * \param flags is a bit-field that is used to specify allocation and usage
     * information about the image memory object being created.
     * \param type describes the image type and must be either
     * CL_MEM_OBJECT_IMAGE2D or CL_MEM_OBJECT_IMAGE3D.
     * \param formats is a pointer to a memory location where the vector of
     * supported image formats is returned. Each vector element describes a
     * cl_image_format structure supported by the OpenCL implementation. If
     * \a formats is NULL, it is ignored: only the format count query is
     * performed and its status is returned.
     *
     * \return One of the following values:
     *    - CL_INVALID_CONTEXT if context is not a valid context.
     *    - CL_INVALID_VALUE if \a flags or \a type are not valid.
     *    - CL_SUCCESS if the function is executed successfully.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated.
     */

    cl_int getSupportedImageFormats(
        cl_mem_flags flags,
        cl_mem_object_type type,
        VECTOR_CLASS<ImageFormat>* formats) const
    {
        // First call: ask only for the number of supported formats.
        cl_uint numEntries = 0;
        cl_int err = ::clGetSupportedImageFormats(
            object_, flags, type, 0, NULL, &numEntries);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
        }

        // Nowhere to store the result: honour the documented "ignored"
        // contract instead of dereferencing a NULL pointer below.
        if (formats == NULL) {
            return CL_SUCCESS;
        }

        // Second call: fetch the formats into a scratch array obtained with
        // alloca(). The array is handed to the C API as cl_image_format*,
        // the base type of ImageFormat.
        ImageFormat* value = static_cast<ImageFormat*>(
            alloca(numEntries * sizeof(ImageFormat)));
        err = ::clGetSupportedImageFormats(
            object_, flags, type, numEntries,
            (cl_image_format*) value, NULL);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
        }

        formats->assign(&value[0], &value[numEntries]);
        return CL_SUCCESS;
    }
};

/*! \class Event
 * \brief Event interface for cl_event.
 */
class Event : public detail::Wrapper<cl_event>
{
public:
    //! Default constructor; event is not valid at this point.
    Event()
        : detail::Wrapper<cl_type>()
    {
    }

    /*!
     * \brief Construct a new event from a valid event.
     *
     * \param event The event object used for creation.
    */
    Event(const Event& event)
        : detail::Wrapper<cl_type>(event)
    {
    }

    /*!
     * \brief Assign a event to event.
     *
     * \param rhs the event object on rhs of the assignment.
     */
    Event& operator = (const Event& rhs)
    {
        // Self-assignment leaves the event untouched.
        if (this == &rhs) {
            return *this;
        }

        // The base class handles the handle swap and reference counting.
        detail::Wrapper<cl_type>::operator=(rhs);
        return *this;
    }

    /*! \brief Return information about the event.
     *
     *  \param name specifies the information to query.
     *
     *  \param param is a pointer to memory where the appropriate result being
     *  queried is returned. If \a param_value is NULL, it is ignored.
     *
     *  Using cl::Event::getEventInfo to determine if a command identified by
     *  event has finished execution (i.e. CL_EVENT_COMMAND_EXECUTION_STATUS
     *  returns CL_COMPLETE) is not a synchronization point i.e. there are
     *  no guarantees that the memory objects being modified by command
     *  associated with event will be visible to other enqueued commands.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \note In the case that exceptions are enabled and a return value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getInfo(cl_event_info name, T* param) const
    {
        // Run the raw query, then pass its status through the error handler.
        const cl_int status =
            detail::getInfo(&::clGetEventInfo, object_, name, param);
        return detail::errHandler(status, __GET_EVENT_INFO_ERR);
    }

    /*!
     * \brief Return information about the event.
     *
     * \param name specifies the information to query.
     *
     * \param err pointer to memory location where error value will be returned.
     * If not null, the default value, then one of the following values is
     * returned:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the appropriate values for \em name will be returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_event_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        // Result type registered for `name` in the cl_event_info traits.
        typedef typename detail::param_traits<
            detail::cl_event_info, name>::param_type result_type;

        result_type value;
        const cl_int status = getInfo(name, &value);

        // The status is reported only when the caller asked for it.
        if (err != NULL) {
            *err = status;
        }
        return value;
    }

    /*! \brief Return profiling information for the command associated with
     *         event.
     *
     *  \param name specifies the profiling data to query.
     *
     *  \param param is a pointer to memory where the appropriate result being
     *  queried is returned. If \a param is NULL, it is ignored.
     *
     *  The unsigned 64-bit values returned can be used to measure the time in
     *  nano-seconds consumed by OpenCL commands. OpenCL devices are required to
     *  correctly track time across changes in frequency and p-states. The
     *  CL_DEVICE_PROFILING_TIMER_RESOLUTION specifies the resolution of the timer
     *  i.e. the number of nanoseconds elapsed before the timer is incremented.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully and the profiling
     *    information has been recorded
     *  - CL_PROFILING_INFO_NOT_AVAILABLE if the profiling information is currently
     *    not available (because the command identified by event has not completed)
     *  - CL_INVALID_VALUE if \a param_name is not valid.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getProfilingInfo(cl_profiling_info name, T* param) const
    {
        // Run the raw query, then pass its status through the error handler.
        const cl_int status = detail::getInfo(
            &::clGetEventProfilingInfo, object_, name, param);
        return detail::errHandler(status, __GET_EVENT_PROFILE_INFO_ERR);
    }

    /*!
     * \brief Return profiling information for the command associated with
     *        event.
     *
     * \param name specifies the profiling data to query.
     *
     * \param err pointer to memory location where error value will be returned.
     * If not null, the default value, then one of the following values is
     * returned:
     *  - CL_SUCCESS if the function is executed successfully and the profiling
     *    information has been recorded
     *  - CL_PROFILING_INFO_NOT_AVAILABLE if the profiling information is currently
     *    not available (because the command identified by event has not completed)
     *  - CL_INVALID_VALUE if \a param_name is not valid.

     * \return the appropriate values for \em name will be returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_profiling_info, name>::param_type
    getProfilingInfo(cl_int* err = NULL) const
    {
        // Result type registered for `name` in the cl_profiling_info traits.
        typedef typename detail::param_traits<
            detail::cl_profiling_info, name>::param_type result_type;

        result_type value;
        const cl_int status = getProfilingInfo(name, &value);

        // The status is reported only when the caller asked for it.
        if (err != NULL) {
            *err = status;
        }
        return value;
    }

    /*! \brief Wait on the host thread for command identified by event to
     * complete.
     *
     *  A command is considered complete if its execution status is CL_COMPLETE
     *  or a negative value. The events specified in event_list act as
     *  synchronization points.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function was executed successfully.
     */
    cl_int wait() const
    {
        // Wait on a one-element event list made of the wrapped handle.
        const cl_int status = ::clWaitForEvents(1, &object_);
        return detail::errHandler(status, __WAIT_FOR_EVENTS_ERR);
    }

#if defined(CL_VERSION_1_1)
  /*!
     * \brief Register a user callback function.
     *
     * \param type specifies the command execution status for which the callback 
	 * is registered. The command execution callback mask values for which a 
	 * callback can be registered are: CL_COMPLETE. There is no guarantee that 
	 * the callback functions registered for various execution status values for 
	 * an event will be called in the exact order that the execution status of a 
	 * command changes.
	 *
	 * \param pfn_notify is the event callback function that can be
	 * registered by the application. This callback function may be called
	 * asynchronously by the OpenCL implementation. It is the application's
	 * responsibility to ensure that the callback function is thread-safe.
	 * The parameters to this callback function are:
	 * 
	 *    - event is the event object for which the callback function is invoked.
	 *    - event_command_exec_status represents the execution status of command 
	 *      for which this callback function is invoked. Refer to table 5.15 for 
	 *      the command execution status values. If the callback is called as the 
	 *      result of the command associated with event being abnormally terminated,
	 *      an appropriate error code for the error that caused the termination 
	 *      will be passed to event_command_exec_status instead. 
	 *    - user_data is a pointer to user supplied data.
	 *
	 * \param user_data will be passed as the user_data argument when pfn_notify 
	 * is called. user_data can be NULL.
	 *
	 * \return CL_SUCCESS if successful, otherwise one of the following
	 *  error values:
	 *
	 *  - CL_INVALID_EVENT if event is not a valid event object or is a user
	 *    event object created using clCreateUserEvent.
	 *  - CL_INVALID_VALUE if pfn_notify is NULL or if \a type is not a valid
	 *    command execution status.
	 *
	 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources 
	 *    required by the OpenCL implementation on the host.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
	cl_int setCallback(
		cl_int type,
		void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *),
		void * user_data = NULL)
	{
		// Register the callback on the wrapped event, then pass the
		// returned status through the common error handler.
		const cl_int status =
			::clSetEventCallback(object_, type, pfn_notify, user_data);
		return detail::errHandler(status, __SET_EVENT_CALLBACK_ERR);
	}
#endif

	/*! \brief Wait on the host thread for commands identified by event objects in
	 *  event_list to complete.
	 *
	 *  A command is considered complete if its execution status is CL_COMPLETE or
	 *  a negative value. The events specified in event_list act as synchronization
	 *  points.
	 *
	 * \param events is a vector of events.
	 *
	 *  \return One of the following values:
	 *  - CL_SUCCESS if the function was executed successfully.
	 *  - CL_INVALID_VALUE if size of \a events is zero.
	 *  - CL_INVALID_EVENT if an event in \a events is not valid.
	 *
	 * \note In the case that exceptions are enabled and error value
	 * other than CL_SUCCESS is generated, then cl::Error exception is
	 * generated.
	 */
	static cl_int
	waitForEvents(const VECTOR_CLASS<Event>& events)
	{
		// front() must not be called on an empty container. For an empty
		// list pass NULL with a count of zero so that clWaitForEvents itself
		// reports the error (documented above as CL_INVALID_VALUE).
		const cl_event* list = NULL;
		if (events.size() != 0) {
			// The vector of Event wrappers is reinterpreted as an array of
			// raw cl_event handles, exactly as the original code did.
			list = (const cl_event*) &events.front();
		}

		return detail::errHandler(
			::clWaitForEvents((cl_uint) events.size(), list),
			__WAIT_FOR_EVENTS_ERR);
	}
};

#if defined(CL_VERSION_1_1)
/*! \class UserEvent
 * \brief User event interface for cl_event.
 */
class UserEvent : public Event
{
public:
    /*! \brief Create a user event object.
     *
     *  \param context is a valid OpenCL context used to create the event object.
     *
     *  \param err will return an appropriate error code.
     *  If \a err is NULL, no error code is returned.
     *
     *  \return A valid non-zero buffer object and \a err is set to
     *  CL_SUCCESS if the buffer object is created successfully or a NULL value
     *  with one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    UserEvent(
        const Context& context,
        cl_int * err = NULL)
    {
        cl_int error;
        object_ = ::clCreateUserEvent(
            context(),
            &error);

        detail::errHandler(error, __CREATE_USER_EVENT_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; buffer is not valid at this point.
	UserEvent() : Event() { }

    /*!
     * \brief Construct a new user event from a valid user event.
     *
     * \param event The event object used for creation.
    */
    UserEvent(const UserEvent& event) : Event(event) { }

    /*!
     * \brief Assign a user event.
     *
     * \param rhs the user event object on rhs of the assignment.
     */
    UserEvent& operator = (const UserEvent& rhs)
    {
        if (this != &rhs) {
            Event::operator=(rhs);
        }
        return *this;
    }

   /*!
     * \brief Set the execution status.
     *
     * \param status specifies the new execution status to be set
	 * and can be CL_COMPLETE or a negative integer value to indicate an error.
	 *
	 * \return CL_SUCCESS if the status is updated successfully or 
     *  one of the following error values:
     *  - CL_INVALID_VALUE if the execution_status is not CL_COMPLETE or a 
	 *    negative integer value.
	 *  - CL_INVALID_OPERATION if the execution_status for event has already 
	 *  been changed by a previous call to setStatus.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
	cl_int setStatus(cl_int status)
	{
        return detail::errHandler(
			::clSetUserEventStatus(object_,status), 
			__SET_USER_EVENT_STATUS_ERR);
	}
};
#endif

/*! \brief Wait on the host thread for commands identified by event objects in
 *  event_list to complete.
 *
 *  A command is considered complete if its execution status is CL_COMPLETE or
 *  a negative value. The events specified in event_list act as synchronization
 *  points.
 *
 * \param events is a vector of events.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if the function was executed successfully.
 *  - CL_INVALID_VALUE if size of \a events is zero.
 *  - CL_INVALID_EVENT if an event in \a events is not valid.
 *
 * \note In the case that exceptions are enabled and error value
 * other than CL_SUCCESS is generated, then cl::Error exception is
 * generated.
 */
inline static cl_int
WaitForEvents(const VECTOR_CLASS<Event>& events)
{
    // front() must not be called on an empty container. For an empty list
    // pass NULL with a count of zero so that clWaitForEvents itself reports
    // the error (documented above as CL_INVALID_VALUE).
    const cl_event* list = NULL;
    if (events.size() != 0) {
        // The vector of Event wrappers is reinterpreted as an array of raw
        // cl_event handles, exactly as the original code did.
        list = (const cl_event*) &events.front();
    }

    return detail::errHandler(
        ::clWaitForEvents((cl_uint) events.size(), list),
        __WAIT_FOR_EVENTS_ERR);
}

/*! \class Memory
 * \brief Memory interface for cl_mem.
 */
class Memory : public detail::Wrapper<cl_mem>
{
public:
    //! Default constructor; the memory object is not valid at this point.
    Memory() : detail::Wrapper<cl_type>() { }

    /*!
     * \brief Copy-construct a memory object from another memory object.
     *
     * \param memory the memory object to copy from.
     */
    Memory(const Memory& memory) : detail::Wrapper<cl_type>(memory) { }

    /*!
     * \brief Copy-assign a memory object.
     *
     * \param rhs the memory object on the right-hand side of the assignment.
     *
     * \return a reference to this object.
     */
    Memory& operator = (const Memory& rhs)
    {
        // Self-assignment is a no-op.
        if (this == &rhs) {
            return *this;
        }
        detail::Wrapper<cl_type>::operator=(rhs);
        return *this;
    }

    /*! \brief Query information that is common to all memory objects
     *  (buffer and image objects).
     *
     *  \param name specifies the information to query.
     *
     *  \param param is a pointer to memory where the result of the query is
     *  stored.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getInfo(cl_mem_info name, T* param) const
    {
        const cl_int status =
            detail::getInfo(&::clGetMemObjectInfo, object_, name, param);
        return detail::errHandler(status, __GET_MEM_OBJECT_INFO_ERR);
    }

    /*!
     * \brief Query information that is common to all memory objects
     *  (buffer and image objects), returning the value directly.
     *
     * \tparam name specifies the information to query.
     *
     * \param err optional pointer to a location that receives the error
     * value. When it is not NULL (NULL is the default), one of the following
     * values is stored:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the value associated with \em name.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_mem_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        // The result type is chosen at compile time from the queried constant.
        typedef typename detail::param_traits<
            detail::cl_mem_info, name>::param_type value_type;

        value_type value;
        const cl_int status = getInfo(name, &value);
        if (err != NULL) {
            *err = status;
        }
        return value;
    }

#if defined(CL_VERSION_1_1)
    /*!
     * \brief Register a destructor callback function.
     *
     * \param pfn_notify is the callback function that can be registered by
     * the application. This callback function may be called asynchronously
     * by the OpenCL implementation. It is the application's responsibility
     * to ensure that the callback function is thread-safe.
     * The parameters to this callback function are:
     *
     *    - memobj is the memory object being deleted.
     *    - user_data is a pointer to user supplied data.
     *
     * \param user_data will be passed as the user_data argument when
     * \a pfn_notify is called. user_data can be NULL.
     *
     * \return CL_SUCCESS if successful, otherwise one of the following
     * error values:
     *
     * - CL_INVALID_MEM_OBJECT if memobj is not a valid memory object.
     *
     * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *   required by the OpenCL implementation on the host.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int setDestructorCallback(
        void (CL_CALLBACK * pfn_notify)(cl_mem, void *),
        void * user_data = NULL)
    {
        const cl_int status =
            ::clSetMemObjectDestructorCallback(object_, pfn_notify, user_data);
        return detail::errHandler(
            status, __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR);
    }
#endif

};

/*! \class Buffer
 * \brief Memory buffer interface.
 */
class Buffer : public Memory
{
public:
    /*! \brief Create a buffer object.
     *
     *  \param context is a valid OpenCL context used to create the buffer object.
     *
     *  \param flags is a bit-field that is used to specify allocation and usage
     *  information such as the memory arena that should be used to allocate the
     *  buffer object and how it will be used.
     *
     *  \param size is the size in bytes of the buffer memory object to be
     *  allocated.
     *
     *  \param host_ptr is a pointer to the buffer data that may already be
     *  allocated by the application. The size of the buffer that host_ptr points
     *  to must be >= \a size bytes. Passing in a pointer to an already allocated
     *  buffer on the host and using it as a buffer object allows applications to
     *  share data efficiently with kernels and the host.
     *
     *  \param err will return an appropriate error code.
     *  If \a err is NULL, no error code is returned.
     *
     *  \return A valid non-zero buffer object and \a err is set to
     *  CL_SUCCESS if the buffer object is created successfully or a NULL value
     *  with one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
     *  - CL_INVALID_BUFFER_SIZE if \a size is 0 or is greater than
     *    CL_DEVICE_MAX_MEM_ALLOC_SIZE value.
     *  - CL_INVALID_HOST_PTR if host_ptr is NULL and CL_MEM_USE_HOST_PTR or
     *    CL_MEM_COPY_HOST_PTR are set in \a flags or if \a host_ptr is not NULL
     *    but CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in
     *    \a flags.
     *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
     *    memory for buffer object.
     *  - CL_INVALID_OPERATION if the buffer object cannot be created for all
     *    devices in \a context.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Buffer(
        const Context& context,
        cl_mem_flags flags,
        ::size_t size,
        void* host_ptr = NULL,
        cl_int* err = NULL)
    {
        cl_int error;
        object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);

        detail::errHandler(error, __CREATE_BUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; buffer is not valid at this point.
    Buffer() : Memory() { }

    /*!
     * \brief Construct a new buffer from a valid buffer.
     *
     * \param buffer The buffer object used for creation.
    */
    Buffer(const Buffer& buffer) : Memory(buffer) { }

    /*!
     * \brief Assign a buffer to buffer.
     *
     * \param rhs the buffer object on rhs of the assignment.
     *
     * \return a reference to this object.
     */
    Buffer& operator = (const Buffer& rhs)
    {
        if (this != &rhs) {
            Memory::operator=(rhs);
        }
        return *this;
    }

#if defined(CL_VERSION_1_1)
    /*!
     * \brief Create a new (sub-)buffer object from the current buffer.
     *
     * \param flags is a bit-field that is used to specify allocation
     * and usage information about the buffer memory object being created.
     *
     * \param buffer_create_type describes the type of buffer object to be
     * created.
     *
     * \param buffer_create_info is the buffer descriptor.
     *
     * \param err will return an appropriate error code. If \a err is NULL,
     * no error code is returned. It is set to CL_SUCCESS if the buffer
     * object is created successfully, or to one of the following values:
     *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
     *  - CL_INVALID_VALUE if value specified in \a buffer_create_type is not
     *    valid.
     *  - CL_INVALID_VALUE if value(s) specified in \a buffer_create_info
     *    (for a given \a buffer_create_type) is not valid or if
     *    \a buffer_create_info is NULL.
     *
     * \return Buffer object; if the creation fails then the object is not
     * valid.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Buffer createSubBuffer(
        cl_mem_flags flags,
        cl_buffer_create_type buffer_create_type,
        const void * buffer_create_info,
        cl_int * err = NULL)
    {
        Buffer result;
        cl_int error;
        result.object_ = ::clCreateSubBuffer(
            object_,
            flags,
            buffer_create_type,
            buffer_create_info,
            &error);

        detail::errHandler(error, __CREATE_SUBBUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }

        // The original fell off the end of this non-void function, which is
        // undefined behaviour; hand the new wrapper back to the caller.
        return result;
    }
#endif
};

#if defined (USE_DX_INTEROP)
class BufferD3D10 : public Buffer
{
public:
	typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)(
    cl_context context, cl_mem_flags flags, ID3D10Buffer*  buffer,
    cl_int* errcode_ret);

    BufferD3D10(
        const Context& context,
        cl_mem_flags flags,
        ID3D10Buffer* bufobj,
        cl_int * err = NULL)
    {
		static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
		__INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);

        cl_int error;
        object_ = pfn_clCreateFromD3D10BufferKHR(
            context(),
            flags,
            bufobj,
            &error);

        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; buffer is not valid at this point.
	BufferD3D10() : Buffer() { }

    /*!
     * \brief Construct a new D3D10 buffer from a valid D3D10 buffer.
     *
     * \param buffer The buffer object used for creation.
    */
    BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }

    /*!
     * \brief Assign a D3D10 buffer to D3D10 buffer.
     *
     * \param rhs the D3D10 buffer object on rhs of the assignment.
     */
    BufferD3D10& operator = (const BufferD3D10& rhs)
    {
        if (this != &rhs) {
            Buffer::operator=(rhs);
        }
        return *this;
    }
};
#endif

/*! \class BufferGL
 * \brief Memory buffer interface for GL interop.
 */
class BufferGL : public Buffer
{
public:
    /*! \brief Create a buffer object.
     *
     *  \param context is a valid OpenCL context used to create the buffer object.
     *
     *  \param flags is a bit-field that is used to specify allocation and usage
     *  information such as the memory arena that should be used to allocate the
     *  buffer object and how it will be used.
     *
     *  \param bufobj is the name fo a GL buffer object.
     *
     *  \param err will return an appropriate error code.
     *  If \a err is NULL, no error code is returned.
     *
     *  \return A valid non-zero buffer object and \a err is set to
     *  CL_SUCCESS if the buffer object is created successfully or a NULL value
     *  with one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
     *  - CL_INVALID_GL_OBJECT if bufobj is not a GL buffer object or is a GL
	 *    buffer object but does not have an existing data store.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    BufferGL(
        const Context& context,
        cl_mem_flags flags,
        GLuint bufobj,
        cl_int * err = NULL)
    {
        cl_int error;
        object_ = ::clCreateFromGLBuffer(
            context(),
            flags,
            bufobj,
            &error);

        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; buffer is not valid at this point.
	BufferGL() : Buffer() { }

    /*!
     * \brief Construct a new GL buffer from a valid GL buffer.
     *
     * \param buffer The buffer object used for creation.
    */
    BufferGL(const BufferGL& buffer) : Buffer(buffer) { }

    /*!
     * \brief Assign a GL buffer to GL buffer.
     *
     * \param rhs the GL buffer object on rhs of the assignment.
     */
    BufferGL& operator = (const BufferGL& rhs)
    {
        if (this != &rhs) {
            Buffer::operator=(rhs);
        }
        return *this;
    }

    /*!
     * \brief Report the type of GL buffer used to create the object.
     *
     * \param type type of GL buffer.
	 * \param gl_object_name 
     */
	cl_int getObjectInfo(
		cl_gl_object_type *type,
		GLuint * gl_object_name)
	{
		return detail::errHandler(
			::clGetGLObjectInfo(object_,type,gl_object_name),
            __GET_GL_OBJECT_INFO_ERR);
	}
};

/*! \class BufferRenderGL
 * \brief Memory buffer interface for GL interop with renderbuffer.
 */
class BufferRenderGL : public Buffer
{
public:
    /*! \brief Create a buffer object from a GL render buffer.
     *
     *  \param context is a valid OpenCL context used to create the buffer object.
     *
     *  \param flags is a bit-field that is used to specify allocation and usage
     *  information such as the memory arena that should be used to allocate the
     *  buffer object and how it will be used.
     *
     *  \param bufobj is the name for a GL render buffer object.
     *
     *  \param err will return an appropriate error code.
     *  If \a err is NULL, no error code is returned.
     *
     *  \return A valid non-zero buffer object and \a err is set to
     *  CL_SUCCESS if the buffer object is created successfully or a NULL value
     *  with one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
     *  - CL_INVALID_GL_OBJECT if bufobj is not a GL render buffer object or is a GL
     *    render buffer object but does not have an existing data store.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    BufferRenderGL(
        const Context& context,
        cl_mem_flags flags,
        GLuint bufobj,
        cl_int * err = NULL)
    {
        cl_int error;
        object_ = ::clCreateFromGLRenderbuffer(
            context(),
            flags,
            bufobj,
            &error);

        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; buffer is not valid at this point.
    BufferRenderGL() : Buffer() { }

    /*!
     * \brief Construct a new GL render buffer from a valid GL render buffer.
     *
     * The parameter type was previously BufferGL (a copy-paste slip), which
     * made this a converting constructor from an unrelated sibling class
     * instead of a copy constructor.
     *
     * \param buffer The render buffer object used for creation.
    */
    BufferRenderGL(const BufferRenderGL& buffer) : Buffer(buffer) { }

    /*!
     * \brief Assign a GL render buffer to a GL render buffer.
     *
     * \param rhs the GL render buffer object on rhs of the assignment.
     *
     * \return a reference to this object.
     */
    BufferRenderGL& operator = (const BufferRenderGL& rhs)
    {
        if (this != &rhs) {
            Buffer::operator=(rhs);
        }
        return *this;
    }

    /*!
     * \brief Query the GL object used to create this buffer.
     *
     * \param type receives the type of the GL object.
     * \param gl_object_name receives the name of the GL object.
     *
     * \return the status of clGetGLObjectInfo after it has been passed
     * through the error handler.
     */
    cl_int getObjectInfo(
        cl_gl_object_type *type,
        GLuint * gl_object_name)
    {
        return detail::errHandler(
            ::clGetGLObjectInfo(object_,type,gl_object_name),
            __GET_GL_OBJECT_INFO_ERR);
    }
};


/*! \class Image
 * \brief Base class  interface for all images.
 */
class Image : public Memory
{
protected:
    //! Default constructor; the image is not valid at this point.
    Image() : Memory() { }

    /*!
     * \brief Copy-construct an image from another image.
     *
     * \param image the image object to copy from.
     */
    Image(const Image& image) : Memory(image) { }

    /*!
     * \brief Copy-assign an image.
     *
     * \param rhs the image object on the right-hand side of the assignment.
     *
     * \return a reference to this object.
     */
    Image& operator = (const Image& rhs)
    {
        // Self-assignment is a no-op.
        if (this == &rhs) {
            return *this;
        }
        Memory::operator=(rhs);
        return *this;
    }

public:
    /*! \brief Query information specific to an image object.
     *
     *  \param name specifies the information to query.
     *
     *  \param param is a pointer to memory where the result of the query is
     *  stored.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getImageInfo(cl_image_info name, T* param) const
    {
        const cl_int status =
            detail::getInfo(&::clGetImageInfo, object_, name, param);
        return detail::errHandler(status, __GET_IMAGE_INFO_ERR);
    }

    /*!
     * \brief Query information specific to an image object, returning the
     * value directly.
     *
     * \tparam name specifies the information to query.
     *
     * \param err optional pointer to a location that receives the error
     * value. When it is not NULL (NULL is the default), one of the following
     * values is stored:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the value associated with \em name.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_image_info, name>::param_type
    getImageInfo(cl_int* err = NULL) const
    {
        // The result type is chosen at compile time from the queried constant.
        typedef typename detail::param_traits<
            detail::cl_image_info, name>::param_type value_type;

        value_type value;
        const cl_int status = getImageInfo(name, &value);
        if (err != NULL) {
            *err = status;
        }
        return value;
    }
};

/*! \class Image2D
 * \brief Image interface for 2D images.
 */
class Image2D : public Image
{
public:
    /*! \brief Create a (1D, or 2D) image object.
     *
     *  \param context is a valid OpenCL context on which the image object is
     *  to be created.
     *
     *  \param flags is a bit-field that is used to specify allocation and usage
     *  information about the image memory object being created.
     *
     *  \param format is a pointer to a structure that describes format
     *  properties of the image to be allocated.
     *
     *  \param width is the width of the image in pixels. Must be greater
     *  than or equal to 1.
     *
     *  \param height is the height of the image in pixels. Must be greater
     *  than or equal to 1.
     *
     *  \param row_pitch is the scan-line pitch in bytes. This must be 0 if
     *  \a host_ptr is NULL and can be either 0 or >= \a width * size of
     *  element in bytes if \a host_ptr is not NULL. If \a host_ptr is not NULL
     *  and \a row_pitch = 0, \a row_pitch is calculated as
     *  \a width * size of element in bytes.
     *
     *  \param host_ptr is a pointer to the image data that may already be
     *  allocated by the application. The size of the buffer that \a host_ptr
     *  points to must be >= \a row_pitch * \a height. The size of each element
     *  in bytes must be a power of 2. Passing in a pointer to an already
     *  allocated buffer on the host and using it as a memory object allows
     *  applications to share data efficiently with kernels and the host.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, no error code is returned.
     *
     *  \return A valid non-zero image object and errcode_ret is set to CL_SUCCESS
     *  if the image object is created successfully. It returns a NULL value with
     *  one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
     *  - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if values specified in \a format
     *    are not valid or if \a image_format is NULL.
     *  - CL_INVALID_IMAGE_SIZE if \a image_width or \a height are 0 or if
     *    they exceed values specified in CL_DEVICE_IMAGE2D_MAX_WIDTH or
     *    CL_DEVICE_IMAGE2D_MAX_HEIGHT respectively or if values specified by
     *    \a image_row_pitch do not follow rules described in the argument
     *    description above.
     *  - CL_INVALID_HOST_PTR if \a host_ptr is NULL and CL_MEM_USE_HOST_PTR or
     *    CL_MEM_COPY_HOST_PTR are set in \a flags or if \a host_ptr is not NULL
     *    but CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in \a flags.
     *  - CL_IMAGE_FORMAT_NOT_SUPPORTED if the \a image_format is not supported.
     *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
     *    for image object.
     *  - CL_INVALID_OPERATION if the image object as specified by the
     *    \a image_format, \a flags and dimensions cannot be created for all devices
     *    in context that support images or if there are no devices in context that
     *    support images.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
     *    by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Image2D(
        const Context& context,
        cl_mem_flags flags,
        ImageFormat format,
        ::size_t width,
        ::size_t height,
        ::size_t row_pitch,
        void* host_ptr = NULL,
        cl_int* err = NULL)
    {
        cl_int error;
        object_ = ::clCreateImage2D(
            context(), flags,&format, width, height, row_pitch, host_ptr, &error);

        detail::errHandler(error, __CREATE_IMAGE2D_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; image is not valid at this point.
    Image2D() { }

    /*!
     * \brief Construct a new image2D from a valid image2D.
     *
     * \param image2D The image2D object used for creation.
    */
    Image2D(const Image2D& image2D) : Image(image2D) { }

    /*!
     * \brief Assign a image2D to image2D.
     *
     * \param rhs the image2D object on rhs of the assignment.
     */
    Image2D& operator = (const Image2D& rhs)
    {
        if (this != &rhs) {
            Image::operator=(rhs);
        }
        return *this;
    }
};

/*! \class Image2DGL
 * \brief 2D image interface for GL interop.
 */
class Image2DGL : public Image2D
{
public:
    /*! \brief Create a 2D image object.
     *
     *  \param context is a valid OpenCL context used to create the buffer object.
     *
     *  \param flags is a bit-field that is used to specify allocation and usage
     *  information such as the memory arena that should be used to allocate the
     *  buffer object and how it will be used.
     *
	 *  \param target.
	 *
     *  \param miplevel is the level for the incomming texture.
	 *
     *  \param texobj is the name fo a GL buffer object.
     *
     *  \param err will return an appropriate error code.
     *  If \a err is NULL, no error code is returned.
     *
     *  \return A valid non-zero buffer object and \a err is set to
     *  CL_SUCCESS if the buffer object is created successfully or a NULL value
     *  with one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
     *  - CL_INVALID_GL_OBJECT if bufobj is not a GL buffer object or is a GL
	 *    buffer object but does not have an existing data store.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Image2DGL(
        const Context& context,
        cl_mem_flags flags,
		GLenum target,
		GLint  miplevel,
        GLuint texobj,
        cl_int * err = NULL)
    {
        cl_int error;
        object_ = ::clCreateFromGLTexture2D(
            context(),
            flags,
			target,
			miplevel,
            texobj,
            &error);

        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; image is not valid at this point.
	Image2DGL() : Image2D() { }

    /*!
     * \brief Construct a new CL 2D image from a valid GL 2D texture.
     *
     * \param image The buffer object used for creation.
    */
    Image2DGL(const Image2DGL& image) : Image2D(image) { }

    /*!
     * \brief Assign a GL 2D image to GL 2D image buffer.
     *
     * \param rhs the GL buffer object on rhs of the assignment.
     */
    Image2DGL& operator = (const Image2DGL& rhs)
    {
        if (this != &rhs) {
            Image2D::operator=(rhs);
        }
        return *this;
    }
};

/*! \class Image3D
 * \brief Image interface for 3D images.
 */
class Image3D : public Image
{
public:
    /*! \brief Create a 3D image object.
     *
     *  \param context is a valid OpenCL context on which the image object is to be
     *  created.
     *
     *  \param flags is a bit-field that is used to specify allocation and usage
     *  information about the image memory object being created.
     *
     *  \param format is a pointer to a structure that describes format
     *  properties of the image to be allocated.
     *
     *  \param width is the width of the image in pixels. Must be greater
     *  than or equal to 1.
     *
     *  \param height is the height of the image in pixels. Must be greater
     *  than or equal to 1.
     *
     *  \param depth is the depth of the image in pixels. This must be a
     *  value > 1.
     *
     *  \param row_pitch is the scan-line pitch in bytes. This must be 0 if
     *  \a host_ptr is NULL and can be either 0 or >= \a width * size of
     *  element in bytes if \a host_ptr is not NULL. If \a host_ptr is not NULL and
     *  \a row_pitch = 0, \a row_pitch is calculated as
     *  \a width * size of element in bytes.
     *
     *  \param slice_pitch is the size in bytes of each 2D slice in the 3D
     *  image. This must be 0 if \a host_ptr is NULL and can be either 0 or >=
     *  \a row_pitch * \a height if \a host_ptr is not NULL.
     *  If \a host_ptr is not NULL and \a image_slice_pitch = 0,
     *  \a slice_pitch is calculated as \a row_pitch * \a height.
     *
     *  \param host_ptr is a pointer to the image data that may already be allocated
     *  by the application. The size of the buffer that \a host_ptr points to must
     *  be >= \a row_pitch * \a height * \a depth. The size of
     *  each element in bytes must be a power of 2. Passing in a pointer to an
     *  already allocated buffer on the host and using it as a memory object allows
     *  applications to share data efficiently with kernels and the host.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, no error code is returned.
     *
     *  \return valid non-zero image object created and the \a err is set to
     *  CL_SUCCESS if the image object is created successfully. It returns a NULL
     *  value with one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
     *  - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if values specified in \a format
     *    are not valid or if \a format is NULL.
     *  - CL_INVALID_IMAGE_SIZE if \a width, \a height or \a depth
     *    are 0 or if they exceed values specified in CL_DEVICE_IMAGE3D_MAX_WIDTH,
     *    CL_DEVICE_IMAGE3D_MAX_HEIGHT or CL_DEVICE_IMAGE3D_MAX_DEPTH respectively
     *    or if values specified by \a row_pitch and \a slice_pitch do
     *    not follow rules described in the argument description above.
     *  - CL_INVALID_HOST_PTR if \a host_ptr is NULL and CL_MEM_USE_HOST_PTR or
     *    CL_MEM_COPY_HOST_PTR are set in \a flags or if \a host_ptr is not NULL but
     *    CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in \a flags.
     *  - CL_IMAGE_FORMAT_NOT_SUPPORTED if the \a format is not supported.
     *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
     *    for image object.
     *  - CL_INVALID_OPERATION if the image object as specified by the
     *    \a image_format, \a flags and dimensions cannot be created for all devices
     *    in context that support images, or if there are no devices in context that
     *    support images.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
     *    by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Image3D(
        const Context& context,
        cl_mem_flags flags,
        ImageFormat format,
        ::size_t width,
        ::size_t height,
        ::size_t depth,
        ::size_t row_pitch,
        ::size_t slice_pitch,
        void* host_ptr = NULL,
        cl_int* err = NULL)
    {
        cl_int error;
        object_ = ::clCreateImage3D(
            context(), flags, &format, width, height, depth, row_pitch,
            slice_pitch, host_ptr, &error);

        detail::errHandler(error, __CREATE_IMAGE3D_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; image is not valid at this point.
    Image3D() { }

    /*!
     * \brief Construct a new image3D from a valid image3D.
     *
     * \param image3D The image3D object used for creation.
    */
    Image3D(const Image3D& image3D) : Image(image3D) { }

    /*!
     * \brief Assign a image3D to image3D.
     *
     * \param rhs the image3D object on rhs of the assignment.
     */
    Image3D& operator = (const Image3D& rhs)
    {
        if (this != &rhs) {
            Image::operator=(rhs);
        }
        return *this;
    }
};

/*! \class Image2DGL
 * \brief 2D image interface for GL interop.
 */
class Image3DGL : public Image3D
{
public:
    /*! \brief Create a 3D image object.
     *
     *  \param context is a valid OpenCL context used to create the buffer object.
     *
     *  \param flags is a bit-field that is used to specify allocation and usage
     *  information such as the memory arena that should be used to allocate the
     *  buffer object and how it will be used.
     *
	 *  \param target.
	 *
     *  \param miplevel is the level for the incomming texture.
	 *
     *  \param texobj is the name fo a GL buffer object.
     *
     *  \param err will return an appropriate error code.
     *  If \a err is NULL, no error code is returned.
     *
     *  \return A valid non-zero buffer object and \a err is set to
     *  CL_SUCCESS if the buffer object is created successfully or a NULL value
     *  with one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
     *  - CL_INVALID_GL_OBJECT if bufobj is not a GL buffer object or is a GL
	 *    buffer object but does not have an existing data store.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Image3DGL(
        const Context& context,
        cl_mem_flags flags,
		GLenum target,
		GLint  miplevel,
        GLuint texobj,
        cl_int * err = NULL)
    {
        cl_int error;
        object_ = ::clCreateFromGLTexture3D(
            context(),
            flags,
			target,
			miplevel,
            texobj,
            &error);

        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; image is not valid at this point.
	Image3DGL() : Image3D() { }

    /*!
     * \brief Construct a new CL 2D image from a valid GL 2D texture.
     *
     * \param image The buffer object used for creation.
    */
    Image3DGL(const Image3DGL& image) : Image3D(image) { }

    /*!
     * \brief Assign a GL 2D image to GL 2D image buffer.
     *
     * \param rhs the GL buffer object on rhs of the assignment.
     */
    Image3DGL& operator = (const Image3DGL& rhs)
    {
        if (this != &rhs) {
            Image3D::operator=(rhs);
        }
        return *this;
    }
};

/*! \class Sampler
 * \brief Sampler interface for cl_sampler.
 */
class Sampler : public detail::Wrapper<cl_sampler>
{
public:
    //! Default constructor.
    Sampler() { }

    /*! \brief Create a sampler object.
     *
     *  All arguments except \a err are forwarded unchanged to clCreateSampler.
     *
     *  \param context is the OpenCL context the sampler is created in.
     *
     *  \param normalized_coords is the normalized-coordinates setting handed
     *  to clCreateSampler.
     *
     *  \param addressing_mode is the addressing mode handed to clCreateSampler.
     *
     *  \param filter_mode is the filter mode handed to clCreateSampler.
     *
     *  \param err will return the error code reported by clCreateSampler.
     *  If \a err is NULL, no error code is returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Sampler(
        const Context& context,
        cl_bool normalized_coords,
        cl_addressing_mode addressing_mode,
        cl_filter_mode filter_mode,
        cl_int* err = NULL)
    {
        cl_int error;
        object_ = ::clCreateSampler(
            context(),
            normalized_coords,
            addressing_mode,
            filter_mode,
            &error);

        // Handler first; the out-parameter is written only after it returns.
        detail::errHandler(error, __CREATE_SAMPLER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    /*!
     * \brief Construct a new sampler from a valid sampler.
     *
     * \param sampler The sampler object used for creation.
    */
    Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }

    /*!
     * \brief Assign a sampler to sampler.
     *
     * \param rhs the sampler object on rhs of the assignment.
     */
    Sampler& operator = (const Sampler& rhs)
    {
        if (this != &rhs) {
            detail::Wrapper<cl_type>::operator=(rhs);
        }
        return *this;
    }

    /*! \brief Return information about the sampler object.
     *
     *  \param name specifies the information to query.
     *
     *  \param param is a pointer to memory where the appropriate result
     *  being queried is returned.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getInfo(cl_sampler_info name, T* param) const
    {
        return detail::errHandler(
            detail::getInfo(&::clGetSamplerInfo, object_, name, param),
            __GET_SAMPLER_INFO_ERR);
    }

    /*!
     * \brief Return information about the sampler object.
     *
     * \param name specifies the information to query.
     *
     * \param err pointer to memory location where error value will be returned.
     * If not null, the default value, then one of the following values is
     * returned:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the appropriate values for \em name will be returned. If the
     * query fails without an exception being thrown, a value-initialized
     * object is returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_sampler_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typedef typename detail::param_traits<
            detail::cl_sampler_info, name>::param_type result_type;

        // Value-initialize so that a failed query (when errors are reported
        // through \a err rather than by exception) never hands back an
        // indeterminate value.
        result_type param = result_type();
        cl_int result = getInfo(name, &param);
        if (err != NULL) {
            *err = result;
        }
        return param;
    }
};

// Forward declarations. Kernel and Program are defined further down in this
// header. CommandQueue is used below as a parameter type of Kernel::bind and
// is presumably defined later in the file -- confirm.
class Program;
class CommandQueue;
class Kernel;

/*! \class NDRange
 * \brief NDRange interface: up to three sizes plus the number of dimensions
 * that were supplied at construction.
 */
class NDRange
{
public:
    //! Create a range with zero dimensions and no sizes stored.
    NDRange() : dimensions_(0) { }

    //! Create a 1D range
    NDRange(::size_t size0) : dimensions_(1)
    {
        sizes_.push_back(size0);
    }

    //! Create a 2D range
    NDRange(::size_t size0, ::size_t size1) : dimensions_(2)
    {
        sizes_.push_back(size0);
        sizes_.push_back(size1);
    }

    //! Create a 3D range
    NDRange(::size_t size0, ::size_t size1, ::size_t size2) : dimensions_(3)
    {
        sizes_.push_back(size0);
        sizes_.push_back(size1);
        sizes_.push_back(size2);
    }

    //! Expose the stored sizes as a pointer to const ::size_t.
    operator const ::size_t*() const
    {
        return (const ::size_t*) sizes_;
    }

    //! Number of dimensions this range was constructed with.
    ::size_t dimensions() const
    {
        return dimensions_;
    }

private:
    size_t<3> sizes_;       // sizes in the order they were passed in
    cl_uint dimensions_;    // 0 for a default-constructed range, else 1..3
};

//! Null range object: a default-constructed NDRange, i.e. one that reports
//! zero dimensions. Declared static, so every translation unit that includes
//! this header gets its own copy.
static const NDRange NullRange;

/*!
 * \struct LocalSpaceArg
 * \brief Local address wrapper for use with Kernel::setArg
 */
struct LocalSpaceArg
{
    ::size_t size_;   //!< Size in bytes carried by this argument.
};

//! \cond DOXYGEN_DETAIL
namespace detail {

/*
 * Generic case: the argument is passed by address and its size is the size
 * of its type.
 */
template <typename T>
struct KernelArgumentHandler
{
    static ::size_t size(const T&)
    {
        return sizeof(T);
    }

    static T* ptr(T& value)
    {
        T* const address = &value;
        return address;
    }
};

/*
 * LocalSpaceArg case: the size is the one stored in the argument and the
 * pointer is always NULL.
 */
template <>
struct KernelArgumentHandler<LocalSpaceArg>
{
    static ::size_t size(const LocalSpaceArg& value)
    {
        const ::size_t bytes = value.size_;
        return bytes;
    }

    static void* ptr(LocalSpaceArg&)
    {
        return NULL;
    }
};

} // namespace detail
//! \endcond

/*! \brief Create a local address space argument
 *
 * \param size is the size in bytes of the memory to be allocated in the
 * __local memory space.
 *
 * \return A local address space argument, of \a size bytes, that can
 * be used as an argument to Kernel::setArg or to a KernelFunctor(...) call.
 */
inline LocalSpaceArg
__local(::size_t size)
{
    // The argument carries nothing but the requested byte count.
    LocalSpaceArg arg;
    arg.size_ = size;
    return arg;
}

// Forward declaration: KernelFunctor is the return type of Kernel::bind below.
class KernelFunctor;

/*! \class Kernel
 * \brief Kernel interface that implements cl_kernel
 */
class Kernel : public detail::Wrapper<cl_kernel>
{
public:

    /*! \brief Create a kernel object.
     *
     *  \param program is a program object with a successfully built executable.
     *
     *  \param name is a function name in the program declared with the
     *  __kernel qualifier.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, no error code is returned.
     *
     *  \return A valid non-zero kernel object and \a err is set to
     *  CL_SUCCESS if the kernel object is created successfully. It returns a
     *  NULL value with one of the following error values returned in \a err:
     *  - CL_INVALID_PROGRAM if \a program is not a valid program object
     *  - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built
     *    executable for \a program.
     *  - CL_INVALID_KERNEL_NAME if \a name is not found in \a program.
     *  - CL_INVALID_KERNEL_DEFINITION if the function definition for __kernel
     *    function given by \a name such as the number of arguments, the
     *    argument types are not the same for all devices for which the program
     *    executable has been built.
     *  - CL_INVALID_VALUE if \a name is NULL.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    inline Kernel(const Program& program, const char* name, cl_int* err = NULL);

    //! Default constructor; kernel is not valid at this point.
    Kernel() { }

    /*!
     * \brief Construct a new kernel from a valid kernel.
     *
     * \param kernel The kernel object used for creation.
    */
    Kernel(const Kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }

    /*!
     * \brief Assign a kernel to kernel.
     *
     * \param rhs the kernel object on rhs of the assignment.
     */
    Kernel& operator = (const Kernel& rhs)
    {
        if (this != &rhs) {
            detail::Wrapper<cl_type>::operator=(rhs);
        }
        return *this;
    }

    /*! \brief Return information about the kernel object.
     *
     *  \param name specifies the information to query.
     *
     *  \param param is a pointer to memory where the appropriate result
     *  being queried is returned.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *  - CL_INVALID_KERNEL if this object does not hold a valid kernel object.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getInfo(cl_kernel_info name, T* param) const
    {
        return detail::errHandler(
            detail::getInfo(&::clGetKernelInfo, object_, name, param),
            __GET_KERNEL_INFO_ERR);
    }

    /*!
     *  \brief Return information about the kernel object.
     *
     * \param name specifies the information to query.
     *
     * \param err pointer to memory location where error value will be returned.
     * If not null, the default value, then one of the following values is
     * returned:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the appropriate values for \em name will be returned. If the
     * query fails without an exception being thrown, a value-initialized
     * object is returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_kernel_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typedef typename detail::param_traits<
            detail::cl_kernel_info, name>::param_type result_type;

        // Value-initialize so that a failed query (when errors are reported
        // through \a err rather than by exception) never hands back an
        // indeterminate value.
        result_type param = result_type();
        cl_int result = getInfo(name, &param);
        if (err != NULL) {
            *err = result;
        }
        return param;
    }

    /*! \brief Return information about the kernel object that may be specific
     *  to a device.
     *
     *  \param device identifies a specific device in the list of devices
     *  associated with the kernel. The list of devices is the list of devices
     *  in the OpenCL context that is associated with the kernel.
     *
     *  \param name specifies the information to query
     *
     *  \param param is a pointer to memory where the appropriate result being
     *  queried is returned.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully,
     *  - CL_INVALID_DEVICE if \a device is not in the list of devices
     *    associated with the kernel.
     *  - CL_INVALID_VALUE if \a name is not valid.
     *  - CL_INVALID_KERNEL if this object does not hold a valid kernel object.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getWorkGroupInfo(
        const Device& device, cl_kernel_work_group_info name, T* param) const
    {
        return detail::errHandler(
            detail::getInfo(
                &::clGetKernelWorkGroupInfo, object_, device(), name, param),
                __GET_KERNEL_WORK_GROUP_INFO_ERR);
    }

    /*!
     *  \brief Return information about the kernel object that may be specific
     *  to a device.
     *
     * \param device identifies the device the query is made for.
     *
     * \param name specifies the information to query.
     *
     * \param err pointer to memory location where error value will be returned.
     * If not null, the default value, then one of the following values is
     * returned:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the appropriate values for \em name will be returned. If the
     * query fails without an exception being thrown, a value-initialized
     * object is returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_kernel_work_group_info, name>::param_type
    getWorkGroupInfo(const Device& device, cl_int* err = NULL) const
    {
        typedef typename detail::param_traits<
            detail::cl_kernel_work_group_info, name>::param_type result_type;

        // Value-initialize for the same reason as in getInfo<name>() above:
        // a failed query must not return an indeterminate value.
        result_type param = result_type();
        cl_int result = getWorkGroupInfo(device, name, &param);
        if (err != NULL) {
            *err = result;
        }
        return param;
    }

    /*! \brief Set the argument value for a specific argument of a kernel.
     *
     *  \param index is the argument index. Arguments to the kernel are referred
     *  by indices that go from 0 for the leftmost argument to n - 1,
     *  where n is the total number of arguments declared by a kernel.
     *
     *  \param value is the data to be used as the argument
     *  value for argument specified by \a index. If the argument is
     *  a memory object (buffer or image), the \a value entry will be
     *  the appropriate buffer or image object. The memory object must be
     *  created with the context associated with the kernel object. If the
     *  argument is declared with the __local qualifier, the \a value must
     *  be of type LocalSpaceArg (use the __local helper function to build
     *  a value of this type). The memory object specified
     *  as argument value must be a buffer object if the argument is declared
     *  to be a pointer of a built-in or user defined type with the __global
     *  or __constant qualifier. If the argument is declared with the
     *  __constant qualifier, the size in bytes of the memory object cannot
     *  exceed CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE and the number of arguments
     *  declared with the __constant qualifier cannot exceed
     *  CL_DEVICE_MAX_CONSTANT_ARGS. The memory object specified as argument
     *  value must be a 2D image object if the argument is declared to be of
     *  type image2d_t. The memory object specified as argument value must be a
     *  3D image object if argument is declared to be of type image3d_t. If the
     *  argument is of type sampler_t, the value entry
     *  must be the sampler object.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function was executed successfully
     *  - CL_INVALID_ARG_INDEX if \a index is not a valid argument index.
     *  - CL_INVALID_ARG_VALUE if \a value specified is of type
     *    LocalSpaceArg for an argument that is not declared with the
     *    __local qualifier or vice-versa.
     *  - CL_INVALID_MEM_OBJECT for an argument declared to be a memory object
     *    but the specified \a value is not a valid memory object.
     *  - CL_INVALID_SAMPLER for an argument declared to be of type sampler_t but
     *    the specified \a value is not a valid sampler object.
     *  - CL_INVALID_ARG_SIZE if the argument size does not match the size of
     *    the data type for an argument that is not a memory object or if the
     *    argument is a memory object and the size != sizeof(cl_mem) or if
     *    the argument is a sampler and argument size != sizeof(cl_sampler).
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int setArg(cl_uint index, T value)
    {
        // Size and address of the argument are chosen per type by
        // detail::KernelArgumentHandler.
        return detail::errHandler(
            ::clSetKernelArg(
                object_,
                index,
                detail::KernelArgumentHandler<T>::size(value),
                detail::KernelArgumentHandler<T>::ptr(value)),
            __SET_KERNEL_ARGS_ERR);
    }

    /*! \brief Set the argument value for a specific argument of a kernel.
     *
     *  \param index is the argument index. Arguments to the kernel are referred
     *  by indices that go from 0 for the leftmost argument to n - 1, where n is
     *  the total number of arguments declared by a kernel.
     *
     *  \param size specifies the size of the argument value. If the argument is
     *  a memory object, the size is the size of the buffer or image object type.
     *  For arguments declared with the __local qualifier, the size specified
     *  will be the size in bytes of the buffer that must be allocated for the
     *  __local argument. If the argument is of type sampler_t, the \a size
     *  value must be equal to sizeof(cl_sampler). For all other arguments, the
     *  size will be the size of argument type.
     *
     *  \param argPtr is a pointer to data that should be used as the argument
     *  value for argument specified by \a index. The argument data pointed to
     *  by \a argPtr is copied and the \a argPtr pointer can therefore be
     *  reused by the application after setArg returns. If the argument is
     *  a memory object (buffer or image), the \a argPtr entry will be a pointer
     *  to the appropriate buffer or image object. The memory object must be
     *  created with the context associated with the kernel object. If the
     *  argument is declared with the __local qualifier, the \a argPtr entry must
     *  be NULL. For all other kernel arguments, the \a argPtr entry must be a
     *  pointer to the actual data to be used as argument value. The memory
     *  object specified as argument value must be a buffer object if the
     *  argument is declared to be a pointer of a built-in or user defined type
     *  with the __global or __constant qualifier. If the argument is declared
     *  with the __constant qualifier, the size in bytes of the memory object
     *  cannot exceed CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE and the number of
     *  arguments declared with the __constant qualifier cannot exceed
     *  CL_DEVICE_MAX_CONSTANT_ARGS. The memory object specified as argument
     *  value must be a 2D image object if the argument is declared to be of
     *  type image2d_t. The memory object specified as argument value must be a
     *  3D image object if argument is declared to be of type image3d_t. If the
     *  argument is of type sampler_t, the \a argPtr entry must be a pointer to
     *  the sampler object.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function was executed successfully
     *  - CL_INVALID_ARG_INDEX if \a index is not a valid argument index.
     *  - CL_INVALID_ARG_VALUE if \a argPtr specified is NULL for an argument
     *    that is not declared with the __local qualifier or vice-versa.
     *  - CL_INVALID_MEM_OBJECT for an argument declared to be a memory object
     *    but the specified \a argPtr is not a valid memory object.
     *  - CL_INVALID_SAMPLER for an argument declared to be of type sampler_t
     *    but the specified \a argPtr is not a valid sampler object.
     *  - CL_INVALID_ARG_SIZE if \a size does not match the size of the data
     *    type for an argument that is not a memory object or if the argument is
     *    a memory object and \a size != sizeof(cl_mem) or if \a size is zero
     *    and the argument is declared with the __local qualifier or if the
     *    argument is a sampler and \a size != sizeof(cl_sampler).
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int setArg(cl_uint index, ::size_t size, const void* argPtr)
    {
        // argPtr is const-qualified to match clSetKernelArg, which only reads
        // the data; callers passing a plain void* are unaffected.
        return detail::errHandler(
            ::clSetKernelArg(object_, index, size, argPtr),
            __SET_KERNEL_ARGS_ERR);
    }

    /*! \brief Bind a kernel to a command-queue and launch dimensions.
     *
     *  \param queue is the command-queue to bind with.
     *
     *  \param offset must currently be a NullRange value. In a future
     *  revision of OpenCL, \a offset can be used to specify an
     *  array of unsigned values that describe the offset used to
     *  calculate the global ID of a work-item instead of having the global IDs
     *  always start at offset (0, 0, 0).
     *
     *  \param global describes the number of global work-items that will
     *  execute the kernel function. The total number of global
     *  work-items is computed as global_work_size[0] * ...
     *  * global_work_size[work_dim - 1].
     *
     *  \param local describes the number of work-items that make up a
     *  work-group (also referred to as the size of the work-group) that
     *  will execute the kernel.
     *
     *  \return A KernelFunctor object that when called with the appropriate
     *  number of arguments, as defined by kernel itself, will be launched
     *  with the corresponding queue, offset, global, and local values.
     */
    KernelFunctor bind(
        const CommandQueue& queue,
        const NDRange& offset,
        const NDRange& global,
        const NDRange& local);

    /*! \brief Bind a kernel to a command-queue and launch dimensions.
     *
     *  \param queue is the command-queue to bind with.
     *
     *  \param global describes the number of global work-items that will
     *  execute the kernel function. The total number of global
     *  work-items is computed as global_work_size[0] * ...
     *  * global_work_size[work_dim - 1].
     *
     *  \param local describes the number of work-items that make up a
     *  work-group (also referred to as the size of the work-group) that
     *  will execute the kernel.
     *
     *  \return A KernelFunctor object that when called with the appropriate
     *  number of arguments, as defined by kernel itself, will be launched
     *  with the corresponding queue, offset=NullRange, global, and local values.
     */
    KernelFunctor bind(
        const CommandQueue& queue,
        const NDRange& global,
        const NDRange& local);
};


/*! \class Program
 * \brief Program interface that implements cl_program.
 */
class Program : public detail::Wrapper<cl_program>
{
public:
    //! List of program binaries: each entry pairs a pointer to the binary
    //! image (first) with its size (second).
    typedef VECTOR_CLASS<std::pair<const void*, ::size_t> > Binaries;
    //! List of program sources: each entry pairs a pointer to the source
    //! text (first) with its length (second).
    typedef VECTOR_CLASS<std::pair<const char*, ::size_t> > Sources;

    /*! \brief Create a program object for a context, and loads the source code
     *  specified by the text strings in the strings array into the program
     *  object.
     *
     *  \param context must be a valid OpenCL context.
     *
     *  \param sources is the source code.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, no error code is returned. The error code returned will be
     *  one of:
     *   - CL_SUCCESS if the program object is created successfully.
     *   - CL_INVALID_CONTEXT if \a context is not a valid context.
     *   - CL_COMPILER_NOT_AVAILABLE if a compiler is not available.
     *   - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *   required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Program(
        const Context& context,
        const Sources& sources,
        cl_int* err = NULL)
    {
        cl_int error;

		const ::size_t n = (::size_t)sources.size();
        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
        const char** strings = (const char**) alloca(n * sizeof(const char*));

        for (::size_t i = 0; i < n; ++i) {
            strings[i] = sources[(int)i].first;
            lengths[i] = sources[(int)i].second;
        }

        object_ = ::clCreateProgramWithSource(
            context(), (cl_uint)n, strings, lengths, &error);

        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    /*! \brief Create a program object for a context, and loads the binary
     *         images into the program object.
     *
     *  \param context must be a valid OpenCL context.
     *
     *  \param devices is a list of devices that are in context.
     *  The binaries are loaded for devices specified in this list.
     *
     *  \param num_devices is the number of devices listed in \a device_list.
     *
     *  \param device_list The devices associated with the program object. The
     *  list of devices specified by \a device_list must be devices associated
     *  with \a context.
     *
     *  \param lengths is an array of the size in bytes of the program binaries
     *  to be loaded for devices specified by \a device_list.
     *
     *  \param binaries is a program binarie to be loaded
     *  for devices specified by \a device_list. For each device given by
     *  \a device_list[i], the  program binary for that device is
     *  given by \a binaries[i]. The program binaries specified by binaries
     *  contain the bits that describe the program executable that will be run
     *  on the device(s) associated with context. The program binary can consist
     *  of either or both:
     *     - Device-specific executable(s)
     *     - Implementation specific intermediate representation (IR) which will
     *       beconverted to the device-specific executable.
     *
     *  \param binaryStatus returns whether the program binary for each device
     *  specified in \a device_list was loaded successfully or not. It is an
     *  array of \a num_devices entries and returns CL_SUCCESS in \a
     *  binaryStatus[i] if binary was successfully loaded for device specified
     *  by \a devices[i]; otherwise returns CL_INVALID_VALUE if \a lengths[i] is
     *  zero or if \a binaries[i] is a NULL value or CL_INVALID_BINARY in
     *  \a binaryStatus[i] if program binary is not a valid binary for the
     *  specified device. If \a binary_status is NULL, it is ignored.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, no error code is returned. The error code returned will be
     *  one of:
     *  - CL_SUCCESS if the program object is created successfully.
     *  - CL_INVALID_CONTEXT if \a context is not a valid context.
     *  - CL_INVALID_VALUE if \a devices has a size of zero.
     *  - CL_INVALID_DEVICE if OpenCL devices listed in \a devices are not in
     *    the list of devices associated with \a context,
     *  - CL_INVALID_BINARY if an invalid program binary was encountered for any
     *    device. \a binaryStatus will return specific status for each device.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    Program(
        const Context& context,
        const VECTOR_CLASS<Device>& devices,
        const Binaries& binaries,
        VECTOR_CLASS<cl_int>* binaryStatus = NULL,
        cl_int* err = NULL)
    {
        cl_int error;
        const ::size_t n = binaries.size();
        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
        const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*));

        for (::size_t i = 0; i < n; ++i) {
            images[i] = (const unsigned char*)binaries[(int)i].first;
            lengths[i] = binaries[(int)i].second;
        }

        object_ = ::clCreateProgramWithBinary(
            context(), (cl_uint) devices.size(),
            (cl_device_id*)&devices.front(),
            lengths, images, binaryStatus != NULL
               ? (cl_int*) &binaryStatus->front()
               : NULL, &error);

        detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; program is not valid at this point.
    Program()
    {
        // Intentionally empty: no OpenCL program is created here.
    }

    /*!
     * \brief Construct a new program from a valid program.
     *
     * \param program The program object used for creation.
    */
    Program(const Program& program)
        : detail::Wrapper<cl_type>(program)
    {
        // Copying is delegated entirely to the wrapper base class.
    }

    /*!
     * \brief Assign a program to program.
     *
     * \param rhs the program object on rhs of the assignment.
     */
    Program& operator = (const Program& rhs)
    {
        // Self-assignment leaves the object untouched.
        if (this == &rhs) {
            return *this;
        }

        detail::Wrapper<cl_type>::operator=(rhs);
        return *this;
    }

    /*! \brief Build (compile & link) a program executable from the program
     *   source or binary for all the devices or a specific device(s) in the
     *   OpenCL context associated with program.
     *
     *  OpenCL allows program executables to be built using the sources or
     *  binaries. The program that is built is the one wrapped by this object,
     *  referred to as \a program below.
     *
     *  \param devices is  a list of devices associated with
     *  \a program. The program executable is built for devices specified in
     *  this list for which a source or binary has been loaded. If \a devices
     *  is empty, a NULL device list is passed to clBuildProgram, which builds
     *  the program executable for all devices associated with \a program.
     *
     *  \param options is a pointer to a string that describes the build options
     *  to be used for building the program executable. \a options can be NULL
     *  and defaults to this value if not given.
     *
     *  \param notifyFptr is a function pointer to a notification routine. The
     *  notification routine allows an application to register a callback
     *  function which will be called when the program executable has been built
     *  (successfully or unsuccessfully). If \a notifyFptr is not NULL,
     *  clBuildProgram does not need to wait for the build to complete and can
     *  return immediately. If \a notifyFptr is NULL, its default value,
     *  build does not return until the build has completed. This callback
     *  function may be called asynchronously by the OpenCL implementation. It
     *  is the application's responsibility to ensure that the callback function
     *  is thread-safe.
     *
     *  \param data will be passed as the argument when \a notifyFptr is
     *  called. \a data can be NULL and is its default value.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_DEVICE if OpenCL devices listed in \a devices are not in
     *    the list of devices associated with \a program.
     *  - CL_INVALID_BINARY if \a program is created with
     *    createWithProgramBinary and devices listed in \a devices do not have a
     *    valid program binary loaded.
     *  - CL_INVALID_BUILD_OPTIONS if the build options specified by \a options
     *    are invalid.
     *  - CL_INVALID_OPERATION if the build of a program executable for any of
     *    the devices listed in \a devices by a previous call to build for
     *    \a program has not completed.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int build(
        const VECTOR_CLASS<Device>& devices,
        const char* options = NULL,
        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
        void* data = NULL) const
    {
        // An empty list selects every device associated with the program:
        // clBuildProgram expects num_devices == 0 together with a NULL
        // device_list, and front() must not be called on an empty container.
        const cl_uint numDevices = (cl_uint) devices.size();
        const cl_device_id* deviceList = (numDevices > 0)
            ? (const cl_device_id*) &devices.front()
            : NULL;

        return detail::errHandler(
            ::clBuildProgram(
                object_,
                numDevices,
                deviceList,
                options,
                notifyFptr,
                data),
            __BUILD_PROGRAM_ERR);
    }

    /*! \brief Return information about the program object.
     *
     *  \param name specifies the information to query.
     *
     *  \param param is a pointer to memory where the appropriate result
     *  being queried is returned. If \a param_value is NULL, it is ignored.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_VALUE if \a name is not valid.
     *  - CL_INVALID_PROGRAM if \a program is a not a valid program object.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getInfo(cl_program_info name, T* param) const
    {
        // Run the query, then route its status through the error handler.
        const cl_int status =
            detail::getInfo(&::clGetProgramInfo, object_, name, param);
        return detail::errHandler(status, __GET_PROGRAM_INFO_ERR);
    }

    /*!
     *  \brief Return information about the program object.
     *
     * \param name specifies the information to query.
     *
     * \param err pointer to memory location where error value will be returned.
     * If not null, the default value, then one of the following values is
     * returned:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the appropriate values for \em name will be returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_program_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typedef typename detail::param_traits<
            detail::cl_program_info, name>::param_type result_type;

        // Delegate to the pointer-based overload and hand the value back.
        result_type value;
        const cl_int status = getInfo(name, &value);
        if (err) {
            *err = status;
        }
        return value;
    }

    /*! \brief Return build information for each device in the program object.
     *
     *  \param device specifies the device for which build information is being
     *  queried. \a device must be a valid device associated with \a program.
     *
     *  \param name specifies the information to query.
     *
     *  \param param is a pointer to memory where the appropriate result being
     *  queried is returned. If \a param_value is NULL, it is ignored.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_DEVICE if \a device is not in the list of devices
     *    associated with \a program.
     *  - CL_INVALID_VALUE if \a name is not valid.
     *  - CL_INVALID_PROGRAM if \a program is a not a valid program object.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getBuildInfo(
        const Device& device, cl_program_build_info name, T* param) const
    {
        // Run the per-device query, then route its status through the
        // error handler.
        const cl_int status = detail::getInfo(
            &::clGetProgramBuildInfo, object_, device(), name, param);
        return detail::errHandler(status, __GET_PROGRAM_BUILD_INFO_ERR);
    }

    /*!
     *  \brief Return build information for each device in the program object.
     *
     * \param name specifies the information to query.
     *
     * \param err pointer to memory location where error value will be returned.
     * If not null, the default value, then one of the following values is
     * returned:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the appropriate values for \em name will be returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_program_build_info, name>::param_type
    getBuildInfo(const Device& device, cl_int* err = NULL) const
    {
        typedef typename detail::param_traits<
            detail::cl_program_build_info, name>::param_type result_type;

        // Delegate to the pointer-based overload and hand the value back.
        result_type value;
        const cl_int status = getBuildInfo(device, name, &value);
        if (err) {
            *err = status;
        }
        return value;
    }

    /*! \brief Create kernel objects for all kernel functions in program.
     *
     *  Kernel objects may not be created for any __kernel functions in program
     *  that do not have the same function definition across all devices for
     *  which a program executable has been successfully built.
     *
     *  \param kernels is the vector where the kernel objects for kernels in
     *  \a program will be returned. If \a kernels is NULL, it is ignored: no
     *  kernel objects are created.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the kernel objects were successfully allocated.
     *  - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built
     *    executable for any device in \a program.
     *  - CL_INVALID_VALUE if \a kernels is not NULL and its size is less
     *    than the number of kernels in program.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     *  Kernel objects can only be created once you have a program object with a
     *  valid program source or binary loaded into the program object and the
     *  program executable has been successfully built for one or more devices
     *  associated with \a program. No changes to the program executable are
     *  allowed while there are kernel objects associated with a program object.
     *  This means that calls to \a build return CL_INVALID_OPERATION if there
     *  are kernel objects attached to a program object. The OpenCL context
     *  associated with program will be the context associated with kernel.
     *  Devices associated with a program object for which a valid program
     *  executable has been built can be used to execute kernels declared in the
     *  program object.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int createKernels(VECTOR_CLASS<Kernel>* kernels)
    {
        // First call only asks how many kernels the program contains.
        cl_uint numKernels;
        cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
        }

        // Without an output vector there is nowhere to return the kernels,
        // so do not create them (and do not dereference a NULL pointer).
        if (kernels == NULL) {
            return CL_SUCCESS;
        }

        // As before, the scratch buffer is filled with raw handles by the
        // runtime and then read back as an array of Kernel wrappers.
        Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel));
        cl_kernel* handles = (cl_kernel*) value;
        err = ::clCreateKernelsInProgram(object_, numKernels, handles, NULL);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
        }

        kernels->assign(&value[0], &value[numKernels]);

        // The scratch entries are never destroyed, so the reference each
        // handle received at creation would otherwise never be dropped.
        // NOTE(review): this assumes copying a Kernel retains its handle
        // (wrapper copy semantics defined elsewhere in this file) - confirm.
        for (cl_uint i = 0; i < numKernels; ++i) {
            ::clReleaseKernel(handles[i]);
        }
        return CL_SUCCESS;
    }
};

inline Kernel::Kernel(const Program& program, const char* name, cl_int* err)
{
    cl_int status;
    object_ = ::clCreateKernel(program(), name, &status);

    // Report the outcome first, then mirror it to the optional
    // out-parameter.
    detail::errHandler(status, __CREATE_KERNEL_ERR);
    if (err) {
        *err = status;
    }
}

/*! \class CommandQueue
 * \brief CommandQueue interface for cl_command_queue.
 */
class CommandQueue : public detail::Wrapper<cl_command_queue>
{
public:
    /*! \brief Create a command-queue on a specific device.
     *
     *  \param context must be a valid OpenCL context.
     *
     *  \param device must be a device associated with context. It can either be
     *  in the list of devices specified when context is created using
     *  cl::Context or have the same device type as device type specified
     *  when context is created using cl::Context.
     *
     *  \param properties specifies a list of properties for the command-queue.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, its default value, no error code is returned.
     *
     *  \return A valid non-zero command-queue and \a err is set to
     *  CL_SUCCESS if the command-queue is created successfully or a NULL value
     *  with one of the following error values returned \a in err:
     *    - CL_INVALID_CONTEXT if context is not a valid.
     *    - CL_INVALID_DEVICE if device is not a valid device or is not
     *      associated with context
     *    - CL_INVALID_VALUE if values specified in properties are not valid.
     *    - CL_INVALID_QUEUE_PROPERTIES if values specified in properties are
     *      valid but are not supported by the device.
     *    - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *      required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    CommandQueue(
        const Context& context,
        const Device& device,
        cl_command_queue_properties properties = 0,
        cl_int* err = NULL)
    {
        cl_int error;
        object_ = ::clCreateCommandQueue(
            context(), device(), properties, &error);

        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! Default constructor; command queue is not valid at this point.
    CommandQueue()
    {
        // Intentionally empty: no OpenCL command-queue is created here.
    }

    /*!
     * \brief Construct a new commandQueue from a valid commandQueue.
     *
     * \param commandQueue The commandQueue object used for creation.
    */
    CommandQueue(const CommandQueue& commandQueue)
        : detail::Wrapper<cl_type>(commandQueue)
    {
        // Copying is delegated entirely to the wrapper base class.
    }

    /*!
     * \brief Assign a commandQueue to commandQueue.
     *
     * \param rhs the commandQueue object on rhs of the assignment.
     */
    CommandQueue& operator = (const CommandQueue& rhs)
    {
        // Self-assignment leaves the object untouched.
        if (this == &rhs) {
            return *this;
        }

        detail::Wrapper<cl_type>::operator=(rhs);
        return *this;
    }

    /*! \brief Query information about a command-queue.
     *
     *  \param name specifies the information to query.
     *
     *  \param param is a pointer to memory where the appropriate result
     *  being queried is returned. If \a param_value is NULL, it is ignored.
     *
     *  \return One of the following values:
     *    - CL_SUCCESS if the function is executed successfully.
     *    - CL_INVALID_VALUE if \a name is not one of the supported
     *      values.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <typename T>
    cl_int getInfo(cl_command_queue_info name, T* param) const
    {
        // Run the query, then route its status through the error handler.
        const cl_int status =
            detail::getInfo(&::clGetCommandQueueInfo, object_, name, param);
        return detail::errHandler(status, __GET_COMMAND_QUEUE_INFO_ERR);
    }

    /*!
     * \brief Query information about a command-queue.
     *
     * \param name specifies the information to query.
     *
     * \param err pointer to memory location where error value will be returned.
     * If not null, the default value, then one of the following values is
     * returned:
     *  - CL_SUCCESS if the function is executed successfully
     *  - CL_INVALID_VALUE if \a name is not valid.
     *
     * \return the appropriate values for \em name will be returned.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_command_queue_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typedef typename detail::param_traits<
            detail::cl_command_queue_info, name>::param_type result_type;

        // Delegate to the pointer-based overload and hand the value back.
        result_type value;
        const cl_int status = getInfo(name, &value);
        if (err) {
            *err = status;
        }
        return value;
    }

    /*! \brief Enable or disable the properties of a command-queue.
     *
     *  \param properties specifies the new command-queue properties to be
     *  applied to \a command_queue.
     *
     *  \param enable determines whether the values specified by properties are
     *  enabled (if enable is CL_TRUE) or disabled (if enable is CL_FALSE) for
     *  the command-queue .
     *
     *  \param old_properties returns the command-queue properties before they
     *  were changed by setProperty. If \a old_properties is NULL, its default,
     *  it is ignored.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the command-queue properties are successfully updated.
     *  - CL_INVALID_VALUE if the values specified in properties are not valid.
     *  - CL_INVALID_QUEUE_PROPERTIES if values specified in properties are
     *    not supported by the device.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int setProperty(
        cl_command_queue_properties properties,
        cl_bool enable,
        cl_command_queue_properties* old_properties = NULL) const
    {
        // Forward to the C entry point, then route its status through the
        // error handler.
        const cl_int status = ::clSetCommandQueueProperty(
            object_, properties, enable, old_properties);
        return detail::errHandler(status, __SET_COMMAND_QUEUE_PROPERTY_ERR);
    }

    /*! \brief Enqueue a command to read from a buffer object to host memory.
     *
     *  \param buffer refers to a valid buffer object.
     *
     *  \param blocking indicates if the read operation is blocking or
     *  nonblocking. If \a blocking is CL_TRUE i.e. the read command is
     *  blocking, enqueueReadBuffer does not return until the buffer data has
     *  been read and copied into memory pointed to by ptr.
     *  If \a blocking is CL_FALSE i.e. the read command is non-blocking,
     *  enqueueReadBuffer queues a non-blocking read command and returns. The
     *  contents of the buffer that ptr points to cannot be used until the read
     *  command has completed. The \a event argument returns an event object
     *  which can be used to query the execution status of the read command.
     *  When the read command has completed, the contents of the buffer that ptr
     *  points to can be used by the application.
     *
     *  \param offset is the offset in bytes in the buffer object to read from
     *  or write to.
     *
     *  \param size is the size in bytes of data being read or written.
     *
     *  \param ptr is the pointer to buffer in host memory where data is to be
     *  read into or to be written from.
     *
     *  \param events specifies events that need to complete before this
     *  particular command can be executed. If \a events is NULL,
     *  its default, then this particular command does not wait on any event to
     *  complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \param event returns an event object that identifies this particular
     *  read command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL, its default, in
     *  which case it will not be possible for the application to query the
     *  status of this command or queue a wait for this command to complete.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with \a command_queue and
     *    \a buffer are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a buffer is not a valid buffer object.
     *  - CL_INVALID_VALUE if the region being read or written specified by
     *    (offset, size) is out of bounds or if \a ptr is a NULL value.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not
     *    valid events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueReadBuffer(
        const Buffer& buffer,
        cl_bool blocking,
        ::size_t offset,
        ::size_t size,
        void* ptr,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Translate the optional wait list into the (count, pointer) pair
        // taken by the C entry point; the pointer stays NULL when the list
        // is absent or empty.
        cl_uint numWaitEvents = 0;
        cl_event* waitList = NULL;
        if (events != NULL) {
            numWaitEvents = (cl_uint) events->size();
            if (events->size() > 0) {
                waitList = (cl_event*) &events->front();
            }
        }

        const cl_int status = ::clEnqueueReadBuffer(
            object_, buffer(), blocking, offset, size, ptr,
            numWaitEvents, waitList, (cl_event*) event);
        return detail::errHandler(status, __ENQUEUE_READ_BUFFER_ERR);
    }

    /*! \brief Enqueue a command to write to a buffer object from host memory.
     *
     *  \param buffer refers to a valid buffer object.
     *
     *  \param blocking indicates if  the  write  operation  is  blocking  or
     *  non-blocking. If \a blocking is CL_TRUE,  the  OpenCL  implementation
     *  copies the data referred to by \a ptr and enqueues the write  operation
     *  in the command-queue. The memory pointed to by \a ptr can  be  reused
     *  by  the application after the enqueueWriteBuffer call returns. If
     *  \a blocking is CL_FALSE, the OpenCL implementation will use \a ptr to
     *  perform a nonblocking write. As the write is non-blocking the
     *  implementation can return immediately. The memory pointed to by \a ptr
     *  cannot be reused by the application after the call returns.
     *  The \a event  argument  returns  an event object which can be used to
     *  query the execution status of  the  write command. When the write
     *  command has completed, the  memory  pointed  to  by \a ptr can then be
     *  reused by the application
     *
     *  \param offset is the offset in bytes in the buffer object to write to.
     *
     *  \param size is the size in bytes of data being read or written.
     *
     *  \param ptr is the pointer to buffer in host memory where data is to be
     *  read into or to be written from.
     *
     *  \param events specifies events that need to complete  before  this
     *  particular command can be executed. If \a events is NULL, its default,
     *  then this particular command does not wait on any event to  complete.
     *  The events specified in \a event_wait_list act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  write command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL in which case it
     *  will not be possible for the application to query the status of this
     *  command or queue a wait for this command to complete.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_MEM_OBJECT if \a buffer is not a valid buffer object.
     *  - CL_INVALID_VALUE if the region being read or written specified by
     *    (offset, size) is out of bounds or if \a ptr is a NULL value.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not
     *    valid events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required
     *    by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueWriteBuffer(
        const Buffer& buffer,
        cl_bool blocking,
        ::size_t offset,
        ::size_t size,
        const void* ptr,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Translate the optional wait list into the (count, pointer) pair
        // taken by the C entry point; the pointer stays NULL when the list
        // is absent or empty.
        cl_uint numWaitEvents = 0;
        cl_event* waitList = NULL;
        if (events != NULL) {
            numWaitEvents = (cl_uint) events->size();
            if (events->size() > 0) {
                waitList = (cl_event*) &events->front();
            }
        }

        const cl_int status = ::clEnqueueWriteBuffer(
            object_, buffer(), blocking, offset, size, ptr,
            numWaitEvents, waitList, (cl_event*) event);
        return detail::errHandler(status, __ENQUEUE_WRITE_BUFFER_ERR);
    }

    /*! \brief Enqueues a command to copy a buffer object to another
     *
     *  \param src is the source buffer object.
     *
     *  \param dst is the destination buffer object.
     *
     *  \param src_offset refers to the offset where to begin reading data in
     *  \a src.
     *
     *  \param dst_offset refers to the offset where to begin copying data in
     *  \a dst.
     *
     *  \param size refers to the size in bytes to copy.
     *
     *  \param events specifies events that need to complete before this
     *  particular command can be executed. If \a events is NULL,
     *  then this particular command does not wait on  any event to complete.
     *  The events specified in \a event_wait_list act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  copy command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL in which case it
     *  will not be possible for the application to query the status of this
     *  command or queue and wait for this command to complete. enqueueBarrier
     *  can be used instead.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with \a command_queue,
     *    \a src and \a dst are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a src_buffer and \a dst_buffer are not valid
     *    buffer objects.
     *  - CL_INVALID_VALUE if \a src_offset, \a dst_offset, \a size,
     *    \a src_offset + \a size or \a dst_offset + \a size require accessing
     *    elements outside the buffer memory objects.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not
     *    valid events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueCopyBuffer(
        const Buffer& src,
        const Buffer& dst,
        ::size_t src_offset,
        ::size_t dst_offset,
        ::size_t size,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Translate the optional wait list into the (count, pointer) pair
        // taken by the C entry point; the pointer stays NULL when the list
        // is absent or empty.
        cl_uint numWaitEvents = 0;
        cl_event* waitList = NULL;
        if (events != NULL) {
            numWaitEvents = (cl_uint) events->size();
            if (events->size() > 0) {
                waitList = (cl_event*) &events->front();
            }
        }

        const cl_int status = ::clEnqueueCopyBuffer(
            object_, src(), dst(), src_offset, dst_offset, size,
            numWaitEvents, waitList, (cl_event*) event);
        return detail::errHandler(status, __ENQEUE_COPY_BUFFER_ERR);
    }

#if defined(CL_VERSION_1_1)
    /*! \brief Enqueue a command to read a 2D or 3D rectangular region from
	 *  a buffer object to host memory.
     *
     *  \param buffer refers to a valid buffer object.
     *
     *  \param blocking indicates if the read operation is blocking or
     *  non-blocking. If \a blocking is CL_TRUE, i.e. the read command is
     *  blocking, enqueueReadBufferRect does not return until the buffer data
     *  has been read and copied into the memory pointed to by \a ptr. If
     *  \a blocking is CL_FALSE, i.e. the read command is non-blocking,
     *  enqueueReadBufferRect queues a non-blocking read command and returns.
     *  The contents of the memory that \a ptr points to cannot be used until
     *  the read command has completed. The \a event argument returns an event
     *  object which can be used to query the execution status of the read
     *  command. When the read command has completed, the contents of the
     *  memory that \a ptr points to can be used by the application.
     *
     *  \param buffer_offset defines the (x, y, z) offset in the memory region
     *  associated with \a buffer. For a 2D rectangle region, the z value given
     *  by buffer_offset[2] should be 0. The offset in bytes is computed as
     *  buffer_offset[2] * buffer_slice_pitch + buffer_offset[1] * buffer_row_pitch
     *  + buffer_offset[0].
     *
     *  \param host_offset defines the (x, y, z) offset in the memory region
     *  pointed to by \a ptr. For a 2D rectangle region, the z value given by
     *  host_offset[2] should be 0. The offset in bytes is computed as
     *  host_offset[2] * host_slice_pitch + host_offset[1] * host_row_pitch +
     *  host_offset[0].
	 *
	 * \param region defines the (width, height, depth) in bytes of the 2D or 
	 * 3D rectangle being read or written. For a 2D rectangle copy, the depth 
	 * value given by region[2] should be 1. 
	 * 
	 * \param buffer_row_pitch is the length of each row in bytes to be used 
	 * for the memory region associated with buffer. If buffer_row_pitch is 0, 
	 * buffer_row_pitch is computed as region[0].
	 *
	 * \param buffer_slice_pitch is the length of each 2D slice in bytes to be 
	 * used for the memory region associated with buffer. If buffer_slice_pitch 
	 * is 0, buffer_slice_pitch is computed as region[1] * buffer_row_pitch.
     *
	 * \param host_row_pitch is the length of each row in bytes to be used for 
	 * the memory region pointed to by ptr. If host_row_pitch is 0, host_row_pitch 
	 * is computed as region[0].
	 *
	 * \param host_slice_pitch is the length of each 2D slice in bytes to be 
	 * used for the memory region pointed to by ptr. If host_slice_pitch is 0, 
	 * host_slice_pitch is computed as region[1] * host_row_pitch.
	 *
     *  \param events specifies events that need to complete  before  this
     *  particular command can be executed. If \a events is NULL, its default,
     *  then this particular command does not wait on any event to  complete.
     *  The events specified in \a event_wait_list act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  read command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL in which case it
     *  will not be possible for the application to query the status of this
     *  command or queue a wait for this command to complete.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with command_queue and 
	 *  buffer are not the same or if the context associated with command_queue 
	 *  and events in event_wait_list are not the same.
	 * 
	 *  - CL_INVALID_MEM_OBJECT if buffer is not a valid buffer object.
	 *
	 *  - CL_INVALID_VALUE if the region being read or written specified by 
	 *  (buffer_offset,region) is out of bounds.
	 *
	 * - CL_INVALID_VALUE if ptr is a NULL value.
	 * 
	 * - CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and 
	 *   offset specified when the sub-buffer object is created is not aligned to
	 *   CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue.
	 *
	 * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory 
	 * for data store associated with buffer.
	 * 
	 * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources 
	 *   required by the OpenCL implementation on the host.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueReadBufferRect(
        const Buffer& buffer,
        cl_bool blocking,
        const size_t<3>& buffer_offset,
        const size_t<3>& host_offset,
        const size_t<3>& region,
        ::size_t buffer_row_pitch,
        ::size_t buffer_slice_pitch,
        ::size_t host_row_pitch,
        ::size_t host_slice_pitch,
        void *ptr,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Convert each size_t<3> argument to the const ::size_t* form that the
        // C entry point takes.
        const ::size_t* const buffer_xyz =
            static_cast<const ::size_t*>(buffer_offset);
        const ::size_t* const host_xyz =
            static_cast<const ::size_t*>(host_offset);
        const ::size_t* const extent =
            static_cast<const ::size_t*>(region);

        // Optional wait list -> (count, pointer); NULL or empty gives (0, NULL).
        const cl_uint wait_count =
            (events != NULL) ? static_cast<cl_uint>(events->size()) : 0;
        const cl_event* wait_list =
            (events != NULL && events->size() > 0)
                ? reinterpret_cast<const cl_event*>(&events->front())
                : NULL;

        // NOTE(review): the Event/cl_event pointer casts presumably rely on
        // Event being layout-compatible with cl_event -- confirm.
        const cl_int status = ::clEnqueueReadBufferRect(
            object_,
            buffer(),
            blocking,
            buffer_xyz,
            host_xyz,
            extent,
            buffer_row_pitch,
            buffer_slice_pitch,
            host_row_pitch,
            host_slice_pitch,
            ptr,
            wait_count,
            wait_list,
            reinterpret_cast<cl_event*>(event));

        // Route the status through the shared error handler.
        return detail::errHandler(status, __ENQUEUE_READ_BUFFER_RECT_ERR);
    }

   /*! \brief Enqueue a command to write a 2D or 3D rectangular region from
	 *  host memory to a buffer object.
     *
     *  \param buffer refers to a valid buffer object.
     *
     *  \param blocking indicates if  the  write  operation  is  blocking  or
     *  non-blocking. If \a blocking is CL_TRUE,  the  OpenCL  implementation
     *  copies the data referred to by \a ptr and enqueues the write  operation
     *  in the command-queue. The memory pointed to by \a ptr can  be  reused
     *  by  the application after the enqueueWriteBufferRect call returns. If
     *  \a blocking is CL_FALSE, the OpenCL implementation will use \a ptr to
     *  perform a nonblocking write. As the write is non-blocking the
     *  implementation can return immediately. The memory pointed to by \a ptr
     *  cannot be reused by the application after the call returns.
     *  The \a event  argument  returns  an event object which can be used to
     *  query the execution status of  the  write command. When the write
     *  command has completed, the  memory  pointed  to  by \a ptr can then be
     *  reused by the application
     *
     *  \param buffer_offset defines the (x, y, z) offset in the memory region
     *  associated with \a buffer. For a 2D rectangle region, the z value given
     *  by buffer_offset[2] should be 0. The offset in bytes is computed as
     *  buffer_offset[2] * buffer_slice_pitch + buffer_offset[1] * buffer_row_pitch
     *  + buffer_offset[0].
     *
     *  \param host_offset defines the (x, y, z) offset in the memory region
     *  pointed to by \a ptr. For a 2D rectangle region, the z value given by
     *  host_offset[2] should be 0. The offset in bytes is computed as
     *  host_offset[2] * host_slice_pitch + host_offset[1] * host_row_pitch +
     *  host_offset[0].
	 *
	 * \param region defines the (width, height, depth) in bytes of the 2D or 
	 * 3D rectangle being read or written. For a 2D rectangle copy, the depth 
	 * value given by region[2] should be 1. 
	 * 
	 * \param buffer_row_pitch is the length of each row in bytes to be used 
	 * for the memory region associated with buffer. If buffer_row_pitch is 0, 
	 * buffer_row_pitch is computed as region[0].
	 *
	 * \param buffer_slice_pitch is the length of each 2D slice in bytes to be 
	 * used for the memory region associated with buffer. If buffer_slice_pitch 
	 * is 0, buffer_slice_pitch is computed as region[1] * buffer_row_pitch.
     *
	 * \param host_row_pitch is the length of each row in bytes to be used for 
	 * the memory region pointed to by ptr. If host_row_pitch is 0, host_row_pitch 
	 * is computed as region[0].
	 *
	 * \param host_slice_pitch is the length of each 2D slice in bytes to be 
	 * used for the memory region pointed to by ptr. If host_slice_pitch is 0, 
	 * host_slice_pitch is computed as region[1] * host_row_pitch.
	 *
     *  \param events specifies events that need to complete  before  this
     *  particular command can be executed. If \a events is NULL, its default,
     *  then this particular command does not wait on any event to  complete.
     *  The events specified in \a event_wait_list act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  write command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL in which case it
     *  will not be possible for the application to query the status of this
     *  command or queue a wait for this command to complete.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with command_queue and 
	 *  buffer are not the same or if the context associated with command_queue 
	 *  and events in event_wait_list are not the same.
	 * 
	 *  - CL_INVALID_MEM_OBJECT if buffer is not a valid buffer object.
	 *
	 *  - CL_INVALID_VALUE if the region being read or written specified by 
	 *  (buffer_offset,region) is out of bounds.
	 *
	 * - CL_INVALID_VALUE if ptr is a NULL value.
	 * 
	 * - CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and 
	 *   offset specified when the sub-buffer object is created is not aligned to
	 *   CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue.
	 *
	 * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory 
	 * for data store associated with buffer.
	 * 
	 * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources 
	 *   required by the OpenCL implementation on the host.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueWriteBufferRect(
        const Buffer& buffer,
        cl_bool blocking,
        const size_t<3>& buffer_offset,
        const size_t<3>& host_offset,
        const size_t<3>& region,
        ::size_t buffer_row_pitch,
        ::size_t buffer_slice_pitch,
        ::size_t host_row_pitch,
        ::size_t host_slice_pitch,
        const void *ptr,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Convert each size_t<3> argument to the const ::size_t* form that the
        // C entry point takes.
        const ::size_t* const buffer_xyz =
            static_cast<const ::size_t*>(buffer_offset);
        const ::size_t* const host_xyz =
            static_cast<const ::size_t*>(host_offset);
        const ::size_t* const extent =
            static_cast<const ::size_t*>(region);

        // Optional wait list -> (count, pointer); NULL or empty gives (0, NULL).
        const cl_uint wait_count =
            (events != NULL) ? static_cast<cl_uint>(events->size()) : 0;
        const cl_event* wait_list =
            (events != NULL && events->size() > 0)
                ? reinterpret_cast<const cl_event*>(&events->front())
                : NULL;

        // NOTE(review): the Event/cl_event pointer casts presumably rely on
        // Event being layout-compatible with cl_event -- confirm.
        const cl_int status = ::clEnqueueWriteBufferRect(
            object_,
            buffer(),
            blocking,
            buffer_xyz,
            host_xyz,
            extent,
            buffer_row_pitch,
            buffer_slice_pitch,
            host_row_pitch,
            host_slice_pitch,
            ptr,
            wait_count,
            wait_list,
            reinterpret_cast<cl_event*>(event));

        // Route the status through the shared error handler.
        return detail::errHandler(status, __ENQUEUE_WRITE_BUFFER_RECT_ERR);
    }

   /*! \brief Enqueues a command to copy a 2D or 3D rectangular region from
     *        a buffer object to a 2D or 3D region of another.
     *
     *  \param src is the source buffer object.
     *
     *  \param dst is the destination buffer object.
     *
     *  \param src_origin defines the (x, y, z) offset in the memory region 
	 *  associated with src_buffer. For a 2D rectangle region, the z value 
	 *  given by src_origin[2] should be 0. The offset in bytes is computed as 
	 *  src_origin[2] * src_slice_pitch + src_origin[1] * 
	 *  src_row_pitch + src_origin[0].
     *
     *  \param dst_origin dst_origin defines the (x, y, z) offset in the memory 
	 *  region associated with dst_buffer. For a 2D rectangle region, the z 
	 *  value given by dst_origin[2] should be 0. The offset in bytes is 
	 *  computed as dst_origin[2] * dst_slice_pitch + dst_origin[1] * 
	 *  dst_row_pitch + dst_origin[0].
     *
	 *  \param region defines the (width, height, depth) in bytes of the 2D or 
	 *  3D rectangle being copied. For a 2D rectangle, the depth value 
	 *  given by region[2] should be 1.
     *
	 * \param src_row_pitch is the length of each row in bytes to be used for 
	 *  the memory region associated with src_buffer. If src_row_pitch is 0, 
	 * src_row_pitch is computed as region[0].
     *
	 * \param src_slice_pitch is the length of each 2D slice in bytes to be used 
	 * for the memory region associated with src_buffer. If src_slice_pitch is 0, 
	 * src_slice_pitch is computed as region[1] * src_row_pitch.
	 *
	 * \param dst_row_pitch is the length of each row in bytes to be used for the memory 
	 * region associated with dst_buffer. If dst_row_pitch is 0, dst_row_pitch 
	 * is computed as region[0].
     *
     * \param dst_slice_pitch is the length of each 2D slice in bytes to be used 
	 * for the memory region associated with dst_buffer. If dst_slice_pitch is 0, 
	 * dst_slice_pitch is computed as region[1] * dst_row_pitch.
     *
     *  \param events specifies events that need to complete before this
     *  particular command can be executed. If \a events is NULL,
     *  then this particular command does not wait on  any event to complete.
     *  The events specified in \a event_wait_list act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  copy command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL in which case it
     *  will not be possible for the application to query the status of this
     *  command or queue and wait for this command to complete. enqueueBarrier
     *  can be used instead.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with command_queue, 
	 *    src_buffer and dst_buffer are not the same or if the context 
	 *    associated with command_queue and events in \a events are not the same.
	 *
	 *  - CL_INVALID_MEM_OBJECT if src_buffer and dst_buffer are not valid 
	 *    buffer objects.
     *
	 *  - CL_INVALID_VALUE if (src_offset, region) or (dst_offset, region) 
	 *    require accessing elements outside the src_buffer and dst_buffer 
	 *    buffer objects respectively.
	 * 
	 *  - CL_MEM_COPY_OVERLAP if src_buffer and dst_buffer are the same buffer 
	 *    object and the source and destination regions overlap.
	 *
     *  - CL_MISALIGNED_SUB_BUFFER_OFFSET if src_buffer is a sub-buffer object
     *    and offset specified when the sub-buffer object is created is not
     *    aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated
     *    with queue.
	 *
	 *  - CL_MISALIGNED_SUB_BUFFER_OFFSET if dst_buffer is a sub-buffer object 
	 *    and offset specified when the sub-buffer object is created is not 
	 *    aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated 
	 *    with queue.
	 *
	 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate 
	 *    memory for data store associated with src_buffer or dst_buffer.
	 *
	 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources 
	 *    required by the OpenCL implementation on the host.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueCopyBufferRect(
        const Buffer& src,
        const Buffer& dst,
        const size_t<3>& src_origin,
        const size_t<3>& dst_origin,
        const size_t<3>& region,
        ::size_t src_row_pitch,
        ::size_t src_slice_pitch,
        ::size_t dst_row_pitch,
        ::size_t dst_slice_pitch,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Convert each size_t<3> argument to the const ::size_t* form that the
        // C entry point takes.
        const ::size_t* const src_xyz =
            static_cast<const ::size_t*>(src_origin);
        const ::size_t* const dst_xyz =
            static_cast<const ::size_t*>(dst_origin);
        const ::size_t* const extent =
            static_cast<const ::size_t*>(region);

        // Optional wait list -> (count, pointer); NULL or empty gives (0, NULL).
        const cl_uint wait_count =
            (events != NULL) ? static_cast<cl_uint>(events->size()) : 0;
        const cl_event* wait_list =
            (events != NULL && events->size() > 0)
                ? reinterpret_cast<const cl_event*>(&events->front())
                : NULL;

        // NOTE(review): the Event/cl_event pointer casts presumably rely on
        // Event being layout-compatible with cl_event -- confirm.
        const cl_int status = ::clEnqueueCopyBufferRect(
            object_,
            src(),
            dst(),
            src_xyz,
            dst_xyz,
            extent,
            src_row_pitch,
            src_slice_pitch,
            dst_row_pitch,
            dst_slice_pitch,
            wait_count,
            wait_list,
            reinterpret_cast<cl_event*>(event));

        // Route the status through the shared error handler.
        return detail::errHandler(status, __ENQEUE_COPY_BUFFER_RECT_ERR);
    }
#endif

    /*! \brief Enqueue a command to read from a 2D or 3D image object to host
     *         memory
     *
     *  \param image refers to a valid 2D or 3D image object.
     *
     *  \param blocking indicates if the read is blocking or nonblocking. If
     *  \a blocking is CL_TRUE i.e. the read command is blocking,
     *  enqueueReadImage does not return until the buffer data has been read and
     *  copied into memory pointed to by \a ptr. If \a blocking is CL_FALSE
     *  i.e. the read command is non-blocking, enqueueReadImage queues a
     *  non-blocking read command and returns. The contents of the buffer that
     *  \a ptr points to cannot be used until the read command has completed.
     *  The \a event argument returns an event object which can be used to query
     *  the execution status of the read command. When the read command has
     *  completed, the contents of the buffer that ptr points to can be used by
     *  the application
     *
     *  \param origin defines the (x, y, z) offset in the image from where to
     *  read or write. If image is a 2D image object, the z value given by
     *  origin[2] must be 0.
     *
     *  \param region defines the (width, height, depth) of the 2D or 3D
     *  rectangle being read or written. If image is a 2D image object, the
     *  depth value given by region[2] must be 1.
     *
     *  \param row_pitch in enqueueReadImage is the length of each row in bytes.
     *  This value must be greater than or equal to the element size in bytes
     *  multiplied by width. If \a row_pitch is set to 0, the appropriate row
     *  pitch is calculated based on the size of each element in bytes
     *  multiplied by width.
     *
     *  \param slice_pitch in enqueueReadImage is the size in bytes of the 2D
     *  slice of the 3D region of a 3D image being read or written respectively.
     *  This must be 0 if image is a 2D image. This value must be greater than
     *  or equal to row_pitch * height. If \a slice_pitch is set to 0, the
     *  appropriate slice pitch is calculated based on the \a row_pitch *
     *  \a height.
     *
     *  \param ptr is the pointer to a buffer in host memory where the image
     *  data that is read is to be stored.
     *
     *  \param events specifies events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default, then this particular command does not wait on any event to
     *  complete. The events specified in \a events act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  read command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL in which case it
     *  will not be possible for the application to query the status of this
     *  command or queue a wait for this command to complete.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with \a command_queue and
     *    \a image are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a image is not a valid image object.
     *  - CL_INVALID_VALUE if the region being read specified by \a origin and
     *    \a region is out of bounds or if \a ptr is a NULL value.
     *  - CL_INVALID_VALUE if \a image is a 2D image object and \a origin[2]
     *    is not equal to 0 or \a region[2] is not equal to 1 or \a slice_pitch
     *    is not equal to 0.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not valid
     *    events.
     *  - CL_INVALID_VALUE if blocking is CL_FALSE and \a event is NULL.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueReadImage(
        const Image& image,
        cl_bool blocking,
        const size_t<3>& origin,
        const size_t<3>& region,
        ::size_t row_pitch,
        ::size_t slice_pitch,
        void* ptr,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Optional wait list -> (count, pointer); NULL or empty gives (0, NULL).
        const cl_uint wait_count =
            (events != NULL) ? static_cast<cl_uint>(events->size()) : 0;
        const cl_event* wait_list =
            (events != NULL && events->size() > 0)
                ? reinterpret_cast<const cl_event*>(&events->front())
                : NULL;

        // origin/region are converted to the const ::size_t* form the C entry
        // point takes.
        // NOTE(review): the Event/cl_event pointer casts presumably rely on
        // Event being layout-compatible with cl_event -- confirm.
        const cl_int status = ::clEnqueueReadImage(
            object_,
            image(),
            blocking,
            static_cast<const ::size_t*>(origin),
            static_cast<const ::size_t*>(region),
            row_pitch,
            slice_pitch,
            ptr,
            wait_count,
            wait_list,
            reinterpret_cast<cl_event*>(event));

        // Route the status through the shared error handler.
        return detail::errHandler(status, __ENQUEUE_READ_IMAGE_ERR);
    }

    /*! \brief Enqueue a command to write to a 2D or 3D image object from host
     *         memory
     *
     *  \param image refers to a valid 2D or 3D image object.
     *
     *  \param blocking indicates if the write operation is blocking or
     *  nonblocking. If blocking is CL_TRUE, the OpenCL implementation copies
     *  the data referred to by \a ptr and enqueues the write command in the
     *  command-queue. The memory pointed to by ptr can be reused by the
     *  application after the enqueueWriteImage call returns. If blocking is
     *  CL_FALSE, the OpenCL implementation will use ptr to perform a
     *  nonblocking write. As the write is non-blocking the implementation can
     *  return immediately. The memory pointed to by ptr cannot be reused by the
     *  application after the call returns. The event argument returns an event
     *  object which can be used to query the execution status of the write
     *  command. When the write command has completed, the memory pointed to by
     *  ptr can then be reused by the application.
     *
     *  \param origin defines the (x, y, z) offset in the image from where to
     *  read or write. If image is a 2D image object, the z value given by
     *  origin[2] must be 0.
     *
     *  \param region defines the (width, height, depth) of the 2D or 3D
     *  rectangle being read or written. If image is a 2D image object, the
     *  depth value given by region[2] must be 1.
     *
     *  \param row_pitch is the length of each row in bytes.
     *  This value must be greater than or equal to the element size in bytes
     *  multiplied by width. If \a row_pitch is set to 0, the appropriate row
     *  pitch is calculated based on the size of each element in bytes
     *  multiplied by width.
     *
     *  \param slice_pitch is the size
     *  in bytes of the 2D slice of the 3D region of a 3D image being read or
     *  written respectively. This must be 0 if image is a 2D image. This value
     *  must be greater than or equal to row_pitch * height. If
     *  \a slice_pitch is set to 0, the appropriate slice pitch is
     *  calculated based on the \a row_pitch * \a height.
     *
     *  \param ptr is the pointer to a buffer in host memory where image data is
     *  to be read from.
     *
     *  \param events specifies events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default, then this particular command does not wait on any event to
     *  complete. The events specified in \a events act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  write command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL in which case it
     *  will not be possible for the application to query the status of this
     *  command or queue a wait for this command to complete.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with \a command_queue and
     *    \a image are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a image is not a valid image object.
     *  - CL_INVALID_VALUE if the region being written specified by \a origin
     *    and \a region is out of bounds or if \a ptr is a NULL value.
     *  - CL_INVALID_VALUE if \a image is a 2D image object and \a origin[2]
     *    is not equal to 0 or \a region[2] is not equal to 1 or \a slice_pitch
     *    is not equal to 0.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not valid
     *    events.
     *  - CL_INVALID_VALUE if blocking is CL_FALSE and \a event is NULL.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueWriteImage(
        const Image& image,
        cl_bool blocking,
        const size_t<3>& origin,
        const size_t<3>& region,
        ::size_t row_pitch,
        ::size_t slice_pitch,
        // The host data is only read by this call, so it is taken as
        // const void* (matching clEnqueueWriteImage and the sibling
        // enqueueWriteBufferRect). Existing callers passing void* still
        // compile; callers holding const data no longer need a const_cast.
        const void* ptr,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Optional wait list -> (count, pointer); NULL or empty gives (0, NULL).
        const cl_uint wait_count =
            (events != NULL) ? static_cast<cl_uint>(events->size()) : 0;
        const cl_event* wait_list =
            (events != NULL && events->size() > 0)
                ? reinterpret_cast<const cl_event*>(&events->front())
                : NULL;

        // origin/region are converted to the const ::size_t* form the C entry
        // point takes.
        // NOTE(review): the Event/cl_event pointer casts presumably rely on
        // Event being layout-compatible with cl_event -- confirm.
        const cl_int status = ::clEnqueueWriteImage(
            object_,
            image(),
            blocking,
            static_cast<const ::size_t*>(origin),
            static_cast<const ::size_t*>(region),
            row_pitch,
            slice_pitch,
            ptr,
            wait_count,
            wait_list,
            reinterpret_cast<cl_event*>(event));

        // Route the status through the shared error handler.
        return detail::errHandler(status, __ENQUEUE_WRITE_IMAGE_ERR);
    }

    /*! \brief Enqueue a command to copy image objects.
     *
     *  \param src is the source image object.
     *
     *  \param dst is the destination image object.
     *
     *  \param src_origin defines the starting (x, y, z) location in
     *  \a src from where to start the data copy.  If \a src is a
     *  2D image object, the z value given by \a src_origin[2] must be 0.
     *
     *  \param dst_origin defines the starting (x, y, z) location in \a
     *  dst from where to start the data copy. If \a dst is a
     *  2D image object, the z value given by \a dst_origin[2] must be 0.
     *
     *  \param region defines the (width, height, depth) of the 2D or 3D
     *  rectangle to copy. If \a src or \a dst is a 2D image object,
     *  the depth value given by \a region[2] must be 1.
     *
     *  \param events specifies events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default, then this particular command does not wait on any event to
     *  complete. The events specified in \a events act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  copy command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL, its default, in
     *  which case it will not be possible for the application to query the
     *  status of this command or queue a wait for this command to complete.
     *  enqueueBarrier can be used instead. It is currently a requirement that
     *  the \a src_image and \a dst_image image memory objects for
     *  enqueueCopyImage must have the exact image format (i.e. channel order
     *  and channel data type must match).
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with \a command-queue,
     *    \a src and \a dst are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a src and \a dst are not valid
     *    image objects.
     *  - CL_IMAGE_FORMAT_MISMATCH if src and dst do not use the
     *    same image format.
     *  - CL_INVALID_VALUE if the 2D or 3D rectangular region specified by
     *    \a src_origin and \a src_origin + \a region refers to a region outside
     *    \a src, or if the 2D or 3D rectangular region specified by
     *    \a dst_origin and \a dst_origin + \a region refers to a region outside
     *    \a dst.
     *  - CL_INVALID_VALUE if \a src is a 2D image object and \a
     *    src_origin[2] is not equal to 0 or \a region[2] is not equal to 1.
     *  - CL_INVALID_VALUE if \a dst is a 2D image object and \a
     *    dst_origin[2] is not equal to 0 or \a region[2] is not equal to 1.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not valid
     *    events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueCopyImage(
        const Image& src,
        const Image& dst,
        const size_t<3>& src_origin,
        const size_t<3>& dst_origin,
        const size_t<3>& region,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Optional wait list -> (count, pointer); NULL or empty gives (0, NULL).
        const cl_uint wait_count =
            (events != NULL) ? static_cast<cl_uint>(events->size()) : 0;
        const cl_event* wait_list =
            (events != NULL && events->size() > 0)
                ? reinterpret_cast<const cl_event*>(&events->front())
                : NULL;

        // The three size_t<3> arguments are converted to the const ::size_t*
        // form the C entry point takes.
        // NOTE(review): the Event/cl_event pointer casts presumably rely on
        // Event being layout-compatible with cl_event -- confirm.
        const cl_int status = ::clEnqueueCopyImage(
            object_,
            src(),
            dst(),
            static_cast<const ::size_t*>(src_origin),
            static_cast<const ::size_t*>(dst_origin),
            static_cast<const ::size_t*>(region),
            wait_count,
            wait_list,
            reinterpret_cast<cl_event*>(event));

        // Route the status through the shared error handler.
        return detail::errHandler(status, __ENQUEUE_COPY_IMAGE_ERR);
    }

    /*! \brief Enqueue a command to copy an image object to a buffer object.
     *
     *  \param src is a valid image object.
     *
     *  \param dst is a valid buffer object.
     *
     *  \param src_origin defines the (x, y, z) offset in the image from where
     *  to copy. If \a src is a 2D image object, the z value given by
     *  \a src_origin[2] must be 0.
     *
     *  \param region defines the (width, height, depth) of the 2D or 3D
     *  rectangle to copy. If \a src_image is a 2D image object, the depth value
     *  given by \a region[2] must be 1.
     *
     *  \param dst_offset refers to the offset where to begin copying data in
     *  \a dst. The size in bytes of the region to be copied referred to
     *  as \a dst_cb is computed as width * height * depth * bytes/image element
     *  if \a src is a 3D image object and is computed as
     *  width * height * bytes/image element if \a src is a 2D image
     *  object.
     *
     *  \param events specifies events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default, then this particular command does not wait on any event to
     *  complete. The events specified in \a events act as synchronization
     *  points.
     *
     *  \param event returns an event object that identifies this particular
     *  copy command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL, its default value,
     *  in which case it will not be possible for the application to query the
     *  status of this command or queue a wait for this command to complete.
     *  enqueueBarrier can be used instead.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with \a command-queue,
     *    \a src and \a dst are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a src is not a valid image object or
     *    \a dst is not a valid buffer object.
     *  - CL_INVALID_VALUE if the 2D or 3D rectangular region specified by
     *    \a src_origin and \a src_origin + \a region refers to a region outside
     *    \a src, or if the region specified by \a dst_offset and
     *    \a dst_offset + \a dst_cb refers to a region outside \a dst.
     *  - CL_INVALID_VALUE if \a src is a 2D image object and \a
     *    src_origin[2] is not equal to 0 or \a region[2] is not equal to 1.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not
     *    valid events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueCopyImageToBuffer(
        const Image& src,
        const Buffer& dst,
        const size_t<3>& src_origin,
        const size_t<3>& region,
        ::size_t dst_offset,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Flatten the optional wait list into the (count, array) pair taken
        // by the C entry point; the array stays NULL for a missing or empty
        // list so front() is never evaluated on an empty container.
        cl_uint wait_count = 0;
        cl_event* wait_list = NULL;
        if (events != NULL) {
            wait_count = (cl_uint) events->size();
            if (events->size() > 0) {
                wait_list = (cl_event*) &events->front();
            }
        }

        const cl_int status = ::clEnqueueCopyImageToBuffer(
            object_, src(), dst(),
            (const ::size_t *) src_origin,
            (const ::size_t *) region,
            dst_offset,
            wait_count, wait_list,
            (cl_event*) event);

        // errHandler decides whether a failure is returned or reported.
        return detail::errHandler(status, __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR);
    }

    /*! \brief Enqueue a command to copy a buffer object to an image object.
     *
     *  \param src is a valid buffer object.
     *
     *  \param dst is a valid image object.
     *
     *  \param src_offset refers to the offset where to begin copying data in
     *  \a src.
     *
     *  \param dst_origin defines the (x, y, z) offset in the image from where
     *  to copy. If \a dst is a 2D image object, the z value given by
     *  \a dst_origin[2] must be 0.
     *
     *  \param region defines the (width, height, depth) of the 2D or 3D
     *  rectangle to copy. If dst is a 2D image object, the depth value
     *  given by \a region[2] must be 1. The size in bytes of the region to be
     *  copied from \a src referred to as \a src_cb is computed as
     *  width * height * depth * bytes/image element if \a dst is a 3D image
     *  object and is computed as width * height * bytes/image element if
     *  \a dst is a 2D image object.
     *
     *  \param events specifies events that need to complete before this
     *  particular command can be executed. If \a events is NULL, then
     *  this particular command does not wait on any event to complete.
     *  The events specified in \a events act as synchronization points.
     *
     *  \param event returns an event object that identifies this particular
     *  copy command and can be used to query or queue a wait for this
     *  particular command to complete. \a event can be NULL, its default value,
     *  in which case it will not be possible for the application to query the
     *  status of this command or queue a wait for this command to complete.
     *  enqueueBarrier can be used instead.
     *
     *  \return CL_SUCCESS if the function is executed successfully. Otherwise
     *  it returns one of the following errors:
     *  - CL_INVALID_CONTEXT if the context associated with \a command_queue,
     *    \a src and \a dst are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a src is not a valid buffer object or
     *    \a dst is not a valid image object.
     *  - CL_INVALID_VALUE if the 2D or 3D rectangular region specified by
     *    \a dst_origin and \a dst_origin + \a region refers to a region outside
     *    \a dst, or if the region specified by \a src_offset and
     *    \a src_offset + \a src_cb refers to a region outside \a src.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not
     *    valid events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueCopyBufferToImage(
        const Buffer& src,
        const Image& dst,
        ::size_t src_offset,
        const size_t<3>& dst_origin,
        const size_t<3>& region,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // The wait list is optional: a NULL vector yields (0, NULL) and an
        // empty vector yields (0, NULL) as well.
        const bool have_list = (events != NULL);
        const cl_uint wait_count =
            have_list ? (cl_uint) events->size() : 0;
        cl_event* const wait_list =
            (have_list && events->size() > 0)
                ? (cl_event*) &events->front()
                : NULL;

        const cl_int status = ::clEnqueueCopyBufferToImage(
            object_, src(), dst(),
            src_offset,
            (const ::size_t *) dst_origin,
            (const ::size_t *) region,
            wait_count, wait_list,
            (cl_event*) event);

        return detail::errHandler(status, __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR);
    }

    /*! \brief Enqueue a command to map a region of a buffer object into the
     *         host address.
     *
     *  \param blocking indicates if the map operation is blocking or
     *  non-blocking. If \a blocking is CL_TRUE, enqueueMapBuffer does not
     *  return until the specified region in \a buffer can be mapped. If
     *  \a blocking is CL_FALSE i.e. map operation is non-blocking, the pointer
     *  to the mapped region returned by enqueueMapBuffer cannot be used until
     *  the map command has completed. The event argument returns an event
     *  object which can be used to query the execution status of the map
     *  command. When the map command is completed, the application can access
     *  the contents of the mapped region using the pointer returned by
     *  enqueueMapBuffer.
     *
     *  \param flags is a bit-field and can be set to CL_MAP_READ to
     *  indicate that the region specified by (\a offset, \a size) in the buffer
     *  object is being mapped for reading, and/or CL_MAP_WRITE to indicate that
     *  the region specified by (\a offset, \a size) in the buffer object is
     *  being mapped for writing.
     *
     *  \param buffer is a valid buffer object. The OpenCL context associated
     *  with \a command-queue and \a buffer must be the same.
     *
     *  \param offset is the offset in bytes of the region in the buffer object
     *  that is being mapped
     *
     *  \param size is the size in bytes of the region in the buffer object that
     *  is being mapped.
     *
     *  \param events specifies events that need to complete before this
     *  particular command can be executed. If \a events is NULL, its default
     *  value, then this particular command does not wait on any event to
     *  complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \param event returns an event object that identifies this particular
     *  command and can be used to query or queue a wait for this particular
     *  command to complete. \a event can be NULL in which case it will not be
     *  possible for the application to query the status of this command or
     *  queue a wait for this command to complete. enqueueBarrier can be used
     *  instead.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, its default value, no error code is returned.
     *
     *  \return A pointer to the mapped region if \a buffer is a memory object
     *  created with clCreateBuffer and the region specified by
     *  (\a offset, \a size) is a valid region in the buffer object and is
     *  successfully mapped into the host address space. In that case \a err
     *  is set to CL_SUCCESS. A NULL pointer is returned otherwise with one of
     *  the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if context associated with \a command-queue and
     *    \a buffer are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a buffer is not a valid buffer object.
     *  - CL_INVALID_VALUE if region being mapped given by (\a offset, \a size)
     *    is out of bounds or if values specified in \a flags are not valid.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not
     *    valid events.
     *  - CL_MEM_OBJECT_MAP_FAILURE if there is a failure to map the
     *    specified region in the host address space.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     *  The pointer returned maps a region starting at \a offset and is at least
     *  \a size bytes in size. The result of a memory access outside this region
     *  is undefined.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    void* enqueueMapBuffer(
        const Buffer& buffer,
        cl_bool blocking,
        cl_map_flags flags,
        ::size_t offset,
        ::size_t size,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL,
        cl_int* err = NULL) const
    {
        cl_int error;
        void * result = ::clEnqueueMapBuffer(
	    object_, buffer(), blocking, flags, offset, size,
	    (events != NULL) ? (cl_uint) events->size() : 0,
	    (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
	    (cl_event*) event,
	    &error);

        detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }
        return result;
    }

    /*! \brief Enqueue a command to map a region in an image object given into
     *  the host address.
     *
     *  \param image is a valid image object. The OpenCL context associated with
     *  the command-queue and \a image must be the same.
     *
     *  \param blocking indicates if the map operation is blocking or
     *  non-blocking. If \a blocking is CL_TRUE, enqueueMapImage does not
     *  return until the specified region in image is mapped. If \a blocking is
     *  CL_FALSE i.e. map operation is non-blocking, the pointer to the mapped
     *  region returned by enqueueMapImage cannot be used until the map command
     *  has completed. The event argument returns an event object which can be
     *  used to query the execution status of the map command. When the map
     *  command is completed, the application can access the contents of the
     *  mapped region using the pointer returned by enqueueMapImage.
     *
     *  \param flags is a bit-field and can be set to CL_MAP_READ to indicate
     *  that the region specified by (\a origin, \a region) in the image object
     *  is being mapped for reading, and/or CL_MAP_WRITE to indicate that the
     *  region specified by (\a origin, \a region) in the image object is being
     *  mapped for writing.
     *
     *  \param origin defines the (x, y, z) offset of the 2D or 3D rectangle
     *  region that is to be mapped. If image is a 2D image object, the z value
     *  given by \a origin[2] must be 0.
     *
     *  \param region defines the (width, height, depth) of the 2D or 3D
     *  rectangle region that is to be mapped. If image is a 2D image object,
     *  the depth value given by \a region[2] must be 1.
     *
     *  \param row_pitch returns the scan-line pitch in bytes for the mapped
     *  region. This must be a non-NULL value.
     *
     *  \param slice_pitch returns the size in bytes of each 2D slice for the
     *  mapped region. For a 2D image this argument is ignored. For a 3D image
     *  this must be a non-NULL value.
     *
     *  \param events specifies events that need to complete before
     *  enqueueMapImage can be executed. If \a events is NULL, then
     *  enqueueMapImage does not wait on any event to complete. The events
     *  specified in \a events act as synchronization points.
     *
     *  \param event returns an event object that identifies this particular
     *  command and can be used to query or queue a wait for this particular
     *  command to complete. \a event can be NULL, its default value, in which
     *  case it will not be possible for the application to query the status of
     *  this command or queue a wait for this command to complete.
     *  enqueueBarrier can be used instead.
     *
     *  \param err will return an appropriate error code. If \a err
     *  is NULL, its default value, no error code is returned.
     *
     *  \return A pointer to the mapped region if \a image is a memory object
     *  created with clCreateImage{2D|3D}, and the 2D or 3D rectangle
     *  specified by \a origin and \a region is a valid region in the image
     *  object and can be mapped into the host address space.
     *  The \a err is set to CL_SUCCESS. A NULL pointer is returned
     *  otherwise with one of the following error values returned in \a err:
     *  - CL_INVALID_CONTEXT if context associated with \a command_queue and
     *    \a image are not the same.
     *  - CL_INVALID_MEM_OBJECT if \a image is not a valid image object.
     *  - CL_INVALID_VALUE if region being mapped given by
     *    (\a origin, \a origin + \a region) is out of bounds or if values
     *    specified in \a flags are not valid.
     *  - CL_INVALID_VALUE if \a image is a 2D image object and \a origin[2]
     *    is not equal to 0 or \a region[2] is not equal to 1.
     *  - CL_INVALID_VALUE if \a row_pitch is NULL.
     *  - CL_INVALID_VALUE if \a image is a 3D image object and \a slice_pitch
     *    is NULL.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not
     *    valid events.
     *  - CL_MEM_OBJECT_MAP_FAILURE  if there is a failure to map the  specified
     *    region in the host address space.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * The pointer returned maps a 2D or 3D region starting at origin and is
     * at least (\a row_pitch * \a region[1] + \a region[0]) pixels in size
     * for a 2D image, and is at least (\a slice_pitch * \a region[2] +
     * \a row_pitch * \a region[1] + \a region[0]) pixels in size for a 3D
     * image. The result of a memory access outside this region is undefined.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    void* enqueueMapImage(
        const Image& buffer,
        cl_bool blocking,
        cl_map_flags flags,
        const size_t<3>& origin,
        const size_t<3>& region,
        ::size_t * row_pitch,
        ::size_t * slice_pitch,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL,
        cl_int* err = NULL) const
    {
        cl_int error;
        void * result = ::clEnqueueMapImage(
	    object_, buffer(), blocking, flags,
	    (const ::size_t *) origin, (const ::size_t *) region,
	    row_pitch, slice_pitch,
	    (events != NULL) ? (cl_uint) events->size() : 0,
	    (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
	    (cl_event*) event,
	    &error);

        detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR);
        if (err != NULL) {
              *err = error;
        }
        return result;
    }

    /*! \brief Enqueue a command to unmap a previously mapped region of a memory
     *  object.
     *
     *  Reads or writes from the host using the pointer returned by
     *  enqueueMapBuffer or enqueueMapImage are considered to be complete.
     *
     *  \param memobj is a valid memory object. The OpenCL context associated
     *  with the command-queue and \a memobj must be the same.
     *
     *  \param mapped_ptr is the host address returned by a previous call to
     *  enqueueMapBuffer or enqueueMapImage for \a memobj.
     *
     *
     *  \param events specifies events that need to complete before
     *  enqueueUnmapMemObject can be executed. If \a events is NULL,
     *  then enqueueUnmapMemObject does not wait on any event to complete. The
     *  events specified in \a events act as synchronization points.
     *
     *  \param event returns an event object that identifies this particular
     *  command and can be used to query or queue a wait for this particular
     *  command to complete. \a event can be NULL, its default value, in which
     *  case it will not be possible for the application to query the status
     *  of this command or queue a wait for this command to complete.
     *  enqueueBarrier can be used instead.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is executed successfully.
     *  - CL_INVALID_MEM_OBJECT if \a memobj is not a valid memory object.
     *  - CL_INVALID_VALUE if \a mapped_ptr is not a valid pointer returned by
     *    enqueueMapBuffer or enqueueMapImage for \a memobj.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not
     *    valid events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *  - CL_INVALID_CONTEXT if context associated with the command-queue and
     *    \a memobj are not the same.
     *
     * enqueueMapBuffer and enqueueMapImage increment the mapped count of the
     * memory object. Multiple calls to enqueueMapBuffer or enqueueMapImage on
     * the same memory object will increment this mapped count by appropriate
     * number of calls. enqueueUnmapMemObject decrements the mapped count of the
     * memory object. enqueueMapBuffer and enqueueMapImage act as
     * synchronization points for a region of the memory object being mapped.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueUnmapMemObject(
        const Memory& memory,
        void* mapped_ptr,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Optional wait list -> (count, array); array is NULL when the list
        // is missing or empty.
        cl_uint wait_count = 0;
        cl_event* wait_list = NULL;
        if (events != NULL) {
            wait_count = (cl_uint) events->size();
            if (events->size() > 0) {
                wait_list = (cl_event*) &events->front();
            }
        }

        const cl_int status = ::clEnqueueUnmapMemObject(
            object_, memory(), mapped_ptr,
            wait_count, wait_list,
            (cl_event*) event);

        return detail::errHandler(status, __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
    }

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param kernel is a valid kernel object. The OpenCL context associated
     *  with \a kernel and \a command-queue must be the same.
     *
     *  \param offset must currently be  a  NullRange value. In  a future
     *  revision of OpenCL, \a global_work_offset can be used to specify an
     *  array of \a work_dim unsigned values that describe the offset used to
     *  calculate the global ID of a work-item instead of having the global IDs
     *  always start at offset (0, 0, 0).
     *
     *  \param global describes the number of global work-items that will
     *  execute the kernel function. The total number of global
     *  work-items is computed as global_work_size[0] * ...
     *  * global_work_size[work_dim - 1].
     *
     *  \param local describes the number of work-items that  make  up  a
     *  work-group (also referred to as the size of the work-group) that
     *  will execute the  kernel specified by kernel.
     *
     *  \param events specifies events that need to complete before this
     *  particular command can be executed. If \a events is NULL, its
     *  default, or size zero then this particular command does not wait on
     *  any event to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \param event returns an event object that identifies this particular
     *  kernel execution instance. Event objects are unique and can be used to
     *  identify  a particular kernel execution instance later on.  If \a event
     *  is  NULL, its default value, no event will be created for this kernel
     *  execution instance and  therefore it will not be possible for the
     *  application to query or queue a wait for  this particular kernel
     *  execution instance.
     *
     *  The total number of work-items in a work-group is computed as
     *  local_work_size[0] * ... * local_work_size[work_dim - 1].
     *  The total number of work-items in the work-group must be less than or
     *  equal to the CL_DEVICE_MAX_WORK_GROUP_SIZE. The explicitly specified
     *  \a local_work_size will be used to determine how to break the global
     *  work-items specified by global_work_size into appropriate work-group
     *  instances. If \a local_work_size is specified, the values specified in
     *  \a global_work_size[0], ..., global_work_size[work_dim - 1] must be
     *  evenly divisible by the corresponding values specified in
     *  \a local_work_size[0], ..., local_work_size[work_dim - 1].
     *  \a local_work_size can also be a NULL value in which case the OpenCL
     *  implementation will determine how to break the global work-items
     *  into appropriate work-groups.
     *
     *  If \a local is NullRange and no work-group size is specified when  the
     *  kernel is compiled, the OpenCL implementation will determine how to
     *  break the global work-items specified by \a global into appropriate
     *  work-group instances. The work-group size to be used for kernel can also
     *  be specified in the program source using the
     *  __attribute__((reqd_work_group_size(X, Y, Z))) qualifier. In this case
     *  the size of work group specified by \a local_work_size must match the
     *  value specified by the \a reqd_work_group_size attribute qualifier.
     *
     *  These work-group instances are executed in parallel across multiple
     *  compute units or concurrently on the same compute unit. Each  work-item
     *  is  uniquely identified by a global identifier. The global ID, which
     *  can be read inside the kernel is computed using the value given by
     *  \a global_work_size and \a global_work_offset.
     *
     *  \return One of the following values:
     *
     *  - CL_SUCCESS if the kernel execution was successfully queued.
     *  - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built
     *    program executable available for device associated with command-queue.
     *  - CL_INVALID_COMMAND_QUEUE if command-queue is not a valid
     *    command-queue.
     *  - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
     *  - CL_INVALID_KERNEL_ARGS if the kernel argument values have not been
     *    specified or are not valid for the device on which kernel will be
     *    executed.
     *  - CL_INVALID_WORK_DIMENSION if \a work_dim is not a valid value
     *    (i.e. a value between 1 and 3).
     *
     *  - CL_INVALID_WORK_GROUP_SIZE if \a local is specified and the number
     *    of work-items specified by \a global is not evenly divisible by the
     *    work-group size given by \a local_work_size or does not match the
     *    work-group size specified for kernel using the
     *    __attribute__((reqd_work_group_size(X, Y, Z))) qualifier in program
     *    source.
     *
     *  - CL_INVALID_GLOBAL_OFFSET if \a offset is not NullRange.
     *
     *  - CL_OUT_OF_RESOURCES if there is a failure to queue the execution
     *    instance of \a kernel on  the  command-queue because of insufficient
     *    resources needed to execute the kernel. For example, the explicitly
     *    specified \a local_work_dim in range causes a failure to execute the
     *    kernel because of insufficient resources such as  registers or local
     *    memory.  Another example would be the number of read-only image args
     *    used in kernel exceed the CL_DEVICE_MAX_READ_IMAGE_ARGS value for
     *    device or the number of write-only image args used in kernel exceed
     *    the CL_DEVICE_MAX_WRITE_IMAGE_ARGS value for device or the number of
     *    samplers used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
     *
     *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
     *    memory for image or buffer objects specified as arguments to kernel.
     *
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueNDRangeKernel(
        const Kernel& kernel,
        const NDRange& offset,
        const NDRange& global,
        const NDRange& local,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // A zero-dimensional offset/local range is passed to the C API as
        // NULL; the global range is always passed through.
        const ::size_t* offset_sizes = NULL;
        if (offset.dimensions() != 0) {
            offset_sizes = (const ::size_t*) offset;
        }
        const ::size_t* local_sizes = NULL;
        if (local.dimensions() != 0) {
            local_sizes = (const ::size_t*) local;
        }

        // Optional wait list -> (count, array); array is NULL when the list
        // is missing or empty.
        cl_uint wait_count = 0;
        cl_event* wait_list = NULL;
        if (events != NULL) {
            wait_count = (cl_uint) events->size();
            if (events->size() > 0) {
                wait_list = (cl_event*) &events->front();
            }
        }

        // The work dimension is taken from the global range.
        const cl_int status = ::clEnqueueNDRangeKernel(
            object_, kernel(),
            (cl_uint) global.dimensions(),
            offset_sizes,
            (const ::size_t*) global,
            local_sizes,
            wait_count, wait_list,
            (cl_event*) event);

        return detail::errHandler(status, __ENQUEUE_NDRANGE_KERNEL_ERR);
    }

    /*! \brief Enqueue a command to execute a kernel on a device.
     *         The kernel is executed using a single work-item.
     *
     *  \param kernel is a valid kernel object. The OpenCL context associated
     *  with \a kernel and \a command-queue must be the same.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \param event returns an event object that identifies this particular
     *  kernel execution instance. Event objects are unique and can be used to
     *  identify a particular kernel execution instance later on.
     *  If \a event is NULL, its default value, no event will be created for
     *  this kernel execution instance and therefore it will not be possible for
     *  the application to query or queue a wait for this particular kernel
     *  execution instance.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the kernel execution was successfully queued.
     *  - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built
     *    program executable available for device associated with command-queue.
     *  - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
     *  - CL_INVALID_KERNEL_ARGS if the kernel argument values have not been
     *    specified or are not valid for the device on which kernel will be
     *    executed.
     *  - CL_INVALID_WORK_GROUP_SIZE if a work-group size is specified for
     *    kernel using the __attribute__((reqd_work_group_size(X, Y, Z)))
     *    qualifier in program source and is not (1, 1, 1).
     *  - CL_OUT_OF_RESOURCES if there is a failure to queue the execution
     *    instance of kernel on the command-queue because of insufficient
     *    resources needed to execute the kernel. For example, the explicitly
     *    specified \a local_work_dim in range causes a failure to execute the
     *    kernel because of insufficient resources such as registers or local
     *    memory. Another example would be the number of read-only image args
     *    used in kernel exceed the CL_DEVICE_MAX_READ_IMAGE_ARGS value for
     *    device or the number of write-only image args used in kernel exceed
     *    the CL_DEVICE_MAX_WRITE_IMAGE_ARGS value for device or the number of
     *    samplers used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
     *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
     *    memory for image or buffer objects specified as arguments to kernel.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not valid
     *    events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueTask(
        const Kernel& kernel,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Optional wait list -> (count, array); array is NULL when the list
        // is missing or empty.
        const bool have_list = (events != NULL);
        const cl_uint wait_count =
            have_list ? (cl_uint) events->size() : 0;
        cl_event* const wait_list =
            (have_list && events->size() > 0)
                ? (cl_event*) &events->front()
                : NULL;

        const cl_int status = ::clEnqueueTask(
            object_, kernel(),
            wait_count, wait_list,
            (cl_event*) event);

        return detail::errHandler(status, __ENQUEUE_TASK_ERR);
    }

    /*! \brief Enqueue a command to execute a native C/C++ function not compiled
     *         using the OpenCL compiler.
     *
     *         A native user function can only be executed on a command-queue
     *         created on a device that has CL_EXEC_NATIVE_KERNEL capability
     *         set in CL_DEVICE_EXECUTION_CAPABILITIES.
     *
     *  \param userFptr is a pointer to a host-callable user function.
     *
     *  \param args a pair containing a pointer to the args list that
     *  \a userFptr should be called with and the size in bytes of the args
     *  list that args points to. Size number of bytes of the data pointed to
     *  by args will be copied and a pointer to this copied region will be
     *  passed to \a userFptr. The copy needs to be done because the memory
     *  objects (cl_mem values) that args may contain need to be modified and
     *  replaced by appropriate pointers to global memory. When
     *  clEnqueueNativeKernel returns, the memory region pointed to by
     *  args can be reused by the application.
     *
     *  \param mem_objects is a list of valid memory objects. It may be NULL,
     *  its default value, if no memory objects are passed to the function.
     *
     *  \param mem_locs is a list of pointers to the appropriate locations
     *  that args points to where the memory object handles (cl_mem values)
     *  listed in \a mem_objects are stored. Before the user function is
     *  executed, the memory object handles are replaced by pointers to
     *  global memory.
     *
     *  \param events as described in enqueueNDRangeKernel.
     *
     *  \param event returns an event object that identifies this particular
     *  kernel execution instance. Event objects are unique and can be used to
     *  identify a particular kernel execution instance later on. If \a event
     *  is NULL, its default, no event will be created for this kernel execution
     *  instance and therefore it will not be possible for the application to
     *  query or queue a wait for this particular kernel execution instance.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the user function execution instance was successfully
     *    queued
     *  - CL_INVALID_VALUE if \a userFptr is NULL.
     *  - CL_INVALID_OPERATION if device cannot execute the native kernel.
     *  - CL_INVALID_MEM_OBJECT if one or more memory objects specified in
     *    \a mem_objects are not valid or are not buffer objects.
     *  - CL_OUT_OF_RESOURCES if there is a failure to queue the execution
     *    instance of kernel on the command-queue because of insufficient
     *    resources needed to execute the kernel.
     *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
     *    memory for buffer objects specified as arguments to \a kernel.
     *  - CL_INVALID_EVENT_WAIT_LIST if event objects in \a events are not valid
     *    events.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueNativeKernel(
        void (*userFptr)(void *),
        std::pair<void*, ::size_t> args,
        const VECTOR_CLASS<Memory>* mem_objects = NULL,
        const VECTOR_CLASS<const void*>* mem_locs = NULL,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // Calling front() on an empty vector is undefined behaviour, so a
        // list that is absent (NULL) or empty is handed to the runtime as a
        // zero count together with a NULL pointer.
        const bool haveMems = (mem_objects != NULL) && (mem_objects->size() > 0);
        const bool haveLocs = (mem_locs != NULL) && (mem_locs->size() > 0);
        const bool haveEvents = (events != NULL) && (events->size() > 0);

        // The number of memory objects is taken from mem_objects only; the
        // runtime receives no separate count for mem_locs.
        return detail::errHandler(
            ::clEnqueueNativeKernel(
                object_, userFptr, args.first, args.second,
                haveMems ? (cl_uint) mem_objects->size() : 0,
                haveMems ? (const cl_mem *) &mem_objects->front() : NULL,
                haveLocs ? (const void **) &mem_locs->front() : NULL,
                haveEvents ? (cl_uint) events->size() : 0,
                haveEvents ? (cl_event*) &events->front() : NULL,
                (cl_event*) event),
            __ENQUEUE_NATIVE_KERNEL);
    }

    /*! \brief Enqueue a marker command on this command-queue.
     *
     *  The event produced by the marker can be waited on in order to wait
     *  for every command that was queued before the marker to complete.
     *
     *  \param event receives the event identifying the marker; it is handed
     *  to clEnqueueMarker as-is (NULL by default).
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function is successfully executed
     *  - CL_INVALID_VALUE if \a event is a NULL value
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note When exceptions are enabled and a value other than CL_SUCCESS
     * is produced, a cl::Error exception is generated instead.
     */
    cl_int enqueueMarker(Event* event = NULL) const
    {
        cl_event* const rawEvent = (cl_event*) event;
        const cl_int status = ::clEnqueueMarker(object_, rawEvent);
        return detail::errHandler(status, __ENQUEUE_MARKER_ERR);
    }

    /*! \brief Enqueue a wait for a specific event or a list of events to
     *  complete before any future commands queued in the command-queue are
     *  executed.
     *
     *  \param events is the list of events. Each event in \a events must
     *  be a valid event object returned by a previous call to:
     *  - enqueueNDRangeKernel
     *  - enqueueTask
     *  - enqueueNativeKernel
     *  - enqueue{Read|Write|Map}{Buffer|Image}
     *  - enqueueCopy{Buffer|Image}
     *  - enqueueCopyBufferToImage
     *  - enqueueCopyImageToBuffer
     *  - enqueueMarker.
     *  The events specified in \a events act as synchronization points.
     *  An empty list is forwarded to the runtime as a zero count with a
     *  NULL event pointer.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function was successfully executed.
     *  - CL_INVALID_VALUE if size of \a events is zero
     *  - CL_INVALID_EVENT if event objects specified in \a events are not valid
     *    events
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note In the case that exceptions are enabled and error value
     * other than CL_SUCCESS is generated, then cl::Error exception is
     * generated.
     */
    cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const
    {
        const cl_uint numEvents = (cl_uint) events.size();

        // front() on an empty vector is undefined behaviour; pass NULL so
        // the runtime itself reports the empty list.
        const cl_event* eventList =
            (numEvents > 0) ? (const cl_event*) &events.front() : NULL;

        return detail::errHandler(
            ::clEnqueueWaitForEvents(object_, numEvents, eventList),
            __ENQUEUE_WAIT_FOR_EVENTS_ERR);
    }

    /*! \brief Enqueue a command that forwards to clEnqueueAcquireGLObjects.
     *
     *  \param mem_objects is the list of memory objects handed to the
     *  runtime. When it is NULL (the default) or empty, a zero count and a
     *  NULL list are passed.
     *
     *  \param events is the list of events handed to the runtime as the
     *  wait list. When it is NULL (the default) or empty, a zero count and
     *  a NULL list are passed.
     *
     *  \param event is passed to the runtime as the output event; it may be
     *  NULL, its default.
     *
     *  \return The status reported by clEnqueueAcquireGLObjects, routed
     *  through detail::errHandler.
     */
    cl_int enqueueAcquireGLObjects(
        const VECTOR_CLASS<Memory>* mem_objects = NULL,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // front() on an empty vector is undefined behaviour, so empty lists
        // are treated exactly like absent ones.
        const bool haveMems = (mem_objects != NULL) && (mem_objects->size() > 0);
        const bool haveEvents = (events != NULL) && (events->size() > 0);

        return detail::errHandler(
            ::clEnqueueAcquireGLObjects(
                object_,
                haveMems ? (cl_uint) mem_objects->size() : 0,
                haveMems ? (const cl_mem *) &mem_objects->front() : NULL,
                haveEvents ? (cl_uint) events->size() : 0,
                haveEvents ? (cl_event*) &events->front() : NULL,
                (cl_event*) event),
            __ENQUEUE_ACQUIRE_GL_ERR);
    }

    /*! \brief Enqueue a command that forwards to clEnqueueReleaseGLObjects.
     *
     *  \param mem_objects is the list of memory objects handed to the
     *  runtime. When it is NULL (the default) or empty, a zero count and a
     *  NULL list are passed.
     *
     *  \param events is the list of events handed to the runtime as the
     *  wait list. When it is NULL (the default) or empty, a zero count and
     *  a NULL list are passed.
     *
     *  \param event is passed to the runtime as the output event; it may be
     *  NULL, its default.
     *
     *  \return The status reported by clEnqueueReleaseGLObjects, routed
     *  through detail::errHandler.
     */
    cl_int enqueueReleaseGLObjects(
        const VECTOR_CLASS<Memory>* mem_objects = NULL,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // front() on an empty vector is undefined behaviour, so empty lists
        // are treated exactly like absent ones.
        const bool haveMems = (mem_objects != NULL) && (mem_objects->size() > 0);
        const bool haveEvents = (events != NULL) && (events->size() > 0);

        return detail::errHandler(
            ::clEnqueueReleaseGLObjects(
                object_,
                haveMems ? (cl_uint) mem_objects->size() : 0,
                haveMems ? (const cl_mem *) &mem_objects->front() : NULL,
                haveEvents ? (cl_uint) events->size() : 0,
                haveEvents ? (cl_event*) &events->front() : NULL,
                (cl_event*) event),
            __ENQUEUE_RELEASE_GL_ERR);
    }

#if defined (USE_DX_INTEROP)
// Function-pointer type for the clEnqueueAcquireD3D10ObjectsKHR extension
// entry point. enqueueAcquireD3D10Objects keeps a static pointer of this
// type and calls through it.
typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)(
    cl_command_queue command_queue, cl_uint num_objects,
    const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
    const cl_event* event_wait_list, cl_event* event);
// Function-pointer type for the clEnqueueReleaseD3D10ObjectsKHR extension
// entry point. enqueueReleaseD3D10Objects keeps a static pointer of this
// type and calls through it.
typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
    cl_command_queue command_queue, cl_uint num_objects,
    const cl_mem* mem_objects,  cl_uint num_events_in_wait_list,
    const cl_event* event_wait_list, cl_event* event);

   /*! \brief Enqueue a command through the clEnqueueAcquireD3D10ObjectsKHR
    *  extension entry point.
    *
    *  \param mem_objects is the list of memory objects handed to the
    *  extension function. When it is NULL (the default) or empty, a zero
    *  count and a NULL list are passed.
    *
    *  \param events is the list of events handed over as the wait list.
    *  When it is NULL (the default) or empty, a zero count and a NULL list
    *  are passed.
    *
    *  \param event is passed as the output event; it may be NULL, its
    *  default.
    *
    *  \return The status reported by the extension function, routed
    *  through detail::errHandler.
    */
    cl_int enqueueAcquireD3D10Objects(
        const VECTOR_CLASS<Memory>* mem_objects = NULL,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // The extension entry point is cached in a function-local static;
        // presumably __INIT_CL_EXT_FCN_PTR resolves it - confirm against
        // the macro definition.
        static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
        __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);

        // front() on an empty vector is undefined behaviour, so empty lists
        // are treated exactly like absent ones (as the GL variants do for
        // their wait lists).
        const bool haveMems = (mem_objects != NULL) && (mem_objects->size() > 0);
        const bool haveEvents = (events != NULL) && (events->size() > 0);

        // NOTE(review): the GL acquire error string is reused here; confirm
        // whether a D3D10-specific string is intended.
        return detail::errHandler(
            pfn_clEnqueueAcquireD3D10ObjectsKHR(
                object_,
                haveMems ? (cl_uint) mem_objects->size() : 0,
                haveMems ? (const cl_mem *) &mem_objects->front() : NULL,
                haveEvents ? (cl_uint) events->size() : 0,
                haveEvents ? (cl_event*) &events->front() : NULL,
                (cl_event*) event),
            __ENQUEUE_ACQUIRE_GL_ERR);
    }

    /*! \brief Enqueue a command through the clEnqueueReleaseD3D10ObjectsKHR
     *  extension entry point.
     *
     *  \param mem_objects is the list of memory objects handed to the
     *  extension function. When it is NULL (the default) or empty, a zero
     *  count and a NULL list are passed.
     *
     *  \param events is the list of events handed over as the wait list.
     *  When it is NULL (the default) or empty, a zero count and a NULL list
     *  are passed.
     *
     *  \param event is passed as the output event; it may be NULL, its
     *  default.
     *
     *  \return The status reported by the extension function, routed
     *  through detail::errHandler.
     */
    cl_int enqueueReleaseD3D10Objects(
        const VECTOR_CLASS<Memory>* mem_objects = NULL,
        const VECTOR_CLASS<Event>* events = NULL,
        Event* event = NULL) const
    {
        // The extension entry point is cached in a function-local static;
        // presumably __INIT_CL_EXT_FCN_PTR resolves it - confirm against
        // the macro definition.
        static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
        __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);

        // front() on an empty vector is undefined behaviour, so empty lists
        // are treated exactly like absent ones (as the GL variants do for
        // their wait lists).
        const bool haveMems = (mem_objects != NULL) && (mem_objects->size() > 0);
        const bool haveEvents = (events != NULL) && (events->size() > 0);

        // NOTE(review): the GL release error string is reused here; confirm
        // whether a D3D10-specific string is intended.
        return detail::errHandler(
            pfn_clEnqueueReleaseD3D10ObjectsKHR(
                object_,
                haveMems ? (cl_uint) mem_objects->size() : 0,
                haveMems ? (const cl_mem *) &mem_objects->front() : NULL,
                haveEvents ? (cl_uint) events->size() : 0,
                haveEvents ? (cl_event*) &events->front() : NULL,
                (cl_event*) event),
            __ENQUEUE_RELEASE_GL_ERR);
    }
#endif

    /*! \brief Enqueue a barrier operation on this command-queue.
     *
     *  A barrier guarantees that every command already queued in the
     *  command-queue has finished execution before the next batch of
     *  commands can begin execution. enqueueBarrier is a synchronization
     *  point.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function was executed successfully
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note When exceptions are enabled and a value other than CL_SUCCESS
     * is produced, a cl::Error exception is generated instead.
     */
    cl_int enqueueBarrier() const
    {
        const cl_int status = ::clEnqueueBarrier(object_);
        return detail::errHandler(status, __ENQUEUE_BARRIER_ERR);
    }

    /*! \brief Issue every previously queued OpenCL command in this
     *  command-queue to the device associated with it.
     *
     *  flush only guarantees that the queued commands get issued to the
     *  appropriate device; they are not guaranteed to be complete when
     *  flush returns.
     *
     *  Blocking commands queued in a command-queue, such as
     *  enqueueRead{Image|Buffer} with \a blocking_read set to CL_TRUE,
     *  enqueueWrite{Image|Buffer} with \a blocking_write set to CL_TRUE,
     *  enqueueMap{Buffer|Image} with \a blocking_map set to CL_TRUE or
     *  waitForEvents, perform an implicit flush of the command-queue.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function call was executed successfully
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note When exceptions are enabled and a value other than CL_SUCCESS
     * is produced, a cl::Error exception is generated instead.
     */
    cl_int flush() const
    {
        const cl_int status = ::clFlush(object_);
        return detail::errHandler(status, __FLUSH_ERR);
    }

    /*! \brief Block until every previously queued OpenCL runtime command in
     *  this command-queue has been issued to the associated device and has
     *  completed.
     *
     *  finish does not return until all queued commands have been processed
     *  and completed. finish is also a synchronization point.
     *
     *  \return One of the following values:
     *  - CL_SUCCESS if the function call was executed successfully.
     *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
     *    required by the runtime.
     *
     * \note When exceptions are enabled and a value other than CL_SUCCESS
     * is produced, a cl::Error exception is generated instead.
     */
    cl_int finish() const
    {
        const cl_int status = ::clFinish(object_);
        return detail::errHandler(status, __FINISH_ERR);
    }
};

/*! \class KernelFunctor
 * \brief Kernel functor interface
 *
 * \note Only functors up to a fixed number of arguments are supported (see
 * the operator() overloads declared below). It is straightforward to add
 * more, and a more general solution, similar to Boost.Lambda, could be
 * followed if required in the future.
 */
class KernelFunctor
{
private:
    Kernel kernel_;
    CommandQueue queue_;
    NDRange offset_;
    NDRange global_;
    NDRange local_;

    cl_int err_;
public:
    //! Default constructor; KernelFunctor is not valid at this point.
    KernelFunctor() { }

    /*! \brief Construct a KernelFunctor bound to a kernel and its launch
     *  configuration.
     *
     *  The resulting functor will launch \a kernel with the corresponding
     *  \a queue, \a offset, \a global, and \a local values when it is
     *  called with the appropriate number of arguments, as defined by the
     *  kernel itself. The stored error code starts out as CL_SUCCESS.
     *
     *  \param kernel is the kernel to launch when this functor is executed.
     *
     *  \param queue is the command-queue to launch on.
     *
     *  \param offset must currently be a NullRange value. In a future
     *  revision of OpenCL, \a global_work_offset can be used to specify an
     *  array of \a work_dim unsigned values that describe the offset used to
     *  calculate the global ID of a work-item instead of having the global
     *  IDs always start at offset (0, 0, 0).
     *
     *  \param global describes the number of global work-items that will
     *  execute the kernel function. The total number of global work-items
     *  is computed as global_work_size[0] * ...
     *  * global_work_size[work_dim - 1].
     *
     *  \param local describes the number of work-items that make up a
     *  work-group (also referred to as the size of the work-group) that
     *  will execute the kernel specified by kernel.
     *
     *  \note This constructor is typically not used directly; the
     *  Kernel::bind method is favored instead.
     */
    KernelFunctor(
        const Kernel& kernel,
        const CommandQueue& queue,
        const NDRange& offset,
        const NDRange& global,
        const NDRange& local)
        : kernel_(kernel)
        , queue_(queue)
        , offset_(offset)
        , global_(global)
        , local_(local)
        , err_(CL_SUCCESS)
    {
    }

    /*! \brief Assignment operator.
     *
     * \param rhs KernelFunctor object for rhs of assignment.
     *
     * \return KernelFunctor object for lhs of assignment.
     */
    KernelFunctor& operator=(const KernelFunctor& rhs);

    /*! \brief Copy constructor
     *
     * \param rhs is the KernelFunctor to be copied (cloned).
     */
    KernelFunctor(const KernelFunctor& rhs);

    /*! \brief Retrieve the error code stored by the most recent call to
     *         the functor.
     *
     * \return The stored error code; if this functor object has not been
     * called yet, CL_SUCCESS is returned.
     */
    cl_int getError()
    {
        return err_;
    }

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    inline Event operator()(const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<typename A1>
    inline Event operator()(
        const A1& a1, 
        const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3,
        const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4,
        const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *  \param a5 is used argument 4 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4, class A5>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5,
        const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *  \param a5 is used argument 4 for the kernel call.
     *  \param a6 is used argument 5 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4, class A5, class A6>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6,
        const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *  \param a5 is used argument 4 for the kernel call.
     *  \param a6 is used argument 5 for the kernel call.
     *  \param a7 is used argument 6 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4,
             class A5, class A6, class A7>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6, 
        const A7& a7,
        const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *  \param a5 is used argument 4 for the kernel call.
     *  \param a6 is used argument 5 for the kernel call.
     *  \param a7 is used argument 6 for the kernel call.
     *  \param a8 is used argument 7 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4, class A5,
             class A6, class A7, class A8>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6, 
        const A7& a7, 
        const A8& a8,
        const VECTOR_CLASS<Event>* events = NULL);

    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *  \param a5 is used argument 4 for the kernel call.
     *  \param a6 is used argument 5 for the kernel call.
     *  \param a7 is used argument 6 for the kernel call.
     *  \param a8 is used argument 7 for the kernel call.
     *  \param a9 is used argument 8 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4, class A5,
             class A6, class A7, class A8, class A9>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6, 
        const A7& a7, 
        const A8& a8, 
        const A9& a9,
        const VECTOR_CLASS<Event>* events = NULL);
    
    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *  \param a5 is used argument 4 for the kernel call.
     *  \param a6 is used argument 5 for the kernel call.
     *  \param a7 is used argument 6 for the kernel call.
     *  \param a8 is used argument 7 for the kernel call.
     *  \param a9 is used argument 8 for the kernel call.
     *  \param a10 is used argument 9 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4, class A5,
             class A6, class A7, class A8, class A9, class A10>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6,
        const A7& a7, 
        const A8& a8, 
        const A9& a9, 
        const A10& a10,
        const VECTOR_CLASS<Event>* events = NULL);
    
    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *  \param a5 is used argument 4 for the kernel call.
     *  \param a6 is used argument 5 for the kernel call.
     *  \param a7 is used argument 6 for the kernel call.
     *  \param a8 is used argument 7 for the kernel call.
     *  \param a9 is used argument 8 for the kernel call.
     *  \param a10 is used argument 9 for the kernel call.
     *  \param a11 is used argument 10 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4, class A5,
             class A6, class A7, class A8, class A9, class A10,
             class A11>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6,
        const A7& a7, 
        const A8& a8, 
        const A9& a9, 
        const A10& a10, 
        const A11& a11,
        const VECTOR_CLASS<Event>* events = NULL);
    
    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used argument 0 for the kernel call.
     *  \param a2 is used argument 1 for the kernel call.
     *  \param a3 is used argument 2 for the kernel call.
     *  \param a4 is used argument 3 for the kernel call.
     *  \param a5 is used argument 4 for the kernel call.
     *  \param a6 is used argument 5 for the kernel call.
     *  \param a7 is used argument 6 for the kernel call.
     *  \param a8 is used argument 7 for the kernel call.
     *  \param a9 is used argument 8 for the kernel call.
     *  \param a10 is used argument 9 for the kernel call.
     *  \param a11 is used argument 10 for the kernel call.
     *  \param a12 is used argument 11 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note If exceptions are enabled and an error value other than
     * CL_SUCCESS is generated, a cl::Error exception is generated;
     * otherwise the returned error is stored in the KernelFunctor
     * object and can be accessed using getError().
     */
    template<class A1, class A2, class A3, class A4, class A5,
             class A6, class A7, class A8, class A9, class A10,
             class A11, class A12>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6,
        const A7& a7, 
        const A8& a8, 
        const A9& a9, 
        const A10& a10, 
        const A11& a11, 
        const A12& a12,
        const VECTOR_CLASS<Event>* events = NULL);
    
    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used as argument 0 for the kernel call.
     *  \param a2 is used as argument 1 for the kernel call.
     *  \param a3 is used as argument 2 for the kernel call.
     *  \param a4 is used as argument 3 for the kernel call.
     *  \param a5 is used as argument 4 for the kernel call.
     *  \param a6 is used as argument 5 for the kernel call.
     *  \param a7 is used as argument 6 for the kernel call.
     *  \param a8 is used as argument 7 for the kernel call.
     *  \param a9 is used as argument 8 for the kernel call.
     *  \param a10 is used as argument 9 for the kernel call.
     *  \param a11 is used as argument 10 for the kernel call.
     *  \param a12 is used as argument 11 for the kernel call.
     *  \param a13 is used as argument 12 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note In the case that exceptions are enabled and an error value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated, otherwise the returned error is stored in the Kernel
     * object and can be accessed using \a get_error.
     */
    template<class A1, class A2, class A3, class A4, class A5,
             class A6, class A7, class A8, class A9, class A10,
             class A11, class A12, class A13>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6,
        const A7& a7, 
        const A8& a8, 
        const A9& a9, 
        const A10& a10, 
        const A11& a11, 
        const A12& a12, 
        const A13& a13,
        const VECTOR_CLASS<Event>* events = NULL);
    
    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  \param a1 is used as argument 0 for the kernel call.
     *  \param a2 is used as argument 1 for the kernel call.
     *  \param a3 is used as argument 2 for the kernel call.
     *  \param a4 is used as argument 3 for the kernel call.
     *  \param a5 is used as argument 4 for the kernel call.
     *  \param a6 is used as argument 5 for the kernel call.
     *  \param a7 is used as argument 6 for the kernel call.
     *  \param a8 is used as argument 7 for the kernel call.
     *  \param a9 is used as argument 8 for the kernel call.
     *  \param a10 is used as argument 9 for the kernel call.
     *  \param a11 is used as argument 10 for the kernel call.
     *  \param a12 is used as argument 11 for the kernel call.
     *  \param a13 is used as argument 12 for the kernel call.
     *  \param a14 is used as argument 13 for the kernel call.
     *
     *  \param events specifies the list of events that need to complete before
     *  this particular command can be executed. If \a events is NULL, its
     *  default value, then this particular command does not wait on any event
     *  to complete. The events specified in \a events act as
     *  synchronization points.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     *
     * \note In the case that exceptions are enabled and an error value
     * other than CL_SUCCESS is generated, then a cl::Error exception is
     * generated, otherwise the returned error is stored in the Kernel
     * object and can be accessed using \a get_error.
     */
    template<class A1, class A2, class A3, class A4, class A5,
             class A6, class A7, class A8, class A9, class A10,
             class A11, class A12, class A13, class A14>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6,
        const A7& a7, 
        const A8& a8, 
        const A9& a9, 
        const A10& a10, 
        const A11& a11,
        const A12& a12, 
        const A13& a13, 
        const A14& a14,
        const VECTOR_CLASS<Event>* events = NULL);
    
    /*! \brief Enqueue a command to execute a kernel on a device.
     *
     *  Fifteen-argument overload: \a a1 .. \a a15 are used as kernel
     *  arguments 0 .. 14 respectively by the out-of-line definition.
     *
     *  \param a1 is used as argument 0 for the kernel call.
     *  \param a2 is used as argument 1 for the kernel call.
     *  \param a3 is used as argument 2 for the kernel call.
     *  \param a4 is used as argument 3 for the kernel call.
     *  \param a5 is used as argument 4 for the kernel call.
     *  \param a6 is used as argument 5 for the kernel call.
     *  \param a7 is used as argument 6 for the kernel call.
     *  \param a8 is used as argument 7 for the kernel call.
     *  \param a9 is used as argument 8 for the kernel call.
     *  \param a10 is used as argument 9 for the kernel call.
     *  \param a11 is used as argument 10 for the kernel call.
     *  \param a12 is used as argument 11 for the kernel call.
     *  \param a13 is used as argument 12 for the kernel call.
     *  \param a14 is used as argument 13 for the kernel call.
     *  \param a15 is used as argument 14 for the kernel call.
     *
     *  \param events optional event list, NULL by default.
     *  NOTE(review): intended as a wait list for the command; how it is
     *  used is decided by the out-of-line definition - confirm there.
     *
     *  \return An event that identifies this particular kernel
     *  execution instance.
     */
    template<class A1, class A2, class A3, class A4, class A5,
             class A6, class A7, class A8, class A9, class A10,
             class A11, class A12, class A13, class A14, class A15>
    inline Event operator()(
        const A1& a1, 
        const A2& a2, 
        const A3& a3, 
        const A4& a4, 
        const A5& a5, 
        const A6& a6,
        const A7& a7, 
        const A8& a8, 
        const A9& a9, 
        const A10& a10, 
        const A11& a11,
        const A12& a12, 
        const A13& a13, 
        const A14& a14, 
        const A15& a15,
        const VECTOR_CLASS<Event>* events = NULL);
};

/*! \brief Create a functor for this kernel with an explicit offset.
 *
 *  \param queue  command queue handed to the KernelFunctor.
 *  \param offset offset range handed to the KernelFunctor.
 *  \param global global range handed to the KernelFunctor.
 *  \param local  local range handed to the KernelFunctor.
 *
 *  \return A KernelFunctor constructed from this kernel and the arguments.
 */
inline KernelFunctor Kernel::bind(
    const CommandQueue& queue,
    const NDRange& offset,
    const NDRange& global,
    const NDRange& local)
{
    return KernelFunctor(
        *this,
        queue,
        offset,
        global,
        local);
}

/*! \brief Create a functor for this kernel without an explicit offset.
 *
 *  NullRange is passed as the offset range.
 *
 *  \param queue  command queue handed to the KernelFunctor.
 *  \param global global range handed to the KernelFunctor.
 *  \param local  local range handed to the KernelFunctor.
 *
 *  \return A KernelFunctor constructed from this kernel and the arguments.
 */
inline KernelFunctor Kernel::bind(
    const CommandQueue& queue,
    const NDRange& global,
    const NDRange& local)
{
    return KernelFunctor(
        *this,
        queue,
        NullRange,
        global,
        local);
}

/*! \brief Copy-assign the complete state of another functor.
 *
 *  Copies the kernel, queue, the three ranges and the last recorded
 *  enqueue status. Self-assignment is a no-op.
 *
 *  \param rhs functor to copy from.
 *  \return A reference to this functor.
 */
inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs)
{
    if (this == &rhs) {
        return *this;
    }
    
    kernel_ = rhs.kernel_;
    queue_  = rhs.queue_;
    offset_ = rhs.offset_;
    global_ = rhs.global_;
    local_  = rhs.local_;
    // Carry the status across as well; otherwise this object would keep
    // reporting a stale status from before the assignment.
    err_    = rhs.err_;
    
    return *this;
}

/*! \brief Copy-construct a functor from another one.
 *
 *  Copies the kernel, queue, the three ranges and the last recorded
 *  enqueue status, so no member of the copy is left uninitialised.
 *
 *  \param rhs functor to copy from.
 */
inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) :
    kernel_(rhs.kernel_),
    queue_(rhs.queue_),
    offset_(rhs.offset_),
    global_(rhs.global_),
    local_(rhs.local_),
    // NOTE(review): assumes err_ is declared after local_ in the class;
    // reorder this initialiser if the declaration order differs.
    err_(rhs.err_)
{
}

/*! \brief Enqueue the bound kernel without setting any arguments.
 *
 *  The kernel is enqueued on the stored queue with the stored offset,
 *  global and local ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
inline Event KernelFunctor::operator()(const VECTOR_CLASS<Event>* events)
{
    Event event;

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel argument 0 and enqueue the bound kernel.
 *
 *  \a a1 is bound to kernel argument index 0 with Kernel::setArg (its
 *  return value is not inspected here); the kernel is then enqueued on the
 *  stored queue with the stored offset, global and local ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1>
Event KernelFunctor::operator()(
    const A1& a1,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..1 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a2 are bound to kernel argument indices 0 .. 1 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..2 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a3 are bound to kernel argument indices 0 .. 2 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2, typename A3>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..3 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a4 are bound to kernel argument indices 0 .. 3 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2, typename A3, typename A4>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..4 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a5 are bound to kernel argument indices 0 .. 4 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2, typename A3, typename A4, typename A5>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..5 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a6 are bound to kernel argument indices 0 .. 5 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2, typename A3, typename A4, typename A5,
         typename A6>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..6 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a7 are bound to kernel argument indices 0 .. 6 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2, typename A3, typename A4,
         typename A5, typename A6, typename A7>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..7 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a8 are bound to kernel argument indices 0 .. 7 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2, typename A3, typename A4, typename A5,
         typename A6, typename A7, typename A8>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const A8& a8,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);
    kernel_.setArg(7,a8);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..8 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a9 are bound to kernel argument indices 0 .. 8 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2, typename A3, typename A4, typename A5,
         typename A6, typename A7, typename A8, typename A9>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const A8& a8,
    const A9& a9,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);
    kernel_.setArg(7,a8);
    kernel_.setArg(8,a9);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..9 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a10 are bound to kernel argument indices 0 .. 9 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL means
 *  no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<typename A1, typename A2, typename A3, typename A4, typename A5,
         typename A6, typename A7, typename A8, typename A9, typename A10>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const A8& a8,
    const A9& a9,
    const A10& a10,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);
    kernel_.setArg(7,a8);
    kernel_.setArg(8,a9);
    kernel_.setArg(9,a10);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..10 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a11 are bound to kernel argument indices 0 .. 10 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL, the
 *  declared default, means no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<class A1, class A2, class A3, class A4, class A5,
         class A6, class A7, class A8, class A9, class A10,
         class A11>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const A8& a8,
    const A9& a9,
    const A10& a10,
    const A11& a11,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);
    kernel_.setArg(7,a8);
    kernel_.setArg(8,a9);
    kernel_.setArg(9,a10);
    kernel_.setArg(10,a11);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..11 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a12 are bound to kernel argument indices 0 .. 11 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL, the
 *  declared default, means no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<class A1, class A2, class A3, class A4, class A5,
         class A6, class A7, class A8, class A9, class A10,
         class A11, class A12>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const A8& a8,
    const A9& a9,
    const A10& a10,
    const A11& a11,
    const A12& a12,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);
    kernel_.setArg(7,a8);
    kernel_.setArg(8,a9);
    kernel_.setArg(9,a10);
    kernel_.setArg(10,a11);
    kernel_.setArg(11,a12);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..12 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a13 are bound to kernel argument indices 0 .. 12 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL, the
 *  declared default, means no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<class A1, class A2, class A3, class A4, class A5,
         class A6, class A7, class A8, class A9, class A10,
         class A11, class A12, class A13>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const A8& a8,
    const A9& a9,
    const A10& a10,
    const A11& a11,
    const A12& a12,
    const A13& a13,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);
    kernel_.setArg(7,a8);
    kernel_.setArg(8,a9);
    kernel_.setArg(9,a10);
    kernel_.setArg(10,a11);
    kernel_.setArg(11,a12);
    kernel_.setArg(12,a13);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..13 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a14 are bound to kernel argument indices 0 .. 13 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL, the
 *  declared default, means no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<class A1, class A2, class A3, class A4, class A5,
         class A6, class A7, class A8, class A9, class A10,
         class A11, class A12, class A13, class A14>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const A8& a8,
    const A9& a9,
    const A10& a10,
    const A11& a11,
    const A12& a12,
    const A13& a13,
    const A14& a14,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);
    kernel_.setArg(7,a8);
    kernel_.setArg(8,a9);
    kernel_.setArg(9,a10);
    kernel_.setArg(10,a11);
    kernel_.setArg(11,a12);
    kernel_.setArg(12,a13);
    kernel_.setArg(13,a14);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

/*! \brief Set kernel arguments 0..14 and enqueue the bound kernel.
 *
 *  \a a1 .. \a a15 are bound to kernel argument indices 0 .. 14 with
 *  Kernel::setArg (return values are not inspected here); the kernel is
 *  then enqueued on the stored queue with the stored ranges.
 *
 *  \param events wait list forwarded to enqueueNDRangeKernel; NULL, the
 *  declared default, means no wait list is supplied.
 *
 *  \return The Event filled in by enqueueNDRangeKernel. The status
 *  returned by the enqueue is stored in err_.
 */
template<class A1, class A2, class A3, class A4, class A5,
         class A6, class A7, class A8, class A9, class A10,
         class A11, class A12, class A13, class A14, class A15>
Event KernelFunctor::operator()(
    const A1& a1,
    const A2& a2,
    const A3& a3,
    const A4& a4,
    const A5& a5,
    const A6& a6,
    const A7& a7,
    const A8& a8,
    const A9& a9,
    const A10& a10,
    const A11& a11,
    const A12& a12,
    const A13& a13,
    const A14& a14,
    const A15& a15,
    const VECTOR_CLASS<Event>* events)
{
    Event event;

    kernel_.setArg(0,a1);
    kernel_.setArg(1,a2);
    kernel_.setArg(2,a3);
    kernel_.setArg(3,a4);
    kernel_.setArg(4,a5);
    kernel_.setArg(5,a6);
    kernel_.setArg(6,a7);
    kernel_.setArg(7,a8);
    kernel_.setArg(8,a9);
    kernel_.setArg(9,a10);
    kernel_.setArg(10,a11);
    kernel_.setArg(11,a12);
    kernel_.setArg(12,a13);
    kernel_.setArg(13,a14);
    kernel_.setArg(14,a15);

    err_ = queue_.enqueueNDRangeKernel(
        kernel_,
        offset_,
        global_,
        local_,
        events,  // honour the caller's wait list
        &event);

    return event;
}

// Macro cleanup: the helper macros below are undefined here, at the end of
// the header, so they are no longer defined for code that follows the
// include.
#undef __ERR_STR
// The per-call error-string macros are only undefined when the user has not
// requested overriding them via __CL_USER_OVERRIDE_ERROR_STRINGS.
#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
#undef __GET_DEVICE_INFO_ERR
#undef __GET_PLATFORM_INFO_ERR
#undef __GET_DEVICE_IDS_ERR
#undef __GET_CONTEXT_INFO_ERR
#undef __GET_EVENT_INFO_ERR
#undef __GET_EVENT_PROFILE_INFO_ERR
#undef __GET_MEM_OBJECT_INFO_ERR
#undef __GET_IMAGE_INFO_ERR
#undef __GET_SAMPLER_INFO_ERR
#undef __GET_KERNEL_INFO_ERR
#undef __GET_KERNEL_WORK_GROUP_INFO_ERR
#undef __GET_PROGRAM_INFO_ERR
#undef __GET_PROGRAM_BUILD_INFO_ERR
#undef __GET_COMMAND_QUEUE_INFO_ERR

#undef __CREATE_CONTEXT_FROM_TYPE_ERR
#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR

#undef __CREATE_BUFFER_ERR
#undef __CREATE_SUBBUFFER_ERR
#undef __CREATE_IMAGE2D_ERR
#undef __CREATE_IMAGE3D_ERR
#undef __CREATE_SAMPLER_ERR
#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR

#undef __CREATE_USER_EVENT_ERR
#undef __SET_USER_EVENT_STATUS_ERR
#undef __SET_EVENT_CALLBACK_ERR

#undef __WAIT_FOR_EVENTS_ERR

#undef __CREATE_KERNEL_ERR
#undef __SET_KERNEL_ARGS_ERR
#undef __CREATE_PROGRAM_WITH_SOURCE_ERR
#undef __CREATE_PROGRAM_WITH_BINARY_ERR
#undef __BUILD_PROGRAM_ERR
#undef __CREATE_KERNELS_IN_PROGRAM_ERR

#undef __CREATE_COMMAND_QUEUE_ERR
#undef __SET_COMMAND_QUEUE_PROPERTY_ERR
#undef __ENQUEUE_READ_BUFFER_ERR
#undef __ENQUEUE_WRITE_BUFFER_ERR
#undef __ENQUEUE_READ_BUFFER_RECT_ERR
#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR
// NOTE(review): the "ENQEUE" spelling on the next two lines presumably
// mirrors the matching #define earlier in the file - confirm before
// "correcting" it, otherwise the #undef would no longer match.
#undef __ENQEUE_COPY_BUFFER_ERR
#undef __ENQEUE_COPY_BUFFER_RECT_ERR
#undef __ENQUEUE_READ_IMAGE_ERR
#undef __ENQUEUE_WRITE_IMAGE_ERR
#undef __ENQUEUE_COPY_IMAGE_ERR
#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR
#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR
#undef __ENQUEUE_MAP_BUFFER_ERR
#undef __ENQUEUE_MAP_IMAGE_ERR
#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR
#undef __ENQUEUE_NDRANGE_KERNEL_ERR
#undef __ENQUEUE_TASK_ERR
#undef __ENQUEUE_NATIVE_KERNEL

#undef __UNLOAD_COMPILER_ERR
#endif //__CL_USER_OVERRIDE_ERROR_STRINGS

// Helper macro for extension function pointers; undefined unconditionally.
#undef __INIT_CL_EXT_FCN_PTR

} // namespace cl

#endif // CL_HPP_