|
@@ -17,96 +17,7 @@
|
|
#include <starpu.h>
|
|
#include <starpu.h>
|
|
#include <starpu_opencl.h>
|
|
#include <starpu_opencl.h>
|
|
#include "../helper.h"
|
|
#include "../helper.h"
|
|
-
|
|
|
|
-void scal_func_cpu(void *buffers[], void *cl_arg)
|
|
|
|
-{
|
|
|
|
- unsigned i;
|
|
|
|
-
|
|
|
|
- struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0];
|
|
|
|
- unsigned *val = (unsigned *) STARPU_VECTOR_GET_PTR(vector);
|
|
|
|
- unsigned n = STARPU_VECTOR_GET_NX(vector);
|
|
|
|
-
|
|
|
|
- /* scale the vector */
|
|
|
|
- for (i = 0; i < n; i++)
|
|
|
|
- val[i] *= 2;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-#ifdef STARPU_USE_CUDA
|
|
|
|
-extern void scal_func_cuda(void *buffers[], void *cl_arg);
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
-#ifdef STARPU_USE_OPENCL
|
|
|
|
-static struct starpu_opencl_program opencl_program;
|
|
|
|
-
|
|
|
|
-void scal_func_opencl(void *buffers[], void *_args)
|
|
|
|
-{
|
|
|
|
- int id, devid;
|
|
|
|
- cl_int err;
|
|
|
|
- cl_kernel kernel;
|
|
|
|
- cl_command_queue queue;
|
|
|
|
- cl_event event;
|
|
|
|
-
|
|
|
|
- unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
|
|
|
|
- cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
|
|
|
|
- unsigned offset = STARPU_VECTOR_GET_OFFSET(buffers[0]);
|
|
|
|
-
|
|
|
|
- id = starpu_worker_get_id();
|
|
|
|
- devid = starpu_worker_get_devid(id);
|
|
|
|
-
|
|
|
|
- err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "vector_mult_opencl", devid);
|
|
|
|
- if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
-
|
|
|
|
- err = clSetKernelArg(kernel, 0, sizeof(val), &val);
|
|
|
|
- err |= clSetKernelArg(kernel, 1, sizeof(offset), &offset);
|
|
|
|
- err |= clSetKernelArg(kernel, 2, sizeof(n), &n);
|
|
|
|
- if (err) STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
-
|
|
|
|
- {
|
|
|
|
- size_t global=n;
|
|
|
|
- size_t local;
|
|
|
|
- size_t s;
|
|
|
|
- cl_device_id device;
|
|
|
|
-
|
|
|
|
- starpu_opencl_get_device(devid, &device);
|
|
|
|
-
|
|
|
|
- err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
|
|
|
|
- if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
- if (local > global) local=global;
|
|
|
|
-
|
|
|
|
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
|
|
|
|
- if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- clFinish(queue);
|
|
|
|
- starpu_opencl_collect_stats(event);
|
|
|
|
- clReleaseEvent(event);
|
|
|
|
-
|
|
|
|
- starpu_opencl_release_kernel(kernel);
|
|
|
|
-}
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
-static struct starpu_codelet codelet =
|
|
|
|
-{
|
|
|
|
- .where = STARPU_CPU
|
|
|
|
-#ifdef STARPU_USE_CUDA
|
|
|
|
- | STARPU_CUDA
|
|
|
|
-#endif
|
|
|
|
-#ifdef STARPU_USE_OPENCL
|
|
|
|
- | STARPU_OPENCL
|
|
|
|
-#endif
|
|
|
|
- ,
|
|
|
|
- .cpu_funcs = { scal_func_cpu, NULL },
|
|
|
|
-#ifdef STARPU_USE_OPENCL
|
|
|
|
- .opencl_funcs = { scal_func_opencl, NULL },
|
|
|
|
-#endif
|
|
|
|
-#ifdef STARPU_USE_CUDA
|
|
|
|
- .cuda_funcs = { scal_func_cuda, NULL },
|
|
|
|
-#endif
|
|
|
|
- .modes = { STARPU_RW },
|
|
|
|
- .model = NULL,
|
|
|
|
- .nbuffers = 1
|
|
|
|
-};
|
|
|
|
-
|
|
|
|
|
|
+#include "scal.h"
|
|
|
|
|
|
int main(int argc, char **argv)
|
|
int main(int argc, char **argv)
|
|
{
|
|
{
|
|
@@ -168,7 +79,7 @@ int main(int argc, char **argv)
|
|
struct starpu_task *task = starpu_task_create();
|
|
struct starpu_task *task = starpu_task_create();
|
|
|
|
|
|
task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
|
|
task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
|
|
- task->cl = &codelet;
|
|
|
|
|
|
+ task->cl = &scal_codelet;
|
|
task->execute_on_a_specific_worker = 1;
|
|
task->execute_on_a_specific_worker = 1;
|
|
task->workerid = i;
|
|
task->workerid = i;
|
|
|
|
|