Browse Source

doc/tutorial: fix makefile and opencl program

Nathalie Furmento 3 years ago
parent
commit
9d8d17fcf5
2 changed files with 25 additions and 20 deletions
  1. 6 5
      doc/tutorial/Makefile
  2. 19 15
      doc/tutorial/vector_scal_opencl.c

+ 6 - 5
doc/tutorial/Makefile

@@ -13,12 +13,12 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
-CFLAGS          +=      $$(pkg-config --cflags starpu-1.1)
-LDLIBS          +=      $$(pkg-config --libs starpu-1.1)
+CFLAGS          +=      $$(pkg-config --cflags starpu-1.3)
+LDLIBS          +=      $$(pkg-config --libs starpu-1.3)
 
-HAS_CUDA	=	$(shell pkg-config --libs starpu-1.1 |grep -i cuda)
+HAS_CUDA	=	$(shell starpu_machine_display 2>/dev/null | grep CUDA | head -1)
 NVCC		?=	nvcc
-HAS_OPENCL	=	$(shell pkg-config --libs starpu-1.1 |grep -i opencl)
+HAS_OPENCL	=	$(shell starpu_machine_display 2>/dev/null | grep OpenCL | head -1)
 
 %.o: %.cu
 	nvcc $(CFLAGS) $< -c
@@ -35,7 +35,8 @@ else
 VECTOR_SCAL_COMPILER		=	$(CC)
 endif
 ifneq ($(strip $(HAS_OPENCL)),)
-VECTOR_SCAL_PREREQUISITES += vector_scal_opencl.o
+VECTOR_SCAL_PREREQUISITES 	+=	vector_scal_opencl.o
+LDLIBS				+=	-lOpenCL
 endif
 
 vector_scal: $(VECTOR_SCAL_PREREQUISITES)

+ 19 - 15
doc/tutorial/vector_scal_opencl.c

@@ -21,38 +21,42 @@ extern struct starpu_opencl_program programs;
 void vector_scal_opencl(void *buffers[], void *_args)
 {
 	float *factor = _args;
-	int id, devid, err;
+	int id, devid;
+	cl_int err;
 	cl_kernel kernel;
 	cl_command_queue queue;
-	cl_event event;
 
 	/* length of the vector */
-	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+	unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]);
 	/* OpenCL copy of the vector pointer */
 	cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
 
 	id = starpu_worker_get_id();
 	devid = starpu_worker_get_devid(id);
 
-	err = starpu_opencl_load_kernel(&kernel, &queue, &programs,
-					"vector_mult_opencl", devid);   /* Name of the codelet defined above */
+	err = starpu_opencl_load_kernel(&kernel, &queue, &programs, "vector_mult_opencl", devid);
 	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
-	err |= clSetKernelArg(kernel, 1, sizeof(n), &n);
+	err = clSetKernelArg(kernel, 0, sizeof(n), &n);
+	err |= clSetKernelArg(kernel, 1, sizeof(val), &val);
 	err |= clSetKernelArg(kernel, 2, sizeof(*factor), factor);
 	if (err) STARPU_OPENCL_REPORT_ERROR(err);
 
 	{
-		size_t global=1;
-		size_t local=1;
-		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
-		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
-	}
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
 
-	clFinish(queue);
-	starpu_opencl_collect_stats(event);
-	clReleaseEvent(event);
+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+                if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+                if (local > global) local=global;
+                else global = (global + local-1) / local * local;
 
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+	}
 	starpu_opencl_release_kernel(kernel);
 }