|
@@ -28,35 +28,111 @@
|
|
|
#include <stdio.h>
|
|
|
#include <math.h>
|
|
|
|
|
|
-#define NX (30*1000000/sizeof(double))
|
|
|
+#define NX 204800
|
|
|
#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
|
|
|
|
|
|
+extern void scal_cpu_func(void *buffers[], void *_args);
|
|
|
+extern void scal_cpu_func_icc(void *buffers[], void *_args);
|
|
|
+extern void scal_sse_func(void *buffers[], void *_args);
|
|
|
+extern void scal_sse_func_icc(void *buffers[], void *_args);
|
|
|
+extern void scal_cuda_func(void *buffers[], void *_args);
|
|
|
+extern void scal_opencl_func(void *buffers[], void *_args);
|
|
|
|
|
|
-int main(int argc, char **argv)
|
|
|
+static struct starpu_perfmodel vector_scal_model =
|
|
|
{
|
|
|
- double * A,*B,*C,*D,*E,*F;
|
|
|
-
|
|
|
- putenv("STARPU_LIMIT_CPU_MEM=160");
|
|
|
+ .type = STARPU_HISTORY_BASED,
|
|
|
+ .symbol = "vector_scal"
|
|
|
+};
|
|
|
|
|
|
- /* Initialize StarPU with default configuration */
|
|
|
- int ret = starpu_init(NULL);
|
|
|
+static struct starpu_perfmodel vector_scal_power_model =
|
|
|
+{
|
|
|
+ .type = STARPU_HISTORY_BASED,
|
|
|
+ .symbol = "vector_scal_power"
|
|
|
+};
|
|
|
|
|
|
- /* register a disk */
|
|
|
- unsigned dd = starpu_disk_register(&write_on_file, (void *) "/tmp/", 1024*1024*200);
|
|
|
+static struct starpu_codelet cl =
|
|
|
+{
|
|
|
+ .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL | STARPU_MIC,
|
|
|
+ /* CPU implementation of the codelet */
|
|
|
+ .cpu_funcs = {
|
|
|
+ scal_cpu_func
|
|
|
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
|
|
|
+ , scal_cpu_func_icc
|
|
|
+#endif
|
|
|
+#ifdef __SSE__
|
|
|
+ , scal_sse_func
|
|
|
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
|
|
|
+ , scal_sse_func_icc
|
|
|
+#endif
|
|
|
+#endif
|
|
|
+ , NULL
|
|
|
+ },
|
|
|
+ .cpu_funcs_name = {
|
|
|
+ "scal_cpu_func",
|
|
|
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
|
|
|
+ "scal_cpu_func_icc",
|
|
|
+#endif
|
|
|
+#ifdef __SSE__
|
|
|
+ "scal_sse_func",
|
|
|
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
|
|
|
+ "scal_sse_func_icc"
|
|
|
+#endif
|
|
|
+#endif
|
|
|
+ },
|
|
|
+
|
|
|
+#ifdef STARPU_USE_CUDA
|
|
|
+ /* CUDA implementation of the codelet */
|
|
|
+ .cuda_funcs = {scal_cuda_func, NULL},
|
|
|
+#endif
|
|
|
+#ifdef STARPU_USE_OPENCL
|
|
|
+ /* OpenCL implementation of the codelet */
|
|
|
+ .opencl_funcs = {scal_opencl_func, NULL},
|
|
|
+#endif
|
|
|
+ .nbuffers = 1,
|
|
|
+ .modes = {STARPU_RW},
|
|
|
+ .model = &vector_scal_model,
|
|
|
+ .power_model = &vector_scal_power_model
|
|
|
+};
|
|
|
+
|
|
|
+#ifdef STARPU_USE_OPENCL
|
|
|
+struct starpu_opencl_program opencl_program;
|
|
|
+#endif
|
|
|
+
|
|
|
+static int approximately_equal(float a, float b)
|
|
|
+{
|
|
|
+#ifdef STARPU_HAVE_NEARBYINTF
|
|
|
+ int ai = (int) nearbyintf(a * 1000.0);
|
|
|
+ int bi = (int) nearbyintf(b * 1000.0);
|
|
|
+#elif defined(STARPU_HAVE_RINTF)
|
|
|
+ int ai = (int) rintf(a * 1000.0);
|
|
|
+ int bi = (int) rintf(b * 1000.0);
|
|
|
+#else
|
|
|
+#error "Please define either nearbyintf or rintf."
|
|
|
+#endif
|
|
|
+ return ai == bi;
|
|
|
+}
|
|
|
|
|
|
- /* allocate two memory spaces */
|
|
|
- starpu_malloc_flags((void **)&A, NX*sizeof(double), STARPU_MALLOC_COUNT);
|
|
|
- starpu_malloc_flags((void **)&F, NX*sizeof(double), STARPU_MALLOC_COUNT);
|
|
|
+int main(int argc, char **argv)
|
|
|
+{
|
|
|
+ /* We consider a vector of float that is initialized just as any of C
|
|
|
+ * data */
|
|
|
+ float vector[NX];
|
|
|
+ unsigned i;
|
|
|
+ for (i = 0; i < NX; i++)
|
|
|
+ vector[i] = (i+1.0f);
|
|
|
|
|
|
+ /* Initialize StarPU with default configuration */
|
|
|
+ int ret = starpu_init(NULL);
|
|
|
if (ret == -ENODEV) goto enodev;
|
|
|
|
|
|
- FPRINTF(stderr, "Test of disk memory \n");
|
|
|
- unsigned int j;
|
|
|
- for(j = 0; j < NX; ++j)
|
|
|
- {
|
|
|
- A[j] = j;
|
|
|
- F[j] = -j;
|
|
|
- }
|
|
|
+ FPRINTF(stderr, "[BEFORE] 1-th element : %3.2f\n", vector[1]);
|
|
|
+ FPRINTF(stderr, "[BEFORE] (NX-1)th element: %3.2f\n", vector[NX-1]);
|
|
|
+
|
|
|
+#ifdef STARPU_USE_OPENCL
|
|
|
+ ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/vector_scal_opencl_kernel.cl",
|
|
|
+ &opencl_program, NULL);
|
|
|
+ STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
|
|
|
+#endif
|
|
|
|
|
|
/* Tell StaPU to associate the "vector" vector with the "vector_handle"
|
|
|
* identifier. When a task needs to access a piece of data, it should
|
|
@@ -71,57 +147,48 @@ int main(int argc, char **argv)
|
|
|
* - the fourth argument is the number of elements in the vector
|
|
|
* - the fifth argument is the size of each element.
|
|
|
*/
|
|
|
- starpu_data_handle_t vector_handleA, vector_handleB, vector_handleC, vector_handleD, vector_handleE, vector_handleF;
|
|
|
-
|
|
|
- /* register vector in starpu */
|
|
|
- starpu_vector_data_register(&vector_handleA, 0, (uintptr_t)A, NX, sizeof(double));
|
|
|
- starpu_vector_data_register(&vector_handleB, -1, (uintptr_t) NULL, NX, sizeof(double));
|
|
|
- starpu_vector_data_register(&vector_handleC, -1, (uintptr_t) NULL, NX, sizeof(double));
|
|
|
- starpu_vector_data_register(&vector_handleD, -1, (uintptr_t) NULL, NX, sizeof(double));
|
|
|
- starpu_vector_data_register(&vector_handleE, -1, (uintptr_t) NULL, NX, sizeof(double));
|
|
|
- starpu_vector_data_register(&vector_handleF, 0, (uintptr_t)F, NX, sizeof(double));
|
|
|
-
|
|
|
- /* copy vector A->B, B->C... */
|
|
|
- starpu_data_cpy(vector_handleB, vector_handleA, 0, NULL, NULL);
|
|
|
- starpu_data_cpy(vector_handleC, vector_handleB, 0, NULL, NULL);
|
|
|
- starpu_data_cpy(vector_handleD, vector_handleC, 0, NULL, NULL);
|
|
|
- starpu_data_cpy(vector_handleE, vector_handleD, 0, NULL, NULL);
|
|
|
- starpu_data_cpy(vector_handleF, vector_handleE, 0, NULL, NULL);
|
|
|
+ starpu_data_handle_t vector_handle;
|
|
|
+ starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(vector[0]));
|
|
|
+
|
|
|
+ float factor = 3.14;
|
|
|
+
|
|
|
+ /* create a synchronous task: any call to starpu_task_submit will block
|
|
|
+ * until it is terminated */
|
|
|
+ struct starpu_task *task = starpu_task_create();
|
|
|
+ task->synchronous = 1;
|
|
|
+
|
|
|
+ task->cl = &cl;
|
|
|
+
|
|
|
+ /* the codelet manipulates one buffer in RW mode */
|
|
|
+ task->handles[0] = vector_handle;
|
|
|
+
|
|
|
+ /* an argument is passed to the codelet, beware that this is a
|
|
|
+ * READ-ONLY buffer and that the codelet may be given a pointer to a
|
|
|
+ * COPY of the argument */
|
|
|
+ task->cl_arg = &factor;
|
|
|
+ task->cl_arg_size = sizeof(factor);
|
|
|
+
|
|
|
+ /* execute the task on any eligible computational ressource */
|
|
|
+ ret = starpu_task_submit(task);
|
|
|
+ STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
|
|
|
|
|
|
/* StarPU does not need to manipulate the array anymore so we can stop
|
|
|
* monitoring it */
|
|
|
+ starpu_data_unregister(vector_handle);
|
|
|
|
|
|
- /* free them */
|
|
|
- starpu_data_unregister(vector_handleA);
|
|
|
- starpu_data_unregister(vector_handleB);
|
|
|
- starpu_data_unregister(vector_handleC);
|
|
|
- starpu_data_unregister(vector_handleD);
|
|
|
- starpu_data_unregister(vector_handleE);
|
|
|
- starpu_data_unregister(vector_handleF);
|
|
|
-
|
|
|
- starpu_disk_unregister(dd);
|
|
|
-
|
|
|
- /* check if computation is correct */
|
|
|
- int try = 1;
|
|
|
- for (j = 0; j < NX; ++j)
|
|
|
- if (A[j] != F[j])
|
|
|
- {
|
|
|
- printf("Fail A %f != F %f \n", A[j], F[j]);
|
|
|
- try = 0;
|
|
|
- }
|
|
|
-
|
|
|
- /* free last vectors */
|
|
|
- starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT);
|
|
|
- starpu_free_flags(F, NX*sizeof(double), STARPU_MALLOC_COUNT);
|
|
|
+#ifdef STARPU_USE_OPENCL
|
|
|
+ ret = starpu_opencl_unload_opencl(&opencl_program);
|
|
|
+ STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
|
|
|
+#endif
|
|
|
|
|
|
/* terminate StarPU, no task can be submitted after */
|
|
|
starpu_shutdown();
|
|
|
|
|
|
- if(try)
|
|
|
- FPRINTF(stderr, "TEST SUCCESS\n");
|
|
|
- else
|
|
|
- FPRINTF(stderr, "TEST FAIL\n");
|
|
|
- return (try ? EXIT_SUCCESS : EXIT_FAILURE);
|
|
|
+ ret = approximately_equal(vector[1], (1+1.0f) * factor) && approximately_equal(vector[NX-1], (NX-1+1.0f) * factor);
|
|
|
+ FPRINTF(stderr, "[AFTER] 1-th element : %3.2f (should be %3.2f)\n", vector[1], (1+1.0f) * factor);
|
|
|
+ FPRINTF(stderr, "[AFTER] (NX-1)-th element: %3.2f (should be %3.2f)\n", vector[NX-1], (NX-1+1.0f) * factor);
|
|
|
+ FPRINTF(stderr, "[AFTER] Computation is%s correct\n", ret?"":" NOT");
|
|
|
+ return (ret ? EXIT_SUCCESS : EXIT_FAILURE);
|
|
|
|
|
|
enodev:
|
|
|
return 77;
|