axpy.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. /*
  2. * StarPU
  3. * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <stdlib.h>
  18. #include <stdio.h>
  19. #include <assert.h>
  20. #include <sys/time.h>
  21. #include <common/blas.h>
  22. #define TYPE float
  23. #define AXPY SAXPY
  24. #define CUBLASAXPY cublasSaxpy
  25. #define N (16*1024*1024)
  26. #define NBLOCKS 8
  27. TYPE *vec_x, *vec_y;
  28. /* descriptors for StarPU */
  29. starpu_data_handle handle_y, handle_x;
  30. void axpy_cpu(void *descr[], __attribute__((unused)) void *arg)
  31. {
  32. TYPE alpha = *((TYPE *)arg);
  33. unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
  34. TYPE *block_x = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]);
  35. TYPE *block_y = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]);
  36. AXPY((int)n, alpha, block_x, 1, block_y, 1);
  37. }
  38. #ifdef STARPU_USE_CUDA
  39. void axpy_gpu(void *descr[], __attribute__((unused)) void *arg)
  40. {
  41. TYPE alpha = *((TYPE *)arg);
  42. unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
  43. TYPE *block_x = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]);
  44. TYPE *block_y = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]);
  45. CUBLASAXPY((int)n, alpha, block_x, 1, block_y, 1);
  46. cudaThreadSynchronize();
  47. }
  48. #endif
  49. static starpu_codelet axpy_cl = {
  50. .where =
  51. #ifdef STARPU_USE_CUDA
  52. STARPU_CUDA|
  53. #endif
  54. STARPU_CPU,
  55. .cpu_func = axpy_cpu,
  56. #ifdef STARPU_USE_CUDA
  57. .cuda_func = axpy_gpu,
  58. #endif
  59. .nbuffers = 2
  60. };
  61. int main(int argc, char **argv)
  62. {
  63. /* Initialize StarPU */
  64. starpu_init(NULL);
  65. starpu_helper_cublas_init();
  66. /* This is equivalent to
  67. vec_a = malloc(N*sizeof(TYPE));
  68. vec_b = malloc(N*sizeof(TYPE));
  69. */
  70. starpu_data_malloc_pinned_if_possible((void **)&vec_x, N*sizeof(TYPE));
  71. assert(vec_x);
  72. starpu_data_malloc_pinned_if_possible((void **)&vec_y, N*sizeof(TYPE));
  73. assert(vec_y);
  74. unsigned i;
  75. for (i = 0; i < N; i++)
  76. {
  77. vec_x[i] = 1.0f;//(TYPE)starpu_drand48();
  78. vec_y[i] = 4.0f;//(TYPE)starpu_drand48();
  79. }
  80. fprintf(stderr, "BEFORE x[0] = %2.2f\n", vec_x[0]);
  81. fprintf(stderr, "BEFORE y[0] = %2.2f\n", vec_y[0]);
  82. /* Declare the data to StarPU */
  83. starpu_vector_data_register(&handle_x, 0, (uintptr_t)vec_x, N, sizeof(TYPE));
  84. starpu_vector_data_register(&handle_y, 0, (uintptr_t)vec_y, N, sizeof(TYPE));
  85. /* Divide the vector into blocks */
  86. struct starpu_data_filter block_filter = {
  87. .filter_func = starpu_block_filter_func_vector,
  88. .nchildren = NBLOCKS,
  89. .get_nchildren = NULL,
  90. .get_child_ops = NULL
  91. };
  92. starpu_data_partition(handle_x, &block_filter);
  93. starpu_data_partition(handle_y, &block_filter);
  94. TYPE alpha = 3.41;
  95. struct timeval start;
  96. struct timeval end;
  97. gettimeofday(&start, NULL);
  98. unsigned b;
  99. for (b = 0; b < NBLOCKS; b++)
  100. {
  101. struct starpu_task *task = starpu_task_create();
  102. task->cl = &axpy_cl;
  103. task->cl_arg = &alpha;
  104. task->buffers[0].handle = starpu_data_get_sub_data(handle_x, 1, b);
  105. task->buffers[0].mode = STARPU_R;
  106. task->buffers[1].handle = starpu_data_get_sub_data(handle_y, 1, b);
  107. task->buffers[1].mode = STARPU_RW;
  108. starpu_task_submit(task);
  109. }
  110. starpu_task_wait_for_all();
  111. starpu_data_unpartition(handle_y, 0);
  112. starpu_data_unregister(handle_y);
  113. gettimeofday(&end, NULL);
  114. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 +
  115. (end.tv_usec - start.tv_usec));
  116. fprintf(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing);
  117. fprintf(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", vec_y[0], alpha);
  118. /* Stop StarPU */
  119. starpu_shutdown();
  120. return 0;
  121. }