gpu_mandelbrot.cu 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. #include <starpu.h>
  2. #include <stdint.h>
  3. #include <stdio.h>
  4. #include <math.h>
  5. struct Params
  6. {
  7. float cr;
  8. float ci;
  9. unsigned taskx;
  10. unsigned tasky;
  11. unsigned width;
  12. unsigned height;
  13. };
  14. __global__ void gpuMandelbrotKernel
  15. (
  16. uint32_t nxP, uint32_t nyP,
  17. uint32_t ldP,
  18. int * subP,
  19. struct Params params
  20. )
  21. {
  22. unsigned width = params.width;
  23. unsigned height = params.height;
  24. unsigned taskx = params.taskx;
  25. unsigned tasky = params.tasky;
  26. float centerr = params.cr;
  27. float centeri = params.ci;
  28. float zoom = width * 0.25296875;
  29. int maxiter = (width/2) * 0.049715909 * log10(zoom);
  30. float conv_lim = 2.0;
  31. uint32_t id;
  32. int n,i,j,x,y;
  33. float zr,zi,cr,ci;
  34. id = blockIdx.x * blockDim.x + threadIdx.x;
  35. i = id % nxP;
  36. j = id / nxP;
  37. if (j >= nyP){
  38. return;
  39. }
  40. x = i + taskx * nxP;
  41. y = j + tasky * nyP;
  42. zr = cr = centerr + (x - width/2.)/zoom;
  43. zi = ci = centeri + (y - height/2.)/zoom;
  44. float m = zr*zr + zi*zi;
  45. for (n = 0; n <= maxiter && m < conv_lim * conv_lim; n++) {
  46. float tmp = zr * zr - zi*zi + cr;
  47. zi = 2*zr*zi + ci;
  48. zr = tmp;
  49. m = zr*zr + zi*zi;
  50. }
  51. int color;
  52. if (n < maxiter)
  53. color = 255.*n/maxiter;
  54. else
  55. color = 0;
  56. subP[i + j*ldP] = color;
  57. }
  58. #define THREADS_PER_BLOCK 64
  59. extern "C" void gpu_mandelbrot(void *descr[], void *args)
  60. {
  61. int *d_subP;
  62. uint32_t nxP, nyP;
  63. uint32_t ldP;
  64. uint32_t nblocks;
  65. struct Params *params = (struct Params *) args;
  66. d_subP = (int *) STARPU_MATRIX_GET_PTR(descr[0]);
  67. nxP = STARPU_MATRIX_GET_NX(descr[0]);
  68. nyP = STARPU_MATRIX_GET_NY(descr[0]);
  69. ldP = STARPU_MATRIX_GET_LD(descr[0]);
  70. nblocks = (nxP * nyP + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK;
  71. gpuMandelbrotKernel <<< nblocks, THREADS_PER_BLOCK, 0, starpu_cuda_get_local_stream() >>> (nxP, nyP, ldP, d_subP, *params);
  72. cudaStreamSynchronize(starpu_cuda_get_local_stream());
  73. }