gpu_black_scholes.cu 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2019 Mael Keryell
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <stdio.h>
  17. #include <stdint.h>
  18. #include <math.h>
  19. #include <starpu.h>
  20. // __device__ inline double cndGPU(double d)
  21. // {
  22. // const double A1 = 0.31938153f;
  23. // const double A2 = -0.356563782f;
  24. // const double A3 = 1.781477937f;
  25. // const double A4 = -1.821255978f;
  26. // const double A5 = 1.330274429f;
  27. // const float RSQRT2PI = 0.39894228040143267793994605993438f;
  28. // double K = __fdividef(1.0f, (1.0f + 0.2316419f * fabsf(d)));
  29. // double cnd = RSQRT2PI * __expf(- 0.5f * d * d) * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));
  30. // if (d > 0)
  31. // cnd = 1.0f - cnd;
  32. // return cnd;
  33. // }
  34. __device__ inline double cndGPU(double d)
  35. {
  36. return (1.0 + erf(d/sqrt(2.0)))/2.0;
  37. }
  38. __global__ void gpuBlackScholesKernel(double *S, double *K, double *R, double *T,
  39. double *SIG, double *CRES, double *PRES,
  40. uint32_t nxS)
  41. {
  42. uint32_t i, id;
  43. id = blockIdx.x * blockDim.x + threadIdx.x;
  44. i = id % nxS;
  45. double sqrtT = __fdividef(1.0F, rsqrtf(T[i]));
  46. double d1 = (log(S[i] / K[i]) + (R[i] + SIG[i] * SIG[i] * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));
  47. double d2 = (log(S[i] / K[i]) + (R[i] - SIG[i] * SIG[i] * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));
  48. CRES[i] = S[i] * (normcdf(d1)) - K[i] * exp(-R[i] * T[i]) * normcdf(d2);
  49. PRES[i] = -S[i] * (normcdf(-d1)) + K[i] * exp(-R[i] * T[i]) * normcdf(-d2);
  50. }
  51. #define THREADS_PER_BLOCK 64
  52. extern "C" void gpu_black_scholes(void *descr[], void *args)
  53. {
  54. double *S, *K, *R, *T, *SIG, *CRES, *PRES;
  55. uint32_t nxS;
  56. uint32_t nblocks;
  57. S = (double *) STARPU_MATRIX_GET_PTR(descr[0]);
  58. K = (double *) STARPU_MATRIX_GET_PTR(descr[1]);
  59. R = (double *) STARPU_MATRIX_GET_PTR(descr[2]);
  60. T = (double *) STARPU_MATRIX_GET_PTR(descr[3]);
  61. SIG = (double *) STARPU_MATRIX_GET_PTR(descr[4]);
  62. CRES = (double *) STARPU_MATRIX_GET_PTR(descr[5]);
  63. PRES = (double *) STARPU_MATRIX_GET_PTR(descr[6]);
  64. nxS = STARPU_MATRIX_GET_NX(descr[0]);
  65. nblocks = (nxS + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  66. gpuBlackScholesKernel
  67. <<< nblocks, THREADS_PER_BLOCK, 0, starpu_cuda_get_local_stream()
  68. >>> (S, K, R, T, SIG, CRES, PRES, nxS);
  69. cudaStreamSynchronize(starpu_cuda_get_local_stream());
  70. }