file_generation.jl 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. # StarPU --- Runtime system for heterogeneous multicore architectures.
  2. #
  3. # Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. #
  5. # StarPU is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU Lesser General Public License as published by
  7. # the Free Software Foundation; either version 2.1 of the License, or (at
  8. # your option) any later version.
  9. #
  10. # StarPU is distributed in the hope that it will be useful, but
  11. # WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. #
  14. # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. #
  16. const cpu_kernel_file_start = "#include <stdio.h>
  17. #include <stdint.h>
  18. #include <starpu.h>
  19. #include <math.h>
  20. #include \"blas.h\"
  21. static inline long long jlstarpu_max(long long a, long long b)
  22. {
  23. return (a > b) ? a : b;
  24. }
  25. static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
  26. {
  27. if (stop >= start){
  28. return jlstarpu_max(0, (stop - start + 1) / step);
  29. } else {
  30. return jlstarpu_max(0, (stop - start - 1) / step);
  31. }
  32. }
  33. "
  34. const cuda_kernel_file_start = "#include <stdio.h>
  35. #include <stdint.h>
  36. #include <starpu.h>
  37. #include <math.h>
  38. #include <starpu_cublas_v2.h>
  39. #define THREADS_PER_BLOCK 64
  40. __attribute__((unused)) static inline long long jlstarpu_max(long long a, long long b)
  41. {
  42. return (a > b) ? a : b;
  43. }
  44. __attribute__((unused)) static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
  45. {
  46. if (stop >= start){
  47. return jlstarpu_max(0, (stop - start + 1) / step);
  48. } else {
  49. return jlstarpu_max(0, (stop - start - 1) / step);
  50. }
  51. }
  52. __attribute__((unused)) __device__ static inline long long jlstarpu_max__device(long long a, long long b)
  53. {
  54. return (a > b) ? a : b;
  55. }
  56. __attribute__((unused)) __device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
  57. {
  58. if (stop >= start){
  59. return jlstarpu_max__device(0, (stop - start + 1) / step);
  60. } else {
  61. return jlstarpu_max__device(0, (stop - start - 1) / step);
  62. }
  63. }
  64. "
  65. """
  66. Opens a new Cuda source file, where generated GPU kernels will be written
  67. """
  68. function starpu_new_cuda_kernel_file(file_name :: String)
  69. global generated_cuda_kernel_file_name = file_name
  70. kernel_file = open(file_name, "w")
  71. print(kernel_file, cuda_kernel_file_start)
  72. close(kernel_file)
  73. return nothing
  74. end
  75. export target
  76. macro target(x)
  77. targets = eval(x)
  78. return quote
  79. starpu_target=$targets
  80. global starpu_target
  81. end
  82. end
  83. """
  84. Executes @cuda_kernel and @cpu_kernel
  85. """
  86. macro codelet(x)
  87. parsed = starpu_parse(x)
  88. name=string(x.args[1].args[1].args[1]);
  89. cpu_name = name
  90. cuda_name = "CUDA_"*name
  91. dump(name)
  92. parse_scalar_parameters(parsed, name)
  93. c_struct_param_decl = generate_c_struct_param_declaration(name)
  94. cpu_expr = transform_to_cpu_kernel(parsed)
  95. if (starpu_target & STARPU_CUDA != 0)
  96. prekernel, kernel = transform_to_cuda_kernel(parsed)
  97. end
  98. generated_cpu_kernel_file_name=string("genc_",string(x.args[1].args[1].args[1]),".c")
  99. generated_cuda_kernel_file_name=string("gencuda_",string(x.args[1].args[1].args[1]),".cu")
  100. if (starpu_target & STARPU_CPU != 0)
  101. kernel_file = open(generated_cpu_kernel_file_name, "w")
  102. debug_print("generating ", generated_cpu_kernel_file_name)
  103. print(kernel_file, cpu_kernel_file_start)
  104. print(kernel_file, c_struct_param_decl)
  105. print(kernel_file, cpu_expr)
  106. close(kernel_file)
  107. CPU_CODELETS[name]=cpu_name
  108. end
  109. if (starpu_target & STARPU_CUDA!=0) && STARPU_USE_CUDA == 1
  110. kernel_file = open(generated_cuda_kernel_file_name, "w")
  111. debug_print("generating ", generated_cuda_kernel_file_name)
  112. print(kernel_file, cuda_kernel_file_start)
  113. if kernel != nothing
  114. print(kernel_file, "__global__ ", kernel)
  115. end
  116. print(kernel_file, c_struct_param_decl)
  117. print(kernel_file, "\nextern \"C\" ", prekernel)
  118. close(kernel_file)
  119. CUDA_CODELETS[name]=cuda_name
  120. end
  121. end
  122. function parse_scalar_parameters(expr :: StarpuExprFunction, codelet_name)
  123. scalar_parameters = []
  124. for i in (1 : length(expr.args))
  125. type = expr.args[i].typ
  126. if (type <: Number || type <: AbstractChar)
  127. push!(scalar_parameters, (expr.args[i].name, type))
  128. end
  129. end
  130. CODELETS_SCALARS[codelet_name] = scalar_parameters
  131. # declare structure carrying scalar parameters
  132. struct_params_name = Symbol("params_", rand_string())
  133. structure_decl_str = "mutable struct " * "$struct_params_name\n"
  134. for p in scalar_parameters
  135. structure_decl_str *= "$(p[1])::$(p[2])\n"
  136. end
  137. structure_decl_str *= "end"
  138. eval(Meta.parse(structure_decl_str))
  139. # add structure type to dictionnary
  140. add_to_dict_str = "starpu_type_traduction_dict[$struct_params_name] = \"struct $struct_params_name\""
  141. eval(Meta.parse(add_to_dict_str))
  142. # save structure name
  143. CODELETS_PARAMS_STRUCT[codelet_name] = struct_params_name
  144. end