Browse Source

Changes for julia1.3+
Optimization of C and julia code for better comparison with native code

Denis Barthou 5 years ago
parent
commit
73fc4e1dc7
100 changed files with 3636 additions and 3985 deletions
  1. 4 0
      julia/StarPU.jl/Manifest.toml
  2. 7 0
      julia/StarPU.jl/Project.toml
  3. 2 0
      julia/StarPU.jl/REQUIRE
  4. 1230 0
      julia/StarPU.jl/src/StarPU.jl
  5. 261 0
      julia/StarPU.jl/src/compiler/c.jl
  6. 349 0
      julia/StarPU.jl/src/compiler/cuda.jl
  7. 0 0
      julia/StarPU.jl/src/compiler/expression_manipulation.jl
  8. 928 0
      julia/StarPU.jl/src/compiler/expressions.jl
  9. 132 0
      julia/StarPU.jl/src/compiler/file_generation.jl
  10. 13 0
      julia/StarPU.jl/src/compiler/include.jl
  11. 5 8
      julia/src/Compiler/parsing.jl
  12. 0 12
      julia/src/Compiler/utils.jl
  13. 1 7
      julia/src/Wrapper/C/jlstarpu.h
  14. 21 66
      julia/src/Wrapper/C/jlstarpu_data_handles.c
  15. 1 7
      julia/src/Wrapper/C/jlstarpu_simple_functions.c
  16. 2 7
      julia/src/Wrapper/C/jlstarpu_task.h
  17. 9 9
      julia/src/Wrapper/C/jlstarpu_task_submit.c
  18. 1 2
      julia/src/Wrapper/C/jlstarpu_utils.h
  19. 23 26
      julia/src/Wrapper/Julia/linked_list.jl
  20. 20 0
      julia/mult/README
  21. 55 0
      julia/mult/makefile
  22. 21 61
      julia/tst/mult.c
  23. 146 0
      julia/mult/mult.jl
  24. 4 0
      julia/mult/mult.plot
  25. 44 0
      julia/mult/mult_native.jl
  26. 11 0
      julia/mult/res/mult_cstarpu_gcc9_s72_2x2_b4x2.dat
  27. 29 0
      julia/mult/res/mult_gen_gcc9_1x4.dat
  28. 29 0
      julia/mult/res/mult_gen_gcc9_4x1.dat
  29. 7 0
      julia/mult/res/mult_gen_gcc9_s100_4x1.dat
  30. 17 0
      julia/mult/res/mult_gen_gcc9_s50_4x1.dat
  31. 4 0
      julia/mult/res/mult_gen_gcc9_s64_16x16_b4x2.dat
  32. 13 0
      julia/mult/res/mult_gen_gcc9_s64_4x4_b4x2.dat
  33. 7 0
      julia/mult/res/mult_gen_gcc9_s64_8x1_b4x2.dat
  34. 7 0
      julia/mult/res/mult_gen_gcc9_s64_8x8_b4x2.dat
  35. 11 0
      julia/mult/res/mult_gen_gcc9_s72_16x18_b4x2.dat
  36. 11 0
      julia/mult/res/mult_gen_gcc9_s72_16x8_b4x2.dat
  37. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2.dat
  38. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b4x2.dat
  39. 11 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b4x4.dat
  40. 5 0
      julia/mult/res/mult_gen_gcc9_s72_2x2_b8x2.dat
  41. 11 0
      julia/mult/res/mult_gen_gcc9_s72_4x1.dat
  42. 11 0
      julia/mult/res/mult_gen_gcc9_s72_4x4_b4x2.dat
  43. 11 0
      julia/mult/res/mult_gen_gcc9_s72_8x8_b4x2.dat
  44. 9 0
      julia/mult/res/mult_gen_gcc9_s80_4x1.dat
  45. 3 0
      julia/mult/res/mult_gen_icc_s72_2x1_b4x2.dat
  46. 11 0
      julia/mult/res/mult_gen_icc_s72_4x4_b4x2.dat
  47. 62 0
      julia/mult/res/mult_native.dat
  48. 11 0
      julia/mult/res/mult_nogen_gcc9_s72_2x2_b2x2.dat
  49. 11 0
      julia/mult/res/mult_nogen_gcc9_s72_2x2_b4x2.dat
  50. 11 0
      julia/mult/res/mult_nogen_icc_s72-36_2x2_b4x2.dat
  51. 11 0
      julia/mult/res/mult_nogen_icc_s72_2x2_b4x2.dat
  52. 11 0
      julia/mult/res/mult_nogen_icc_s72x2_2x2_b4x2.dat
  53. 0 57
      julia/src/Compiler/C/add_for_loop_declarations.jl
  54. 0 15
      julia/src/Compiler/C/create_cpu_kernel.jl
  55. 0 27
      julia/src/Compiler/C/flatten_blocks.jl
  56. 0 76
      julia/src/Compiler/C/substitute_args.jl
  57. 0 25
      julia/src/Compiler/C/substitute_func_calls.jl
  58. 0 52
      julia/src/Compiler/C/substitute_indexing.jl
  59. 0 179
      julia/src/Compiler/Cuda/create_cuda_kernel.jl
  60. 0 49
      julia/src/Compiler/Cuda/indep_for.jl
  61. 0 121
      julia/src/Compiler/Cuda/indep_for_kernel_ids.jl
  62. 0 60
      julia/src/Compiler/Expressions/affect.jl
  63. 0 68
      julia/src/Compiler/Expressions/block.jl
  64. 0 75
      julia/src/Compiler/Expressions/call.jl
  65. 0 60
      julia/src/Compiler/Expressions/cuda_call.jl
  66. 0 44
      julia/src/Compiler/Expressions/field.jl
  67. 0 100
      julia/src/Compiler/Expressions/for.jl
  68. 0 85
      julia/src/Compiler/Expressions/function.jl
  69. 0 94
      julia/src/Compiler/Expressions/if.jl
  70. 0 48
      julia/src/Compiler/Expressions/interval.jl
  71. 0 70
      julia/src/Compiler/Expressions/ref.jl
  72. 0 33
      julia/src/Compiler/Expressions/return.jl
  73. 0 63
      julia/src/Compiler/Expressions/simple_expressions.jl
  74. 0 109
      julia/src/Compiler/Expressions/typed.jl
  75. 0 53
      julia/src/Compiler/Expressions/while.jl
  76. 0 69
      julia/src/Compiler/Generate_files/c_files.jl
  77. 0 134
      julia/src/Compiler/Generate_files/cuda_files.jl
  78. 0 54
      julia/src/Compiler/Generate_files/so_files.jl
  79. 0 39
      julia/src/Compiler/include.jl
  80. 0 146
      julia/src/Wrapper/Julia/starpu_codelet.jl
  81. 0 234
      julia/src/Wrapper/Julia/starpu_data_handle.jl
  82. 0 49
      julia/src/Wrapper/Julia/starpu_define.jl
  83. 0 125
      julia/src/Wrapper/Julia/starpu_destructible.jl
  84. 0 20
      julia/src/Wrapper/Julia/starpu_include.jl
  85. 0 35
      julia/src/Wrapper/Julia/starpu_init_shutdown.jl
  86. 0 90
      julia/src/Wrapper/Julia/starpu_perfmodel.jl
  87. 0 28
      julia/src/Wrapper/Julia/starpu_simple_functions.jl
  88. 0 184
      julia/src/Wrapper/Julia/starpu_task.jl
  89. 0 72
      julia/src/Wrapper/Julia/starpu_task_submit.jl
  90. 0 35
      julia/src/Wrapper/Julia/static_structures.jl
  91. 0 68
      julia/tst/Makefile.mk
  92. 0 41
      julia/tst/README
  93. 0 241
      julia/tst/black_scholes/black_scholes.c
  94. 0 81
      julia/tst/black_scholes/black_scholes_def.jl
  95. 0 35
      julia/tst/black_scholes/black_scholes_generated.jl
  96. 0 170
      julia/tst/black_scholes/black_scholes_with_generated.c
  97. 0 54
      julia/tst/black_scholes/cpu_black_scholes.c
  98. 0 124
      julia/tst/black_scholes/cpu_cuda_black_scholes.jl
  99. 0 89
      julia/tst/black_scholes/gpu_black_scholes.cu
  100. 0 0
      julia/tst/cpu_cuda_mult.jl

+ 4 - 0
julia/StarPU.jl/Manifest.toml

@@ -0,0 +1,4 @@
+# This file is machine-generated - editing it directly is not advised
+
+[[Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

+ 7 - 0
julia/StarPU.jl/Project.toml

@@ -0,0 +1,7 @@
+name = "StarPU"
+uuid = "3e36cc6e-3f67-11e9-3531-2137bfe575e8"
+authors = ["barthou "]
+version = "0.1.0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

+ 2 - 0
julia/StarPU.jl/REQUIRE

@@ -0,0 +1,2 @@
+julia 1.0
+Libdl

File diff suppressed because it is too large
+ 1230 - 0
julia/StarPU.jl/src/StarPU.jl


+ 261 - 0
julia/StarPU.jl/src/compiler/c.jl

@@ -0,0 +1,261 @@
+
+
+"""
+    Returns the list of instruction that will be added before for loop of shape
+        "for for_index_var in set ..."
+"""
+function interval_evaluation_declarations(set :: StarpuExprInterval, for_index_var :: Symbol)
+
+    # Patterns: bare typed declaration, typed declaration with initial value,
+    # and typed declaration initialised with the interval size.
+    typed_decl = @parse € :: Int64
+    typed_init = @parse € :: Int64 = €
+    size_init = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
+
+    # Every helper variable name is suffixed with the identifier of the interval.
+    helper_var(prefix :: Symbol) = starpu_parse(Symbol(prefix, set.id))
+
+    lower = helper_var(:start_)
+    lower_decl = replace_pattern(typed_init, lower, set.start)
+    loop_index_decl = replace_pattern(typed_decl, starpu_parse(for_index_var))
+
+    # When the step is a plain value, only the two bounds and the loop index are declared.
+    if isa(set.step, StarpuExprValue)
+        upper_decl = replace_pattern(typed_init, helper_var(:stop_), set.stop)
+        return StarpuExpr[lower_decl, upper_decl, loop_index_decl]
+    end
+
+    # Otherwise the step, the interval size and an iteration counter are declared as well.
+    stride = helper_var(:step_)
+    stride_decl = replace_pattern(typed_init, stride, set.step)
+    extent_decl = replace_pattern(size_init, helper_var(:dim_), lower, stride, set.stop)
+    counter_decl = replace_pattern(typed_decl, helper_var(:iter_))
+
+    return StarpuExpr[lower_decl, stride_decl, extent_decl, counter_decl, loop_index_decl]
+end
+
+
+# Rebuilds every `StarpuExprFor` found in `expr` so that its `set_declarations`
+# field holds the declarations computed by `interval_evaluation_declarations`.
+# Any other node is returned unchanged.
+function add_for_loop_declarations(expr :: StarpuExpr)
+
+    function attach_declarations(node :: StarpuExpr)
+
+        isa(node, StarpuExprFor) || return node
+
+        decls = interval_evaluation_declarations(node.set, node.iter)
+        return StarpuExprFor(node.iter, node.set, node.body, node.is_independant, decls)
+    end
+
+    return apply(attach_declarations, expr)
+end
+
+
+
+
+
+# Runs the CPU code-generation passes on a task function, in this fixed order:
+# loop declarations, argument substitution, size-query renaming, index
+# rewriting, and finally block flattening.
+function transform_to_cpu_kernel(expr :: StarpuExprFunction)
+
+    return expr |>
+           add_for_loop_declarations |>
+           substitute_args |>
+           substitute_func_calls |>
+           substitute_indexing |>
+           flatten_blocks
+end
+
+
+
+# Splices the instructions of every block directly nested inside another block
+# into the parent, so that a block never directly contains another block.
+# Nodes that are not `StarpuExprBlock` are returned unchanged.
+function flatten_blocks(expr :: StarpuExpr)
+
+    function func_to_run(x :: StarpuExpr)
+
+        if !isa(x, StarpuExprBlock)
+            return x
+        end
+
+        instrs = StarpuExpr[]
+
+        for sub_expr in x.exprs
+
+            if isa(sub_expr, StarpuExprBlock)
+                # append! instead of push!(instrs, sub_expr.exprs...): no splatting
+                # of a whole vector into varargs, and an empty nested block is
+                # handled like any other.
+                append!(instrs, sub_expr.exprs)
+            else
+                push!(instrs, sub_expr)
+            end
+        end
+
+        return StarpuExprBlock(instrs)
+    end
+
+    return apply(func_to_run, expr)
+end
+
+
+# Rewrites the uses of one task argument inside `expr`:
+#   * every variable named `arg_name` becomes a variable named `ptr_name`;
+#   * every call to a function listed in `func_substitution` whose single
+#     argument is that variable becomes a call to the substituted function on
+#     `buffer_name[arg_index]`.
+# A call to a function listed in `func_substitution` with an arity other than
+# one raises an error.
+function substitute_argument_usage(expr :: StarpuExpr, arg_index, buffer_name :: Symbol, arg_name :: Symbol, ptr_name :: Symbol)
+
+    function func_to_apply(x :: StarpuExpr)
+
+        x == StarpuExprVar(arg_name) && return StarpuExprVar(ptr_name)
+
+        is_size_query = isa(x, StarpuExprCall) && haskey(func_substitution, x.func)
+        is_size_query || return x
+
+        length(x.args) == 1 || error("Invalid arity for function $(x.func)")
+
+        x.args[1] == StarpuExprVar(ptr_name) || return x
+
+        buffer_slot = starpu_parse(:($buffer_name[$arg_index]))
+
+        return StarpuExprCall(func_substitution[x.func], [buffer_slot])
+    end
+
+    return apply(func_to_apply, expr)
+end
+
+
+
+# Rewrites a task function so that it takes the two generated arguments
+# `buffers_<id>` and `cl_arg_<id>` instead of its original ones.
+#
+# For every original argument, a pointer `ptr_<id>` is declared at the start of
+# the body and initialised from `buffers_<id>[i]` through the accessor matching
+# the argument type:
+#   * Vector  -> STARPU_VECTOR_GET_PTR
+#   * Matrix  -> STARPU_MATRIX_GET_PTR, plus `ld_<id>` from STARPU_MATRIX_GET_LD
+#   * Float32 -> STARPU_VARIABLE_GET_PTR, plus `scal_<id>` holding `ptr_<id>[0]`
+# Uses of the argument in the body are then rewritten by `substitute_argument_usage`.
+#
+# Raises an error for any other argument type.
+function substitute_args(expr :: StarpuExprFunction)
+
+    new_body = expr.body
+    func_id = rand_string()
+    buffer_arg_name = Symbol("buffers_", func_id)
+    cl_arg_name = Symbol("cl_arg_", func_id)
+    function_start_affectations = StarpuExpr[]
+
+    for i in (1 : length(expr.args))
+
+        arg_typ = expr.args[i].typ
+        var_id = rand_string()
+        ptr = Symbol(:ptr_, var_id)
+        var_name = ptr
+
+        # Reset for each argument. The previous version kept a flag declared
+        # outside the loop, so an argument following a Matrix or Float32 one
+        # tried to push a declaration that did not exist for it.
+        post_affect = nothing
+
+        if (arg_typ <: Vector)
+            func_interface = :STARPU_VECTOR_GET_PTR
+        elseif (arg_typ <: Matrix)
+            func_interface = :STARPU_MATRIX_GET_PTR
+            ld_name = Symbol("ld_", var_id)
+            post_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$i])) )
+        elseif (arg_typ <: Float32)
+            func_interface = :STARPU_VARIABLE_GET_PTR
+            var_name = Symbol("scal_", var_id)
+            post_affect = starpu_parse( :($var_name :: Float32 = ($ptr[0])) )
+        else
+            # Fail with a clear message rather than on an unassigned `func_interface`.
+            error("Task arguments must be either vector, matrix or Float32 (got $(arg_typ))") #TODO : cl_args, variable ?
+        end
+
+        type_in_arg = eltype(arg_typ)
+        new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$i])) )
+        push!(function_start_affectations, new_affect)
+        if post_affect !== nothing
+            # Must come after the pointer declaration: the scalar case reads through it.
+            push!(function_start_affectations, post_affect)
+        end
+        new_body = substitute_argument_usage(new_body, i, buffer_arg_name, expr.args[i].name, var_name)
+
+    end
+
+
+    new_args = [
+                    starpu_parse(:($buffer_arg_name :: Matrix{Nothing})),
+                    starpu_parse(:($cl_arg_name :: Vector{Nothing}))
+                ]
+    new_body = StarpuExprBlock([function_start_affectations..., new_body.exprs...])
+
+    return StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
+end
+
+
+
+# Maps the size-query function names accepted in task code to the name of the
+# accessor emitted in their place (used by `substitute_argument_usage` and
+# `substitute_func_calls`). Note that `width` maps to the NY accessor and
+# `height` to the NX accessor.
+func_substitution = Dict(
+    :width => :STARPU_MATRIX_GET_NY,
+    :height => :STARPU_MATRIX_GET_NX,
+
+    :length => :STARPU_VECTOR_GET_NX
+)
+
+
+
+# Renames every call whose function name is a key of `func_substitution` to the
+# associated name, keeping the call arguments untouched.
+function substitute_func_calls(expr :: StarpuExpr)
+
+    function rename_call(node :: StarpuExpr)
+
+        if isa(node, StarpuExprCall) && haskey(func_substitution, node.func)
+            return StarpuExprCall(func_substitution[node.func], node.args)
+        end
+
+        return node
+    end
+
+    return apply(rename_call, expr)
+end
+
+
+# Rewrites 1-based array references into 0-based, single-index references:
+#   * `a[i]`    becomes `a[i - 1]`
+#   * `a[i, j]` becomes `a[(i + (j - 1) * ld_<id>) - 1]`, where `<id>` is the
+#     suffix of the indexed variable, which must be named `ptr_<id>`
+# References without index are left unchanged; three indexes or more raise an error.
+function substitute_indexing(expr :: StarpuExpr)
+
+    function func_to_run(x :: StarpuExpr)
+
+        if !isa(x, StarpuExprRef)
+            return x
+        end
+
+        #if !isa(x.ref, StarpuExprVar)
+        #    error("Only variable indexing is allowed") #TODO allow more ?
+        #end
+
+
+        nb_indexes = length(x.indexes)
+
+        if (nb_indexes >= 3)
+            error("Indexing with more than 2 indexes is not allowed") # TODO : blocks
+        end
+
+        if (nb_indexes == 0)
+            return x
+
+        elseif nb_indexes == 1
+            new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)])  #TODO : add field "offset" from STARPU_VECTOR_GET interface
+                                                                            #TODO : detect when it is a matrix used with one index only
+            return StarpuExprRef(x.ref, [new_index])
+
+        elseif nb_indexes == 2
+
+            var_name = String(x.ref.name)
+
+            # The identifier is read from position 5 onwards, so "ptr_" has to be
+            # a prefix. A regex match anywhere in the name would accept e.g.
+            # "my_ptr_x" and derive a meaningless leading-dimension name from it.
+            if !startswith(var_name, "ptr_") || isempty(var_name[5:end])
+                error("Invalid variable ($var_name) for multiple index dereferencing")
+            end
+
+            var_id = var_name[5:end]
+            ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix)
+
+            # Builds (i + (j - 1) * ld) - 1, step by step.
+            new_index = x.indexes[2]
+            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
+            new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)])
+            new_index = StarpuExprCall(:(+), [x.indexes[1], new_index])
+            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
+
+            return StarpuExprRef(x.ref, [new_index])
+        end
+    end
+
+    return apply(func_to_run, expr)
+end

+ 349 - 0
julia/StarPU.jl/src/compiler/cuda.jl

@@ -0,0 +1,349 @@
+
+
+# True when `x` is a for loop flagged as independent.
+is_indep_for_expr(x :: StarpuExpr) = isa(x, StarpuExprFor) && x.is_independant
+
+
+# Splits `expr` into (instructions before, independent loop, instructions after).
+# The middle element is `nothing` when no independent loop is found among the
+# top-level instructions. Only the top level of a block is inspected.
+function extract_init_indep_finish(expr :: StarpuExpr) # TODO : it is not a correct extraction (example : if (cond) {@indep for ...} else {return} would not work)
+                                                            # better use apply() (NOTE :assert_no_indep_for already exists) to find recursively every for loops
+
+    # The expression itself is the independent loop.
+    if is_indep_for_expr(expr)
+        return StarpuExpr[], StarpuIndepFor(expr), StarpuExpr[]
+    end
+
+    # A single instruction which is not a block : nothing to split.
+    if !isa(expr, StarpuExprBlock)
+        return [expr], nothing, StarpuExpr[]
+    end
+
+    instrs = expr.exprs
+    pos = findfirst(is_indep_for_expr, instrs)
+
+    if pos === nothing
+        return instrs, nothing, StarpuExpr[]
+    end
+
+    before = instrs[1 : pos-1]
+    indep = StarpuIndepFor(instrs[pos])
+    after = instrs[pos+1 : end]
+
+    if any(is_indep_for_expr, after)
+        error("Sequence of several independant loops is not allowed") #same it may be tricked by a Block(Indep_for(...))
+    end
+
+    return before, indep, after
+end
+
+
+
+
+# Walks `expr` and classifies the variable names it meets.
+#
+# Arguments:
+#   expr            : expression to analyse
+#   already_defined : typed variables considered defined before `expr` starts
+#
+# Returns a pair (defined, undefined):
+#   defined   : vector of StarpuExprTypedVar made of `already_defined` followed
+#               by the variables defined in `expr`
+#   undefined : names met in `expr` for which no definition was recorded
+#
+# A definition is either an affectation whose left-hand side is a typed
+# variable, or a for loop (its iterator is recorded with type Int64).
+# Raises an error on a function declaration or when a name is defined twice.
+function analyse_variable_declarations(expr :: StarpuExpr, already_defined :: Vector{StarpuExprTypedVar} = StarpuExprTypedVar[])
+
+    undefined_variables = Symbol[]
+    defined_variable_names = map((x -> x.name), already_defined)
+    defined_variable_types = map((x -> x.typ), already_defined)
+
+    function func_to_apply(x :: StarpuExpr)
+
+        if isa(x, StarpuExprFunction)
+            error("No function declaration allowed in this section")
+        end
+
+        # Any variable node whose name is not known yet is provisionally undefined.
+        if isa(x, StarpuExprVar) || isa(x, StarpuExprTypedVar)
+
+            if !(x.name in defined_variable_names) && !(x.name in undefined_variables)
+                push!(undefined_variables, x.name)
+            end
+
+            return x
+        end
+
+        if isa(x, StarpuExprAffect) || isa(x, StarpuExprFor)
+
+            if isa(x, StarpuExprAffect)
+
+                var = x.var
+
+                # An affectation to an untyped left-hand side is not a definition.
+                if !isa(var, StarpuExprTypedVar)
+                    return x
+                end
+
+                name = var.name
+                typ = var.typ
+
+            else
+                name = x.iter
+                typ = Int64
+            end
+
+            if name in defined_variable_names
+                error("Multiple definition of variable $name")
+            end
+
+            # The name may have been recorded as undefined when its variable node
+            # was visited (for an affectation, `apply` visits the left-hand side
+            # before the affectation itself) : it is now defined.
+            filter!((sym -> sym != name), undefined_variables)
+            push!(defined_variable_names, name)
+            push!(defined_variable_types, typ)
+
+            return x
+        end
+
+        return x
+    end
+
+    # Only the side effects on the captured vectors matter, the result is dropped.
+    apply(func_to_apply, expr)
+    defined_variable = map(StarpuExprTypedVar, defined_variable_names, defined_variable_types)
+
+    return defined_variable, undefined_variables
+end
+
+
+
+# Returns the first element of `vars` named `name`, or `nothing` if there is none.
+function find_variable(name :: Symbol, vars :: Vector{StarpuExprTypedVar})
+
+    pos = findfirst((candidate -> candidate.name == name), vars)
+
+    return pos === nothing ? nothing : vars[pos]
+end
+
+
+
+# Renames every call to `jlstarpu_interval_size` into a call to
+# `jlstarpu_interval_size__device`, with the same arguments.
+function add_device_to_interval_call(expr :: StarpuExpr)
+
+    function to_device_call(node :: StarpuExpr)
+
+        is_interval_size = isa(node, StarpuExprCall) && node.func == :jlstarpu_interval_size
+
+        return is_interval_size ? StarpuExprCall(:jlstarpu_interval_size__device, node.args) : node
+    end
+
+    return apply(to_device_call, expr)
+end
+
+
+
+# Builds the two functions generated for a CUDA task from `func`:
+#   * `CUDA_<name>`   : takes the arguments of the CPU kernel, runs the
+#                       instructions preceding the independent loop, launches
+#                       the kernel, synchronises on the stream returned by
+#                       starpu_cuda_get_local_stream(), then runs the
+#                       instructions following the loop;
+#   * `KERNEL_<name>` : holds the body of the independent loop.
+#
+# Returns the pair (prekernel, kernel).
+# Raises an error when no independent for loop is found.
+function transform_to_cuda_kernel(func :: StarpuExprFunction)
+
+    cpu_func = transform_to_cpu_kernel(func)
+
+    init, indep, finish = extract_init_indep_finish(cpu_func.body)
+
+    if indep === nothing
+        error("No independant for loop has been found") # TODO can fail because extraction is not correct yet
+    end
+
+    prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
+
+    prekernel_instr = vcat(init, prekernel_instr)
+    kernel_instr = vcat(kernel_instr, indep.body)
+
+    indep_for_def, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
+    prekernel_def, prekernel_undef = analyse_variable_declarations(StarpuExprBlock(prekernel_instr), cpu_func.args)
+
+    # Variables used by the kernel but defined before the launch are passed as
+    # additional kernel arguments.
+    for undef_var in indep_for_undef
+
+        found_var = find_variable(undef_var, prekernel_def)
+
+        if found_var === nothing # TODO : error then ?
+            continue
+        end
+
+        push!(kernel_args, found_var)
+    end
+
+    call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
+    kernelname = Symbol("KERNEL_", func.func)
+    cuda_call = StarpuExprCudaCall(kernelname, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
+    push!(prekernel_instr, cuda_call)
+    push!(prekernel_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
+    prekernel_instr = vcat(prekernel_instr, finish)
+
+    prekernel_name = Symbol("CUDA_", func.func)
+    prekernel = StarpuExprFunction(Nothing, prekernel_name, cpu_func.args, StarpuExprBlock(prekernel_instr))
+    prekernel = flatten_blocks(prekernel)
+
+    kernel = StarpuExprFunction(Nothing, kernelname, kernel_args, StarpuExprBlock(kernel_instr))
+    kernel = add_device_to_interval_call(kernel)
+    kernel = flatten_blocks(kernel)
+
+    return prekernel, kernel
+end
+
+
+# A nest of independent for loops gathered into a single description.
+# `iters[k]` is the iterator of the k-th loop of the nest and `sets[k]` its
+# interval; `body` is what remains once these loops have been peeled off.
+struct StarpuIndepFor
+
+    iters :: Vector{Symbol}
+    sets :: Vector{StarpuExprInterval}
+
+    body :: StarpuExpr
+end
+
+
+# Raises an error if `expr` contains an independent for loop.
+# Otherwise returns the result of the traversal.
+function assert_no_indep_for(expr :: StarpuExpr)
+
+    function reject_indep_for(node :: StarpuExpr)
+
+        found = isa(node, StarpuExprFor) && node.is_independant
+        found && error("Invalid usage of intricated @indep for loops")
+
+        return node
+    end
+
+    return apply(reject_indep_for, expr)
+end
+
+
+# Builds a StarpuIndepFor from an independent for loop.
+#
+# Directly nested independent loops are peeled off one by one, and their
+# iterators and intervals collected from the outermost to the innermost. Blocks
+# holding a single instruction are unwrapped between two loops.
+#
+# Raises an error if `expr` is not independent, or if the remaining body still
+# contains an independent for loop.
+function StarpuIndepFor(expr :: StarpuExprFor)
+
+    if !expr.is_independant
+        error("For expression must be prefixed by @indep")
+    end
+
+    # Typed accumulators, matching the field types of the structure.
+    iters = Symbol[]
+    sets = StarpuExprInterval[]
+    for_loop = expr
+
+    while isa(for_loop, StarpuExprFor) && for_loop.is_independant
+
+        push!(iters, for_loop.iter)
+        push!(sets, for_loop.set)
+        for_loop = for_loop.body
+
+        while (isa(for_loop, StarpuExprBlock) && length(for_loop.exprs) == 1)
+            for_loop = for_loop.exprs[1]
+        end
+    end
+
+    return StarpuIndepFor(iters, sets, assert_no_indep_for(for_loop))
+end
+
+
+# Generates the instructions which turn the flat identifier THREAD_ID into one
+# index per dimension.
+#
+# The returned vector starts with the declaration of THREAD_ID (computed from
+# blockIdx.x, blockDim.x and threadIdx.x) followed by an early return when
+# THREAD_ID is not lower than the product of all dimensions. Then come the
+# declarations of kernel_ids__index_1 up to kernel_ids__index_<ndims>, where
+#     kernel_ids__index_k = (THREAD_ID / (dims[k+1] * ... * dims[ndims])) % dims[k]
+# so the index of the last dimension varies fastest.
+#
+# Raises an error when `dims` is empty.
+function translate_index_code(dims :: Vector{StarpuExprVar})
+
+    ndims = length(dims)
+
+    if ndims == 0
+        error("No dimension specified")
+    end
+
+    prod = StarpuExprValue(1)
+    output = StarpuExpr[]
+    reversed_dim = reverse(dims)
+    thread_index_patern = @parse € :: Int64 = (€ / €) % €
+    thread_id = @parse THREAD_ID
+
+    # Goes from the last dimension to the first one, `prod` being the product of
+    # the dimensions already handled.
+    for i in (1 : ndims)
+        index_lvalue = StarpuExprVar(Symbol(:kernel_ids__index_, ndims - i + 1))
+        expr = replace_pattern(thread_index_patern, index_lvalue, thread_id, prod, reversed_dim[i])
+        push!(output, expr)
+
+        prod = StarpuExprCall(:(*), [prod, reversed_dim[i]])
+    end
+
+    thread_id_pattern = @parse begin
+
+        € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x
+
+        if (€ >= €)
+            return
+        end
+    end
+
+    # At this point `prod` is the product of all the dimensions.
+    bound_verif = replace_pattern(thread_id_pattern, thread_id, thread_id, prod)
+    push!(output, bound_verif)
+
+    # The instructions were pushed in reverse order : THREAD_ID must be declared first.
+    return reverse(output)
+end
+
+
+
+
+
+
+
+# Prepares, for every loop of `ind_for`, the variables describing its interval.
+#
+# For the k-th loop :
+#   * kernel_ids__start_k, kernel_ids__step_k and kernel_ids__dim_k are declared
+#     in the instructions to run before the kernel, and added to the kernel
+#     arguments with type Int64 ;
+#   * the loop iterator is declared as start_k + kernel_ids__index_k * step_k,
+#     in the instructions to add to the kernel later on.
+#
+# Returns (dims, ker_instr_to_add_later_on, pre_kernel_instr, kernel_args, kernel_instr).
+# `kernel_instr` is returned empty : nothing is pushed into it here.
+function kernel_index_declarations(ind_for :: StarpuIndepFor)
+
+    pre_kernel_instr = StarpuExpr[]
+    kernel_args = StarpuExprTypedVar[]
+    kernel_instr = StarpuExpr[]
+
+    decl_pattern = @parse € :: Int64 = €
+    interv_size_decl_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
+    iter_pattern = @parse € :: Int64 = € + € * €
+
+    dims = StarpuExprVar[]
+    ker_instr_to_add_later_on = StarpuExpr[]
+
+    for k in (1 : length(ind_for.sets))
+
+        set = ind_for.sets[k]
+
+        start_var = starpu_parse(Symbol(:kernel_ids__start_, k))
+        start_decl = replace_pattern(decl_pattern, start_var, set.start)
+
+        step_var = starpu_parse(Symbol(:kernel_ids__step_, k))
+        step_decl = replace_pattern(decl_pattern, step_var, set.step)
+
+        dim_var = starpu_parse(Symbol(:kernel_ids__dim_, k))
+        dim_decl = replace_pattern(interv_size_decl_pattern, dim_var, start_var, step_var, set.stop)
+
+        push!(dims, dim_var)
+
+        push!(pre_kernel_instr, start_decl, step_decl, dim_decl)
+        push!(kernel_args, StarpuExprTypedVar(start_var.name, Int64))
+        push!(kernel_args, StarpuExprTypedVar(step_var.name, Int64))
+        push!(kernel_args, StarpuExprTypedVar(dim_var.name, Int64))
+
+        # kernel_ids__index_k is the name declared by translate_index_code.
+        iter_var = starpu_parse(ind_for.iters[k])
+        index_var = starpu_parse(Symbol(:kernel_ids__index_, k))
+        iter_decl = replace_pattern(iter_pattern, iter_var, start_var, index_var, step_var)
+
+        push!(ker_instr_to_add_later_on, iter_decl)
+    end
+
+
+    return dims, ker_instr_to_add_later_on, pre_kernel_instr , kernel_args, kernel_instr
+end
+
+
+
+# Produces what is needed to launch and index a kernel for `ind_for`.
+#
+# Returns (pre_kernel_instr, kernel_args, kernel_instr) :
+#   * pre_kernel_instr : declarations of the interval variables, followed by
+#     `nthreads` (product of all dimensions) and `nblocks`
+#     ((nthreads + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK) ;
+#   * kernel_args : the typed interval variables ;
+#   * kernel_instr : the index decomposition followed by the iterator declarations.
+function analyse_sets(ind_for :: StarpuIndepFor)
+
+    init_pattern = @parse € :: Int64 = €
+    block_count_pattern = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK
+
+    dims, iterator_decls, pre_kernel_instr, kernel_args, kernel_instr  = kernel_index_declarations(ind_for)
+
+    # 1 * dim_1 * dim_2 * ...
+    total_size = foldl(((acc, d) -> StarpuExprCall(:(*), [acc, d])), dims ; init = (@parse 1))
+
+    thread_count = @parse nthreads
+    push!(pre_kernel_instr, replace_pattern(init_pattern, thread_count, total_size))
+
+    block_count = @parse nblocks
+    push!(pre_kernel_instr, replace_pattern(block_count_pattern, block_count, thread_count))
+
+    # The index decomposition must come before the iterators which depend on it.
+    append!(kernel_instr, translate_index_code(dims))
+    append!(kernel_instr, iterator_decls)
+
+    return pre_kernel_instr, kernel_args, kernel_instr
+end

julia/src/Compiler/expression_manipulation.jl → julia/StarPU.jl/src/compiler/expression_manipulation.jl


+ 928 - 0
julia/StarPU.jl/src/compiler/expressions.jl

@@ -0,0 +1,928 @@
+
+#======================================================
+                AFFECTATION
+======================================================#
+# Root of all the expression nodes handled by the compiler.
+abstract type StarpuExpr end
+# Nodes carrying an explicit type annotation.
+abstract type StarpuExprTyped <: StarpuExpr end
+
+
+# Variable annotated with a type (`name :: typ`).
+struct StarpuExprTypedVar <: StarpuExprTyped
+    name :: Symbol
+    typ :: Type
+end
+
+# Arbitrary expression annotated with a type.
+struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ?
+    expr :: StarpuExpr
+    typ :: Type
+end
+
+# Affectation `var = expr`.
+struct StarpuExprAffect <: StarpuExpr
+    var :: StarpuExpr
+    expr :: StarpuExpr
+end
+
+# Sequence of instructions.
+struct StarpuExprBlock <: StarpuExpr
+    exprs :: Vector{StarpuExpr}
+end
+
+# Call of the function named `func` on `args`.
+struct StarpuExprCall <: StarpuExpr
+    func :: Symbol
+    args :: Vector{StarpuExpr}
+end
+# Launch of the kernel named `ker_name` on `args`, with the given number of
+# blocks and of threads per block.
+struct StarpuExprCudaCall <: StarpuExpr
+
+    ker_name :: Symbol
+
+    nblocks :: StarpuExpr
+    threads_per_block :: StarpuExpr
+
+    args :: Vector{StarpuExpr}
+
+end
+# Access to the field `field` of `left`, printed with "->" when `is_an_arrow`
+# is true and with "." otherwise.
+struct StarpuExprField <: StarpuExpr
+
+    left :: StarpuExpr
+    field :: Symbol
+
+    is_an_arrow :: Bool
+end
+# Interval described by `start`, `step` and `stop`. `id` is used to name the
+# variables generated for the interval ; it defaults to rand_string().
+struct StarpuExprInterval <: StarpuExpr
+    start :: StarpuExpr
+    step :: StarpuExpr
+    stop :: StarpuExpr
+
+    id :: String
+
+    function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string())
+        return new(start, step, stop, id)
+    end
+
+end
+# For loop : `iter` takes its values in `set` and `body` is run for each of them.
+# `set_declarations` holds the declarations printed ahead of the loop.
+struct StarpuExprFor <: StarpuExpr
+
+    iter :: Symbol
+    set:: StarpuExprInterval
+    body :: StarpuExpr
+
+    is_independant :: Bool
+    set_declarations :: Vector{StarpuExpr}
+
+end
+# Function definition.
+struct StarpuExprFunction <: StarpuExpr
+    ret_type :: Type
+    func :: Symbol
+    args :: Vector{StarpuExprTypedVar}
+    body :: StarpuExpr
+end
+# Conditional without else branch.
+struct StarpuExprIf <: StarpuExpr
+    cond :: StarpuExpr
+    then_statement :: StarpuExpr
+end
+
+
+# Conditional with an else branch.
+struct StarpuExprIfElse <: StarpuExpr
+    cond :: StarpuExpr
+    then_statement :: StarpuExpr
+    else_statement :: StarpuExpr
+end
+
+# Indexing of `ref` by `indexes`.
+struct StarpuExprRef <: StarpuExpr
+    ref :: StarpuExpr
+    indexes :: Vector{StarpuExpr}
+end
+# Return statement.
+struct StarpuExprReturn <: StarpuExpr
+    value :: StarpuExpr
+end
+# Untyped variable.
+struct StarpuExprVar <: StarpuExpr
+    name :: Symbol
+end
+# Node without any content.
+struct StarpuExprInvalid <: StarpuExpr
+end
+
+# Literal value.
+struct StarpuExprValue <: StarpuExpr
+    value :: Any
+end
+
+# While loop.
+struct StarpuExprWhile <: StarpuExpr
+    cond :: StarpuExpr
+    body :: StarpuExpr
+end
+
+
+# Converts a Julia `=` expression into a StarpuExprAffect.
+# Raises an error when the head of `x` is not `=`.
+function starpu_parse_affect(x :: Expr)
+
+    x.head == :(=) || error("Invalid \"affectation\" expression")
+
+    return StarpuExprAffect(starpu_parse(x.args[1]), starpu_parse(x.args[2]))
+end
+
+
+# Two affectations are equal when both sides are ; anything else differs from
+# an affectation.
+function equals(x :: StarpuExprAffect, y :: StarpuExpr)
+
+    return isa(y, StarpuExprAffect) && equals(x.var, y.var) && equals(x.expr, y.expr)
+end
+
+
+# Prints `var = expr`. When the left-hand side is a typed variable, the
+# right-hand side is wrapped in a cast to the translated type, to avoid warnings
+# (or errors for cuda) during compilation time.
+function print(io :: IO, x :: StarpuExprAffect ; indent = 0, restrict = false)
+
+    print(io, x.var, indent = indent)
+    print(io, " = ")
+
+    if isa(x.var, StarpuExprTypedVar)
+        print(io, "(", starpu_type_traduction(x.var.typ), ") (")
+        print(io, x.expr, indent = indent)
+        print(io, ")")
+    else
+        print(io, x.expr, indent = indent)
+    end
+
+end
+
+# Applies `func` to both sides of the affectation, then to the rebuilt affectation.
+function apply(func :: Function, expr :: StarpuExprAffect)
+
+    rebuilt = StarpuExprAffect(apply(func, expr.var), apply(func, expr.expr))
+
+    return func(rebuilt)
+end
+
+#======================================================
+                BLOCK
+(series of instruction, not C variable scoping block)
+======================================================#
+
+
+
+
+# Tells whether an element of a parsed block must be dropped before conversion :
+# only line number nodes are.
+is_unwanted(x :: Symbol) = false
+
+is_unwanted(x :: LineNumberNode) = true
+
+is_unwanted(x :: Expr) = false
+
+# Converts a Julia `block` expression into a StarpuExprBlock, dropping the
+# elements flagged by `is_unwanted`.
+# Raises an error when the head of `x` is not `block`.
+function starpu_parse_block(x :: Expr)
+
+    x.head == :block || error("Invalid \"block\" expression")
+
+    kept = filter((arg -> !is_unwanted(arg)), x.args)
+
+    return StarpuExprBlock(map(starpu_parse, kept))
+end
+
+
+# Prints every instruction followed by ";", with a new line between two
+# instructions (none after the last one).
+function print(io :: IO, x :: StarpuExprBlock ; indent = 0, restrict=false)
+    last_pos = length(x.exprs)
+    for (pos, instr) in enumerate(x.exprs)
+        print(io, instr, indent = indent)
+        print(io, ";")
+        pos == last_pos || print_newline(io, indent)
+    end
+end
+
+
+
+
+# Applies `func` to every instruction of the block, then to the rebuilt block.
+function apply(func :: Function, expr :: StarpuExprBlock)
+
+    rebuilt = StarpuExprBlock(map((instr -> apply(func, instr)), expr.exprs))
+
+    return func(rebuilt)
+end
+
+#======================================================
+                FUNCTION CALL
+======================================================#
+
+
+
+
+# Converts a Julia `call` expression into a StarpuExprCall. A call to `:` is
+# handed over to `starpu_parse_interval`.
+# Raises an error when the head of `x` is not `call`, or when the called
+# function is not a variable.
+function starpu_parse_call(x :: Expr)
+
+    x.head == :call || error("Invalid \"call\" expression")
+
+    callee = starpu_parse(x.args[1])
+
+    x.args[1] == Symbol(":") && return starpu_parse_interval(x)
+
+    isa(callee, StarpuExprVar) || error("Invalid \"call\" expression : function must be a variable")
+
+    return StarpuExprCall(callee.name, map(starpu_parse, x.args[2:end]))
+end
+
+
+# Function names printed between their arguments (when at least two arguments
+# are given) instead of in front of them.
+starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(%))
+
+
+# Prints the call as `func(arg1, arg2, ...)`.
+function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0, restrict=false)
+
+    print(io, x.func, "(")
+
+    for (pos, arg) in enumerate(x.args)
+        pos == 1 || print(io, ", ")
+        print(io, arg, indent = indent)
+    end
+
+    print(io, ")")
+end
+
+
+# Prints the call as `(arg1) func (arg2) func ...`, every argument being
+# parenthesised.
+function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false)
+    for (pos, arg) in enumerate(x.args)
+        pos == 1 || print(io, " ", x.func, " ")
+        print(io, "(")
+        print(io, arg, indent = indent)
+        print(io, ")")
+    end
+end
+
+# Prints a call in infix form when it has at least two arguments and its
+# function is one of `starpu_infix_operators`, in prefix form otherwise.
+function print(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false)
+
+    use_infix = length(x.args) >= 2 && x.func in starpu_infix_operators
+    printer = use_infix ? print_infix : print_prefix
+
+    printer(io, x, indent = indent)
+end
+
+
+
+
+# Applies `func` to every argument of the call, then to the rebuilt call.
+function apply(func :: Function, expr :: StarpuExprCall)
+
+    new_args = map((arg -> apply(func, arg)), expr.args)
+
+    return func(StarpuExprCall(expr.func, new_args))
+end
+
+
+#======================================================
+                CUDA KERNEL CALL
+======================================================#
+
+
+
+
+
+# Prints a kernel launch over several lines :
+#     ker_name
+#         <<< nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()
+#         >>> (arg1, arg2, ...);
+# A line break is inserted in the argument list before the 5th, 9th, ... argument.
+function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0,restrict=false)
+
+    print_newline(io, indent)
+    print(io, expr.ker_name)
+    print_newline(io, indent + starpu_indent_size)
+    print(io, "<<< ")
+    print(io, expr.nblocks, indent = indent + 2 * starpu_indent_size)
+    print(io, ", ")
+    print(io, expr.threads_per_block, indent = indent + 2 * starpu_indent_size)
+    print(io, ", 0, starpu_cuda_get_local_stream()")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, ">>> (")
+
+    for i in (1 : length(expr.args))
+
+        if (i != 1)
+            print(io, ", ")
+            # Four arguments per line.
+            if (i % 4 == 1)
+                print_newline(io, indent + 2 * starpu_indent_size + 1)
+            end
+        end
+
+        print(io, expr.args[i], indent = indent + 2 * starpu_indent_size)
+
+    end
+
+    print(io, ");")
+    print_newline(io, indent)
+
+end
+
+
+# Applies `func` to the number of blocks, the number of threads per block and
+# every argument of the kernel launch, then to the rebuilt launch.
+#
+# The sub-expressions are traversed with `apply` (and not with a direct call to
+# `func`) so that their own children are visited too, and `func` is given the
+# rebuilt node, as the other `apply` methods do.
+function apply(func :: Function, expr :: StarpuExprCudaCall)
+
+    nblocks = apply(func, expr.nblocks)
+    threads_per_block = apply(func, expr.threads_per_block)
+    args = map((x -> apply(func, x)), expr.args)
+
+    return func(StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args))
+end
+
+
+#======================================================
+                STRUCTURE FIELDS
+======================================================#
+
+
+
+
+
+# Parse a dot expression `left.field` into a StarpuExprField created with
+# `is_an_arrow` set to false. The right-hand side must be a quoted Symbol.
+function starpu_parse_field(x :: Expr)
+    well_formed = x.head == :(.) && length(x.args) == 2
+    well_formed || error("Invalid parsing of dot expression")
+
+    left = starpu_parse(x.args[1])
+
+    field = x.args[2]
+    if !(isa(field, QuoteNode) && isa(field.value, Symbol))
+        error("Invalid parsing of dot expression")
+    end
+
+    return StarpuExprField(left, field.value, false)
+end
+
+
+# Print a field access as `(<left>)->field` when `is_an_arrow` is set and as
+# `(<left>).field` otherwise.
+function print(io :: IO, x :: StarpuExprField ; indent = 0,restrict=false)
+    accessor = x.is_an_arrow ? "->" : '.'
+    print(io, "(")
+    print(io, x.left, indent = indent)
+    print(io, ")", accessor, x.field)
+end
+
+
+
+# Transform a field access. Note that `expr.left` is passed to `func` directly
+# rather than through `apply`; the rebuilt node is then passed to `func` too.
+function apply(func :: Function, expr :: StarpuExprField)
+    new_left = func(expr.left)
+    return func(StarpuExprField(new_left, expr.field, expr.is_an_arrow))
+end
+
+#======================================================
+                FOR LOOPS
+======================================================#
+
+
+
+
+
+# Parse `for <var> = <interval> <body> end` into a StarpuExprFor whose list of
+# set declarations starts empty. `is_independant` is stored in the node as is.
+function starpu_parse_for(x :: Expr; is_independant = false)
+    x.head == :for || error("Invalid \"for\" expression")
+
+    assignment = x.args[1]
+    assignment.head == :(=) || error("Invalid \"for\" iterator affectation")
+
+    # The loop variable must parse to a plain variable ...
+    loop_var = starpu_parse(assignment.args[1])
+    isa(loop_var, StarpuExprVar) || error("Invalid \"for\" iterator")
+
+    # ... and the iterated set to an interval.
+    interval = starpu_parse(assignment.args[2])
+    isa(interval, StarpuExprInterval) || error("Set of values in \"for\" loop must be an interval")
+
+    loop_body = starpu_parse(x.args[2])
+    return StarpuExprFor(loop_var.name, interval, loop_body, is_independant, StarpuExpr[])
+end
+
+
+
+
+
+# Print a C `for` loop. The declarations in `x.set_declarations` are printed
+# first, wrapped in a StarpuExprBlock. When the interval step is a literal
+# (StarpuExprValue) the loop runs on the iterator alone, comparing with `<=`
+# for a non-negative step and `>=` otherwise. For any other step an auxiliary
+# counter `iter_<id>` runs from 0 up to `dim_<id>` while the iterator is
+# advanced by `step_<id>`.
+function print(io :: IO, x :: StarpuExprFor ; indent = 0,restrict=false)
+    nested = indent + starpu_indent_size
+
+    print_newline(io, indent)
+    print(io, StarpuExprBlock(x.set_declarations), indent = indent)
+
+    # Names of the C variables associated with this interval.
+    suffix = x.set.id
+    first_name = "start_" * suffix
+    last_name = "stop_" * suffix
+    step_name = "step_" * suffix
+    size_name = "dim_" * suffix
+    counter_name = "iter_" * suffix
+
+    print_newline(io, indent, 2)
+
+    if isa(x.set.step, StarpuExprValue)
+        step_value = x.set.step.value
+        print(io, "for ($(x.iter) = $first_name ; ")
+        cmp = step_value >= 0 ? "<=" : ">="
+        print(io, "$(x.iter) $cmp $last_name ; ")
+        print(io, "$(x.iter) += $step_value)")
+    else
+        print(io, "for ($counter_name = 0, $(x.iter) = $first_name ; ")
+        print(io, "$counter_name < $size_name ; ")
+        print(io, "$counter_name += 1, $(x.iter) += $step_name)")
+    end
+
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, nested)
+    print(io, x.body, indent = nested)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+end
+
+
+
+# Transform a `for` loop: the set declarations, the interval and the body are
+# each transformed with `apply` (in that order), then the rebuilt loop is
+# passed to `func`. The iterator name and `is_independant` are kept.
+function apply(func :: Function, expr :: StarpuExprFor)
+    new_decls = map(decl -> apply(func, decl), expr.set_declarations)
+    new_set = apply(func, expr.set)
+    new_body = apply(func, expr.body)
+
+    rebuilt = StarpuExprFor(expr.iter, new_set, new_body, expr.is_independant, new_decls)
+    return func(rebuilt)
+end
+
+
+#======================================================
+                FUNCTION DECLARATION
+======================================================#
+
+
+
+
+# Parse `function name(arg :: T, ...) :: Ret ... end` into a
+# StarpuExprFunction. The prototype must carry an explicit return type and
+# every parameter must be a typed variable.
+function starpu_parse_function(x :: Expr)
+    x.head == :function || error("Invalid \"function\" expression")
+
+    signature = starpu_parse(x.args[1])
+    isa(signature, StarpuExprTypedExpr) ||
+        error("Invalid \"function\" prototype : a return type must me explicited")
+
+    call = signature.expr
+    isa(call, StarpuExprCall) || error("Invalid \"function\" prototype")
+
+    # Collect the parameters, rejecting the first one that is not typed.
+    params = StarpuExprTypedVar[]
+    foreach(call.args) do param
+        isa(param, StarpuExprTypedVar) || error("Invalid \"function\" argument list")
+        push!(params, param)
+    end
+
+    parsed_body = starpu_parse(x.args[2])
+    return StarpuExprFunction(signature.typ, call.func, params, parsed_body)
+end
+
+
+
+# Print a C function definition: translated return type, function name,
+# parameter list (a line break is inserted before the 5th, 9th, ... parameter;
+# parameters are printed with `restrict = true`) and the braced body,
+# followed by blank lines.
+function print(io :: IO, x :: StarpuExprFunction ; indent = 0,restrict=false)
+    nested = indent + starpu_indent_size
+
+    print(io, starpu_type_traduction(x.ret_type), " ")
+    print(io, x.func, '(')
+
+    for (k, param) in enumerate(x.args)
+        if k > 1
+            print(io, ", ")
+            k % 4 == 1 && print_newline(io, nested + length(String(x.func)) + 13)
+        end
+        print(io, param, indent = nested, restrict = true)
+    end
+
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, nested)
+    print(io, x.body, indent = nested)
+    print_newline(io, indent)
+    print(io, "}\n\n")
+    print_newline(io, indent)
+end
+
+
+
+# Transform a function definition: parameters and body go through `apply`,
+# return type and name are kept, and the rebuilt node is passed to `func`.
+function apply(func :: Function, expr :: StarpuExprFunction)
+    new_params = map(param -> apply(func, param), expr.args)
+    new_body = apply(func, expr.body)
+
+    rebuilt = StarpuExprFunction(expr.ret_type, expr.func, new_params, new_body)
+    return func(rebuilt)
+end
+
+
+#======================================================
+                IF STATEMENT
+======================================================#
+
+
+
+
+
+# Parse an `if` expression. Two sub-expressions (condition, then-branch) give
+# a StarpuExprIf; when a third one is present it becomes the else-branch of a
+# StarpuExprIfElse.
+function starpu_parse_if(x :: Expr)
+    x.head == :if || error("Invalid \"if\" expression")
+
+    n_parts = length(x.args)
+    n_parts >= 2 || error("Invalid \"if\" statement")
+
+    cond = starpu_parse(x.args[1])
+    then_branch = starpu_parse(x.args[2])
+
+    n_parts == 2 && return StarpuExprIf(cond, then_branch)
+
+    else_branch = starpu_parse(x.args[3])
+    return StarpuExprIfElse(cond, then_branch, else_branch)
+end
+
+
+# Print a C `if` statement with a braced then-branch and, for a
+# StarpuExprIfElse, a braced ` else` branch followed by a newline.
+function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0,restrict=false)
+    nested = indent + starpu_indent_size
+
+    print_newline(io, indent)
+    print(io, "if (")
+    print(io, x.cond, indent = nested)
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, nested)
+    print(io, x.then_statement, indent = nested)
+    print_newline(io, indent)
+    print(io, "}")
+
+    # A plain `if` stops here, without a trailing newline.
+    isa(x, StarpuExprIfElse) || return
+
+    print(io, " else")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, nested)
+    print(io, x.else_statement, indent = nested)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+end
+
+
+
+# Transform an `if` without else: condition and then-branch go through
+# `apply`, then the rebuilt node is passed to `func`.
+function apply(func :: Function, expr :: StarpuExprIf)
+    new_cond = apply(func, expr.cond)
+    new_then = apply(func, expr.then_statement)
+    return func(StarpuExprIf(new_cond, new_then))
+end
+
+
+
+# Transform an `if`/`else`: condition, then-branch and else-branch go through
+# `apply` (in that order), then the rebuilt node is passed to `func`.
+function apply(func :: Function, expr :: StarpuExprIfElse)
+    new_cond = apply(func, expr.cond)
+    new_then = apply(func, expr.then_statement)
+    new_else = apply(func, expr.else_statement)
+    return func(StarpuExprIfElse(new_cond, new_then, new_else))
+end
+
+#======================================================
+                INTERVALS
+======================================================#
+
+
+
+
+# Parse a range call expression into a StarpuExprInterval.
+#   - two operands  (`start:stop`)      -> step defaults to StarpuExprValue(1)
+#   - three operands (`start:step:stop`) -> explicit step
+# `x.args[1]` is the callee, so the operands start at `x.args[2]`.
+# Raises an error when `x` is not a call or has fewer than two operands.
+function starpu_parse_interval(x :: Expr)
+
+    if (x.head != :(call))
+        error("Invalid \"interval\" expression")
+    end
+
+    # Without this check a malformed call would fail below with a bare
+    # BoundsError on x.args[3] instead of a parsing error.
+    if (length(x.args) < 3)
+        error("Invalid \"interval\" expression")
+    end
+
+    start = starpu_parse(x.args[2])
+    # Second operand: the stop of `start:stop`, or the step of `start:step:stop`.
+    second = starpu_parse(x.args[3])
+
+    if (length(x.args) == 3)
+        return StarpuExprInterval(start, StarpuExprValue(1), second)
+    end
+
+    stop = starpu_parse(x.args[4])
+
+    return StarpuExprInterval(start, second, stop)
+end
+
+
+
+# Transform an interval: start, step and stop go through `apply` (in that
+# order); the rebuilt interval keeps the original `id` and is passed to `func`.
+function apply(func :: Function, expr :: StarpuExprInterval)
+    new_start = apply(func, expr.start)
+    new_step = apply(func, expr.step)
+    new_stop = apply(func, expr.stop)
+
+    rebuilt = StarpuExprInterval(new_start, new_step, new_stop, id = expr.id)
+    return func(rebuilt)
+end
+
+#======================================================
+                ARRAYS AND REFERENCES
+======================================================#
+
+
+
+
+# Parse an indexing expression `a[i, j, ...]` into a StarpuExprRef: the first
+# sub-expression is the indexed object, all the following ones are indexes.
+function starpu_parse_ref(x :: Expr)
+    x.head == :ref || error("Invalid \"reference\" expression")
+
+    indexed = starpu_parse(x.args[1])
+    subscripts = map(starpu_parse, x.args[2:end])
+
+    return StarpuExprRef(indexed, subscripts)
+end
+
+
+
+# Structural equality for references: `y` must be a StarpuExprRef with an
+# equal indexed object, the same number of indexes and pairwise equal indexes.
+function equals(x :: StarpuExprRef, y :: StarpuExpr)
+    typeof(y) == StarpuExprRef || return false
+
+    same_shape = equals(x.ref, y.ref) && length(x.indexes) == length(y.indexes)
+    same_shape || return false
+
+    pairwise = map(equals, x.indexes, y.indexes)
+    return all(pairwise)
+end
+
+
+
+
+# Print a reference as the indexed object followed by one `[index]` group per
+# index, e.g. `a[i][j]`.
+function print(io :: IO, x :: StarpuExprRef ; indent = 0,restrict=false)
+    print(io, x.ref, indent = indent)
+
+    for subscript in x.indexes
+        print(io, "[")
+        print(io, subscript, indent = indent)
+        print(io, "]")
+    end
+end
+
+
+
+# Transform a reference: the indexed object and every index go through
+# `apply`, then the rebuilt node is passed to `func`.
+function apply(func :: Function, expr :: StarpuExprRef)
+    new_ref = apply(func, expr.ref)
+    new_indexes = map(index -> apply(func, index), expr.indexes)
+    return func(StarpuExprRef(new_ref, new_indexes))
+end
+
+#======================================================
+                RETURN EXPRESSION
+======================================================#
+
+
+
+# Parse a `return` expression into a StarpuExprReturn.
+function starpu_parse_return(x :: Expr)
+    x.head == :return || error("Invalid \"return\" expression")
+
+    returned = starpu_parse(x.args[1])
+
+    # A returned typed variable is reduced to a plain variable (its type is
+    # dropped), which allows matching against StarpuExprVar.
+    if isa(returned, StarpuExprTypedVar)
+        returned = StarpuExprVar(returned.name)
+    end
+
+    return StarpuExprReturn(returned)
+end
+
+# Print `return ` followed by the returned expression (no trailing `;`).
+function print(io :: IO, x :: StarpuExprReturn ; indent = 0,restrict=false)
+    print(io, "return ")
+    return print(io, x.value, indent = indent)
+end
+
+# Transform a `return`: the returned value goes through `apply`, then the
+# rebuilt node is passed to `func`.
+function apply(func :: Function, expr :: StarpuExprReturn)
+    new_value = apply(func, expr.value)
+    return func(StarpuExprReturn(new_value))
+end
+
+# Fallback for node types without a more specific `apply` method: the node is
+# handed to `func` as is.
+apply(func :: Function, expr :: StarpuExpr) = func(expr)
+
+# Print a variable as its bare name. `indent` and `restrict` are ignored;
+# `restrict` is accepted so that this method takes the same keywords as the
+# `print` methods of the other expression nodes.
+print(io :: IO, x :: StarpuExprVar ; indent = 0, restrict = false) = print(io, x.name)
+
+# Print a literal value in C syntax:
+#   - `nothing` prints nothing at all,
+#   - strings are wrapped in double quotes, characters in single quotes
+#     (no escaping is performed),
+#   - any other value is printed with its default representation.
+function print(io :: IO, x :: StarpuExprValue ; indent = 0,restrict=false)
+
+    value = x.value
+
+    # Identity comparison: `==` may be overloaded or return `missing`, which
+    # would make the `if` itself fail.
+    if value === nothing
+        return
+    end
+
+    if isa(value, AbstractString)
+        print(io, '"', value, '"')
+        return
+    end
+
+    if isa(value, Char)
+        print(io, '\'', value, '\'')
+        return
+    end
+
+    print(io, value)
+end
+
+
+
+
+
+# Print the placeholder text for an invalid expression. `indent` and
+# `restrict` are ignored; `restrict` is accepted so that this method takes the
+# same keywords as the `print` methods of the other expression nodes.
+print(io :: IO, x :: StarpuExprInvalid ; indent = 0, restrict = false) = print(io, "INVALID")
+
+
+
+# Fallback parser: anything without a more specific method is wrapped as a
+# literal value.
+starpu_parse(raw_value :: Any) = StarpuExprValue(raw_value)
+
+# A bare symbol parses to a variable of that name.
+starpu_parse(sym :: Symbol) = StarpuExprVar(sym)
+
+#======================================================
+                TYPED EXPRESSION
+======================================================#
+
+
+
+# Parse a type-annotated expression `expr :: T`. The annotation is evaluated
+# with `eval` and must yield a `Type`; otherwise the annotation is printed and
+# an error is raised. A typed plain variable gives a StarpuExprTypedVar, any
+# other typed expression a StarpuExprTypedExpr.
+function starpu_parse_typed(x :: Expr)
+    x.head == :(::) || error("Invalid type assigned expression")
+
+    inner = starpu_parse(x.args[1])
+
+    annotated = try
+        eval(x.args[2]) :: Type
+    catch
+        print(x.args[2])
+        error("Invalid type in type assigned expression")
+    end
+
+    isa(inner, StarpuExprVar) && return StarpuExprTypedVar(inner.name, annotated)
+
+    return StarpuExprTypedExpr(inner, annotated)
+end
+
+
+
+
+
+# Julia scalar type => name of the corresponding C type in generated code.
+starpu_type_traduction_dict = Dict(
+    # signed / unsigned integers
+    Int32   => "int32_t",
+    Int64   => "int64_t",
+    UInt32  => "uint32_t",
+    UInt64  => "uint64_t",
+    # floating point
+    Float32 => "float",
+    Float64 => "double",
+    # no value
+    Nothing => "void"
+)
+
+
+
+# Translate a Julia type into the text of a C type: array types are delegated
+# to `starpu_type_traduction_array`, `Ptr{T}` becomes the translation of `T`
+# followed by `*`, and everything else is looked up in
+# `starpu_type_traduction_dict` (a missing entry raises a KeyError).
+function starpu_type_traduction(x)
+    x <: Array && return starpu_type_traduction_array(x)
+    x <: Ptr && return starpu_type_traduction(eltype(x)) * "*"
+    return starpu_type_traduction_dict[x]
+end
+
+# Translate `Array{T,N}` into the translation of `T` followed by `N` stars,
+# i.e. one level of C pointer per array dimension.
+function starpu_type_traduction_array(x :: Type{Array{T,N}})  where {T,N}
+    return starpu_type_traduction(T) * "*" ^ N
+end
+
+# Print a typed node. A typed variable is printed as a C declaration
+# `<c type> <name>`; for any other typed expression only the wrapped
+# expression is printed. `restrict` is accepted but has no effect here.
+function print(io :: IO, x :: StarpuExprTyped ; indent = 0,restrict=false)
+    if !isa(x, StarpuExprTypedVar)
+        return print(io, x.expr, indent = indent)
+    end
+
+    print(io,starpu_type_traduction(x.typ), " ")
+    return print(io, x.name)
+end
+
+
+
+# Transform a typed expression: the wrapped expression goes through `apply`,
+# the type is kept, and the rebuilt node is passed to `func`.
+function apply(func :: Function, expr :: StarpuExprTypedExpr)
+    inner = apply(func, expr.expr)
+    return func(StarpuExprTypedExpr(inner, expr.typ))
+end
+
+#======================================================
+                While loop
+======================================================#
+
+
+# Parse a `while` loop (condition, body) into a StarpuExprWhile.
+function starpu_parse_while(x :: Expr)
+    x.head == :while || error("Invalid \"while\" loop")
+    length(x.args) >= 2 || error("Invalid \"while\" loop")
+
+    loop_cond = starpu_parse(x.args[1])
+    loop_body = starpu_parse(x.args[2])
+
+    return StarpuExprWhile(loop_cond, loop_body)
+end
+
+
+# Print a C `while` loop with a braced body, followed by a newline.
+# `restrict` is ignored; it is accepted so that this method takes the same
+# keywords as the `print` methods of the other expression nodes.
+function print(io :: IO, x :: StarpuExprWhile ; indent = 0, restrict = false)
+    print_newline(io, indent)
+    print(io, "while (")
+    print(io, x.cond, indent = indent + starpu_indent_size)
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.body, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+end
+
+
+
+# Transform a `while` loop: condition and body go through `apply`, then the
+# rebuilt node is passed to `func`.
+function apply(func :: Function, expr :: StarpuExprWhile)
+    new_cond = apply(func, expr.cond)
+    new_body = apply(func, expr.body)
+    return func(StarpuExprWhile(new_cond, new_body))
+end

+ 132 - 0
julia/StarPU.jl/src/compiler/file_generation.jl

@@ -0,0 +1,132 @@
+
+
+
+global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
+
+
+
+global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
+
+# C prelude written by `@codelet` at the top of each generated CPU kernel file.
+# `jlstarpu_interval_size(start, step, stop)` returns the number of elements
+# of the Julia range `start:step:stop`: floor((stop - start) / step) + 1 when
+# the range is non-empty, 0 otherwise (including for a zero step, which would
+# otherwise divide by zero).
+const cpu_kernel_file_start = "#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    if (step > 0 && stop >= start){
+            return (stop - start) / step + 1;
+    }
+    if (step < 0 && stop <= start){
+            return (start - stop) / (-step) + 1;
+    }
+    return 0;
+}
+
+"
+
+# CUDA prelude written at the top of each generated CUDA kernel file.
+# `jlstarpu_interval_size` (host) and `jlstarpu_interval_size__device`
+# (device) return the number of elements of the Julia range
+# `start:step:stop`: floor((stop - start) / step) + 1 when the range is
+# non-empty, 0 otherwise (including for a zero step, which would otherwise
+# divide by zero).
+const cuda_kernel_file_start = "#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+
+#define THREADS_PER_BLOCK 64
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    if (step > 0 && stop >= start){
+            return (stop - start) / step + 1;
+    }
+    if (step < 0 && stop <= start){
+            return (start - stop) / (-step) + 1;
+    }
+    return 0;
+}
+
+
+__device__ static inline long long jlstarpu_max__device(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+__device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
+{
+	if (step > 0 && stop >= start){
+		return (stop - start) / step + 1;
+	}
+	if (step < 0 && stop <= start){
+		return (start - stop) / (-step) + 1;
+	}
+	return 0;
+}
+
+
+"
+
+"""
+    starpu_new_cuda_kernel_file(file_name :: String)
+
+Create (or truncate) `file_name`, write the CUDA prelude
+`cuda_kernel_file_start` into it and record the name in the global
+`generated_cuda_kernel_file_name`. Returns `nothing`.
+"""
+function starpu_new_cuda_kernel_file(file_name :: String)
+
+    global generated_cuda_kernel_file_name = file_name
+
+    # The do-block form closes the file even when writing throws.
+    open(file_name, "w") do kernel_file
+        print(kernel_file, cuda_kernel_file_start)
+    end
+
+    return nothing
+end
+
+# A macro is exported under its `@`-prefixed name; `export target` would
+# export a binding that does not exist.
+export @target
+
+# `@target expr` evaluates `expr` with `eval` while the macro is being
+# expanded and expands to code that assigns the resulting value to the global
+# `starpu_target`.
+macro target(x)
+    targets = eval(x)
+    return quote
+        starpu_target=$targets
+        global starpu_target
+    end
+end
+
+export CPU_CODELETS
+global CPU_CODELETS=Dict{String,String}()
+export CUDA_CODELETS
+global CUDA_CODELETS=Dict{String,String}()
+
+"""
+    @codelet function name(args...) :: RetType ... end
+
+Parse the function definition `x` and build its CPU and CUDA kernels with
+`transform_to_cpu_kernel` and `transform_to_cuda_kernel`. The macro expands to
+code that, depending on the bits of `starpu_target` (read when the macro is
+expanded), writes `genc_<name>.c` and/or `gencuda_<name>.cu` and registers
+`<name>` in `CPU_CODELETS` and/or `CUDA_CODELETS`.
+"""
+macro codelet(x)
+    parsed = starpu_parse(x)
+    # x.args[1] is the `prototype :: RetType` node; its first argument is the
+    # call prototype, whose first argument is the function name.
+    name = string(x.args[1].args[1].args[1])
+    dump(name)
+    cpu_expr = transform_to_cpu_kernel(parsed)
+    prekernel, kernel = transform_to_cuda_kernel(parsed)
+    cpu_file = string("genc_", name, ".c")
+    cuda_file = string("gencuda_", name, ".cu")
+    targets = starpu_target
+    return quote
+
+        if ($targets&$STARPU_CPU!=0)
+            # do-block form: the file is closed even if printing throws
+            open($cpu_file, "w") do kernel_file
+                @debugprint "generating " $cpu_file
+                print(kernel_file, $cpu_kernel_file_start)
+                print(kernel_file, $cpu_expr)
+            end
+            CPU_CODELETS[$name]=$name
+        end
+
+        if ($targets&$STARPU_CUDA!=0)
+            open($cuda_file, "w") do kernel_file
+                @debugprint "generating " $cuda_file
+                print(kernel_file, $cuda_kernel_file_start)
+                # the kernel itself, then its host-side launcher with C linkage
+                print(kernel_file, "__global__ ", $kernel)
+                print(kernel_file, "\nextern \"C\" ", $prekernel)
+            end
+            CUDA_CODELETS[$name]="CUDA_"*$name
+        end
+        print("end generation")
+        #starpu_task_library_name="generated_tasks"
+        #global starpu_task_library_name
+    end
+end

+ 13 - 0
julia/StarPU.jl/src/compiler/include.jl

@@ -0,0 +1,13 @@
+export starpu_new_cpu_kernel_file
+export starpu_new_cuda_kernel_file
+export @codelet
+export @target
+
+include("utils.jl")
+include("expressions.jl")
+include("parsing.jl")
+include("expression_manipulation.jl")
+include("c.jl")
+include("cuda.jl")
+include("file_generation.jl")
+

+ 5 - 8
julia/src/Compiler/parsing.jl

@@ -14,19 +14,16 @@ starpu_parse_key_word_parsing_function = Dict{Symbol, Function}()
 function starpu_parse(x :: Expr)
 
     if (x.head == :macrocall)
-
-        if (x.args[1] != Symbol("@indep"))
-            error("Only @indep macro, used before a for loop, is allowed ($(x.args[1]) was found)")
+        if (x.args[1] != Symbol("@parallel"))
+            error("Only @parallel macro, used before a for loop, is allowed ($(x.args[1]) was found)")
         end
 
-        if (length(x.args) != 2)
-            error("Invalid usage of @indep macro")
+        if (length(x.args) != 3)
+            error("Invalid usage of @parallel macro", length(x.args))
         end
-
-        return starpu_parse_for(x.args[2], is_independant = true)
+        return starpu_parse_for(x.args[3], is_independant = true)
     end
 
-
     if !(x.head in keys(starpu_parse_key_word_parsing_function))
         return StarpuExprInvalid() #TODO error ?
     end

+ 0 - 12
julia/src/Compiler/utils.jl

@@ -1,9 +1,6 @@
-
 import Base.print
 
-
 function print_newline(io :: IO, indent = 0, n_lines = 1)
-
     for i in (1 : n_lines)
         print(io, "\n")
     end
@@ -15,11 +12,7 @@ end
 
 starpu_indent_size = 4
 
-
-
-
 function rand_char()
-
     r = rand(UInt) % 62
 
     if (0 <= r < 10)
@@ -32,19 +25,14 @@ function rand_char()
 end
 
 function rand_string(size = 8)
-
     output = ""
 
     for i in (1 : size)
         output *= string(rand_char())
     end
-
     return output
 end
 
-
-
-
 function system(cmd :: String)
     ccall((:system, "libc"), Cint, (Cstring,), cmd)
 end

+ 1 - 7
julia/src/Wrapper/C/jlstarpu.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,12 +28,7 @@
 #include <starpu.h>
 #include <pthread.h>
 
-
 #include "jlstarpu_utils.h"
 #include "jlstarpu_task.h"
 
-
-
-
-
 #endif /* JLSTARPU_H_ */

+ 21 - 66
julia/src/Wrapper/C/jlstarpu_data_handles.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,31 +16,13 @@
 
 #include "jlstarpu.h"
 
-
-
-
-#if 0
-void print_vector_interface(struct starpu_vector_interface * i)
-{
-	printf("Vector interface at %p\n", i);
-	printf("\tdev_handle : %p\n", i->dev_handle);
-	printf("\telement_size : %u\n", i->elemsize);
-	printf("\tnx : %u\n", i->nx);
-	printf("\toffset : %u\n", i->offset);
-	printf("\tptr : %p\n", i->ptr);
-	printf("\tslide_base : %u\n", i->slice_base);
-}
-#endif
-
-
 enum jlstarpu_data_filter_func
 {
-    JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
-    JLSTARPU_MATRIX_FILTER_BLOCK
+	JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
+	JLSTARPU_MATRIX_FILTER_BLOCK,
+	JLSTARPU_VECTOR_FILTER_BLOCK,
 };
 
-
-
 struct jlstarpu_data_filter
 {
 	enum jlstarpu_data_filter_func func;
@@ -52,63 +33,37 @@ struct jlstarpu_data_filter
 
 void * jlstarpu_translate_data_filter_func(enum jlstarpu_data_filter_func func)
 {
-
 	switch (func){
-
 	case JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK:
 		return starpu_matrix_filter_vertical_block;
-
 	case JLSTARPU_MATRIX_FILTER_BLOCK:
 		return starpu_matrix_filter_block;
-
+	case JLSTARPU_VECTOR_FILTER_BLOCK:
+		return starpu_vector_filter_block;
 	default:
 		return NULL;
-
 	}
 
 }
 
-
-void jlstarpu_translate_data_filter
-(
-		const struct jlstarpu_data_filter * const input,
-		struct starpu_data_filter * output
-)
+void jlstarpu_translate_data_filter(const struct jlstarpu_data_filter * const input,struct starpu_data_filter * output)
 {
 	memset(output, 0, sizeof(struct starpu_data_filter));
-
 	output->filter_func = jlstarpu_translate_data_filter_func(input->func);
 	output->nchildren = input->nchildren;
-
 }
 
-
-
-
-
-
-
-
-
-void jlstarpu_data_partition
-(
-		starpu_data_handle_t handle,
-		const struct jlstarpu_data_filter * const jl_filter
-)
+void jlstarpu_data_partition(starpu_data_handle_t handle,const struct jlstarpu_data_filter * const jl_filter)
 {
 	struct starpu_data_filter filter;
 	jlstarpu_translate_data_filter(jl_filter, &filter);
-
 	starpu_data_partition(handle, &filter);
-
 }
 
 
-void jlstarpu_data_map_filters_1_arg
-(
-		starpu_data_handle_t handle,
-		const struct jlstarpu_data_filter * const jl_filter
-)
+void jlstarpu_data_map_filters_1_arg(starpu_data_handle_t handle,
+	const struct jlstarpu_data_filter * const jl_filter
+	)
 {
 	struct starpu_data_filter filter;
 	jlstarpu_translate_data_filter(jl_filter, &filter);
@@ -120,10 +75,10 @@ void jlstarpu_data_map_filters_1_arg
 
 void jlstarpu_data_map_filters_2_arg
 (
-		starpu_data_handle_t handle,
-		const struct jlstarpu_data_filter * const jl_filter_1,
-		const struct jlstarpu_data_filter * const jl_filter_2
-)
+	starpu_data_handle_t handle,
+	const struct jlstarpu_data_filter * const jl_filter_1,
+	const struct jlstarpu_data_filter * const jl_filter_2
+	)
 {
 	struct starpu_data_filter filter_1;
 	jlstarpu_translate_data_filter(jl_filter_1, &filter_1);
@@ -139,12 +94,12 @@ void jlstarpu_data_map_filters_2_arg
 
 
 
-#define JLSTARPU_GET(interface, field, ret_type)\
-	\
-	ret_type jlstarpu_##interface##_get_##field(const struct starpu_##interface##_interface * const x)\
-	{\
-		return (ret_type) x->field;\
-	}\
+#define JLSTARPU_GET(interface, field, ret_type)			\
+									\
+	ret_type jlstarpu_##interface##_get_##field(const struct starpu_##interface##_interface * const x) \
+	{								\
+		return (ret_type) x->field;				\
+	}								\
 
 
 

+ 1 - 7
julia/src/Wrapper/C/jlstarpu_simple_functions.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,16 +15,11 @@
  */
 #include "jlstarpu.h"
 
-
-
-
 int jlstarpu_init(void)
 {
 	return starpu_init(NULL);
 }
 
-
-
 void jlstarpu_set_to_zero(void * ptr, unsigned int size)
 {
 	memset(ptr, 0, size);

+ 2 - 7
julia/src/Wrapper/C/jlstarpu_task.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -27,11 +26,6 @@
 
 #include "jlstarpu.h"
 
-
-
-
-
-
 struct jlstarpu_codelet
 {
 	uint32_t where;
@@ -40,6 +34,7 @@ struct jlstarpu_codelet
 	char * cpu_func_name;
 
 	starpu_cuda_func_t cuda_func;
+	starpu_opencl_func_t opencl_func;
 
 	int nbuffer;
 	enum starpu_data_access_mode * modes;

+ 9 - 9
julia/src/Wrapper/C/jlstarpu_task_submit.c

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -44,14 +43,12 @@ struct starpu_codelet * jlstarpu_translate_codelet(struct jlstarpu_codelet * con
 
 	starpu_codelet_init(output);
 
-
-
 	output->where = input->where;
-
 	output->cpu_funcs[0] = input->cpu_func;
 	output->cpu_funcs_name[0] = input->cpu_func_name;
 
 	output->cuda_funcs[0] = input->cuda_func;
+	output->opencl_funcs[0] = input->opencl_func;
 
 	output->nbuffers = input->nbuffer;
 	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
@@ -70,6 +67,7 @@ void jlstarpu_codelet_update(const struct jlstarpu_codelet * const input, struct
 	output->cpu_funcs_name[0] = input->cpu_func_name;
 
 	output->cuda_funcs[0] = input->cuda_func;
+	output->opencl_funcs[0] = input->opencl_func;
 
 	output->nbuffers = input->nbuffer;
 	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
@@ -84,7 +82,9 @@ void jlstarpu_free_codelet(struct starpu_codelet * cl)
 }
 #endif
 
-
+void jlstarpu_hello() {
+	fprintf(stderr,"coucou !");
+}
 
 #if 0
 struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const input)
@@ -104,7 +104,9 @@ struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const
 }
 #endif
 
-
+char *starpu_find_function(char *name, char *device) {
+	return NULL;
+}
 
 void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starpu_task * const output)
 {
@@ -115,8 +117,6 @@ void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starp
 	output->cl_arg_size = input->cl_arg_size;
 }
 
-
-
 /*
 
 void print_perfmodel(struct starpu_perfmodel * p)

+ 1 - 2
julia/src/Wrapper/C/jlstarpu_utils.h

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 23 - 26
julia/src/Wrapper/Julia/linked_list.jl

@@ -1,21 +1,18 @@
-
-
-
     export Link
     mutable struct Link{T}
 
         data :: T
 
-        previous :: Union{Nullable{Link{T}}, Link{T}}
-        next :: Union{Nullable{Link{T}}, Link{T}}
+        previous :: Union{Nothing, Link{T}}
+        next :: Union{Nothing, Link{T}}
 
         list
 
         function Link{T}(x :: T, l) where {T}
             output = new()
             output.data = x
-            output.previous = Nullable{Link{T}}()
-            output.next = Nullable{Link{T}}()
+            output.previous = Nothing()
+            output.next = Nothing()
             output.list = l
             return output
         end
@@ -27,14 +24,14 @@
 
         nelement :: Int64
 
-        first :: Union{Nullable{Link{T}}, Link{T}}
-        last :: Union{Nullable{Link{T}}, Link{T}}
+        first :: Union{Nothing, Link{T}}
+        last :: Union{Nothing, Link{T}}
 
         function LinkedList{T}() where {T}
             output = new()
             output.nelement = 0
-            output.first = Nullable{Link{T}}()
-            output.last = Nullable{Link{T}}()
+            output.first = Nothing()
+            output.last = Nothing()
 
             return output
         end
@@ -50,7 +47,7 @@
         l.first = new_first
         new_first.next = old_first
 
-        if (isnull(old_first))
+        if (isnothing(old_first))
             l.last = new_first
         else
             old_first.previous = new_first
@@ -71,7 +68,7 @@
         l.last = new_last
         new_last.previous = old_last
 
-        if (isnull(old_last))
+        if (isnothing(old_last))
             l.first = new_last
         else
             old_last.next = new_last
@@ -106,13 +103,13 @@
         next = lnk.next
         previous = lnk.previous
 
-        if (isnull(next))
+        if (isnothing(next))
             l.last = previous
         else
             next.previous = previous
         end
 
-        if (isnull(previous))
+        if (isnothing(previous))
             l.first = next
         else
             previous.next = next
@@ -140,7 +137,7 @@
         quote
             $(esc(lnk_iterator)) = $(esc(list)).first
 
-            while (!isnull($(esc(lnk_iterator))))
+            while (!isnothing($(esc(lnk_iterator))))
                 __next_lnk_iterator = $(esc(lnk_iterator)).next
                 $(esc(expression))
                 $(esc(lnk_iterator)) = __next_lnk_iterator
@@ -155,7 +152,7 @@
         quote
             $(esc(lnk_iterator)) = $(esc(list)).last
 
-            while (!isnull($(esc(lnk_iterator))))
+            while (!isnothing($(esc(lnk_iterator))))
                 __next_lnk_iterator = $(esc(lnk_iterator)).previous
                 $(esc(expression))
                 $(esc(lnk_iterator)) = __next_lnk_iterator
@@ -173,7 +170,7 @@
 
         print(io, " ; previous: ")
 
-        if (isnull(lnk.previous))
+        if (isnothing(lnk.previous))
             print(io, "NONE")
         else
             print(io, lnk.previous.data)
@@ -181,7 +178,7 @@
 
         print(io, " ; next: ")
 
-        if (isnull(lnk.next))
+        if (isnothing(lnk.next))
             print(io, "NONE")
         else
             print(io, lnk.next.data)
@@ -199,7 +196,7 @@
 
         @foreach_asc l lnk begin
 
-            if (!isnull(lnk.previous))
+            if (!isnothing(lnk.previous))
                 print(io, ", ")
             end
 
@@ -213,24 +210,24 @@
 
 
 
-    import Base.start
+    #import Base.start
     function start(l :: LinkedList)
         return nothing
     end
 
 
-    import Base.done
+    #import Base.done
     function done(l :: LinkedList, state)
 
         if (state == nothing)
-            return isnull(l.first)
+            return isnothing(l.first)
         end
 
-        return isnull(state.next)
+        return isnothing(state.next)
     end
 
 
-    import Base.next
+    #import Base.next
     function next(l :: LinkedList, state)
 
         if (state == nothing)
@@ -243,7 +240,7 @@
     end
 
 
-    import Base.endof
+    #import Base.endof
     function endof(l :: LinkedList)
         return l.nelement
     end

+ 20 - 0
julia/mult/README

@@ -0,0 +1,20 @@
+You first need to compile StarPU.jl.
+Then type:
+> make
+> make test
+> gnuplot mult.plot
+
+During the test, several versions are compared:
+- mult.c: the original C + StarPU code.
+- mult_native.jl: the native Julia way of multiplying two matrices, which ends up calling OpenBLAS.
+- mult.jl, generated-C mode (makefile target julia_generatedc.dat): generates a C file, genc_matrix_mult.c. This file is compiled into a library, which is then loaded so that the generated function can be executed.
+- mult.jl, call-library mode (makefile target julia_calllib.dat): the environment variable JULIA_TASK_LIB should point to a library containing the function matrix_mult. That function is called instead of a generated one.
+
+Examples of how to launch mult.jl are given in the test rules of the makefile.
+	 
+A CUDA version is generated too, but it is untested so far (no NVIDIA GPU on my laptop!).
+
+
+
+
+

+ 55 - 0
julia/mult/makefile

@@ -0,0 +1,55 @@
+# Tile size. Must be kept in sync with STRIDE in mult.jl.
+STRIDE=72
+
+# ICC compiler
+#CC =icc
+#CFLAGS=-restrict -unroll4 -ipo -falign-loops=256 -O3 -DSTRIDE=${STRIDE} -march=native $(shell pkg-config --cflags starpu-1.3)
+# GCC compiler
+CC=gcc-9
+CFLAGS += -O3 -DSTRIDE=${STRIDE} -mavx -mfma -fomit-frame-pointer -march=native -ffast-math $(shell pkg-config --cflags starpu-1.3)
+
+LDFLAGS +=$(shell pkg-config --libs starpu-1.3)
+EXTERNLIB=extern_tasks.dylib
+GENERATEDLIB=generated_tasks.dylib
+OBJECTS=$(patsubst %.c,%.o,$(wildcard gen*.c))
+# CURDIR is set by make itself, so it stays correct with "make -C <dir>",
+# unlike PWD which is inherited from the invoking shell.
+LIBPATH=$(CURDIR)/../StarPU.jl/lib
+
+# Loader path for the Julia runs: append LIBPATH to any existing value with a
+# ':' separator. Written with plain POSIX parameter expansion because "VAR+=x"
+# is a bash extension and make runs recipes with /bin/sh.
+JULIA_LIBENV=LD_LIBRARY_PATH="$${LD_LIBRARY_PATH:+$${LD_LIBRARY_PATH}:}${LIBPATH}"
+
+# These targets do not produce a file of the same name.
+.PHONY: all clean test
+
+all: ${EXTERNLIB}
+
+mult: mult.c cpu_mult.o #gpu_mult.o
+	$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)
+
+gpu_mult.o: gpu_mult.cu
+	nvcc -c $(CFLAGS) $^ -o $@
+
+# Objects end up in shared libraries, so they are compiled as
+# position-independent code.
+%.o: %.c
+	$(CC) -c $(CFLAGS) -fPIC $^ -o $@
+
+# Libraries are listed after the objects so that the linker resolves the
+# objects' symbols against them.
+${EXTERNLIB}: cpu_mult.o
+	$(CC) -shared -fPIC $^ -o $@ $(LDFLAGS)
+
+gpu_mult.so: gpu_mult.o
+	nvcc $(CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS)
+
+cpu_mult_sa: cpu_mult_sa.o
+	$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)
+
+${GENERATEDLIB}: ${OBJECTS}
+	$(CC) -shared -fPIC $^ -o $@ $(LDFLAGS)
+
+# -f: do not fail when some of the patterns match nothing.
+clean:
+	rm -f *.so *.o *.dylib c_*.genc gencuda_*.cu *.dat
+
+# Performance Tests
+cstarpu.dat: mult
+	STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 ./mult > $@
+julia_generatedc.dat:
+	${JULIA_LIBENV} STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia mult.jl $@
+julia_native.dat:
+	${JULIA_LIBENV} STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia mult_native.jl $@
+julia_calllib.dat: ${EXTERNLIB}
+	${JULIA_LIBENV} JULIA_TASK_LIB="${EXTERNLIB}" STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia mult.jl julia_calllib.dat
+
+test: cstarpu.dat julia_generatedc.dat julia_native.dat julia_calllib.dat
+
+

+ 21 - 61
julia/tst/mult.c

@@ -1,8 +1,10 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2010       Mehdi Juhoor
- * Copyright (C) 2018       Alexis Juven
+ * Copyright (C) 2018                                     Alexis Juven
+ * Copyright (C) 2012,2013                                Inria
+ * Copyright (C) 2009-2011,2013-2015                      Université de Bordeaux
+ * Copyright (C) 2010                                     Mehdi Juhoor
+ * Copyright (C) 2010-2013,2015,2017                      CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -65,13 +67,10 @@
 
 
 
-void gpu_mult(void **, void *);
+//void gpu_mult(void **, void *);
 void cpu_mult(void **, void *);
 
 
-
-
-
 static struct starpu_perfmodel model =
 {
 		.type = STARPU_HISTORY_BASED,
@@ -82,17 +81,13 @@ static struct starpu_codelet cl =
 {
 		.cpu_funcs = {cpu_mult},
 		.cpu_funcs_name = {"cpu_mult"},
-		.cuda_funcs = {gpu_mult},
+		//.cuda_funcs = {gpu_mult},
 		.nbuffers = 3,
 		.modes = {STARPU_R, STARPU_R, STARPU_W},
 		.model = &model
 };
 
 
-
-
-
-
 void multiply_with_starpu(float *A, float *B, float *C,  unsigned xdim,  unsigned ydim,  unsigned zdim, unsigned nslicesx, unsigned nslicesy)
 {
 	starpu_data_handle_t A_handle, B_handle, C_handle;
@@ -135,7 +130,7 @@ void multiply_with_starpu(float *A, float *B, float *C,  unsigned xdim,  unsigne
 			task->handles[1] = starpu_data_get_sub_data(B_handle, 1, taskx);
 			task->handles[2] = starpu_data_get_sub_data(C_handle, 2, taskx, tasky);
 
-			starpu_task_submit(task);
+			if (starpu_task_submit(task)!=0) fprintf(stderr,"submit task error\n");
 
 		}
 	}
@@ -174,32 +169,7 @@ void init_zero(float * m, unsigned width, unsigned height)
 
 
 
-void sort(unsigned int size, double t[])
-{
-	unsigned int j;
-
-	int is_sort = 0;
-
-	while(!is_sort){
-
-		is_sort = 1;
-
-		for (j = 0 ; j < size - 1 ; j++){
-
-			if (t[j] > t[j+1]){
-				double tmp = t[j];
-				t[j] = t[j+1];
-				t[j+1] = tmp;
-				is_sort = 0;
-			}
-		}
-	}
-
-
-}
-
-
-double median_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nsclicesx, unsigned nsclicesy)
+double min_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nsclicesx, unsigned nsclicesy)
 {
 	unsigned i;
 
@@ -207,7 +177,7 @@ double median_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim
 	float * B = (float *) malloc(xdim*zdim*sizeof(float));
 	float * C = (float *) malloc(xdim*ydim*sizeof(float));
 
-	double exec_times[nb_test];
+	double exec_times=-1;
 
 	for (i = 0 ; i < nb_test ; i++){
 
@@ -221,17 +191,14 @@ double median_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim
 		multiply_with_starpu(A, B, C, xdim, ydim, zdim, nsclicesx, nsclicesy);
 		stop = starpu_timing_now();
 
-		exec_t = (stop - start)/1.e6;
-		exec_times[i] = exec_t;
+		exec_t = (stop - start)*1.e3; // Put in ns instead of us
+		if (exec_times<0 || exec_times>exec_t) exec_times= exec_t;
 	}
 
-	sort(nb_test, exec_times);
-
 	free(A);
 	free(B);
 	free(C);
-
-	return exec_times[nb_test/2];
+	return exec_times;
 }
 
 
@@ -240,8 +207,8 @@ void display_times(unsigned start_dim, unsigned step_dim, unsigned stop_dim, uns
 	unsigned dim;
 
 	for (dim = start_dim ; dim <= stop_dim ; dim += step_dim){
-		double t = median_time(nb_tests, dim, dim, dim, nsclicesx, nsclicesy);
-		printf("%u ; %f\n", dim, t);
+		double t = min_time(nb_tests, dim, dim, dim, nsclicesx, nsclicesy);
+		printf("%f %f\n", dim*dim*4.*3./1024./1024, (2.*dim-1.)*dim*dim/t);
 	}
 
 }
@@ -249,24 +216,17 @@ void display_times(unsigned start_dim, unsigned step_dim, unsigned stop_dim, uns
 
 int main(int argc, char * argv[])
 {
-
-	if (argc != 7){
-		printf("Usage : %s start_dim step_dim stop_dim nb_tests nsclicesx nsclicesy\n", argv[0]);
-		return 1;
-	}
-
-
 	if (starpu_init(NULL) != EXIT_SUCCESS){
 		fprintf(stderr, "ERROR\n");
 		return 77;
 	}
 
-	unsigned start_dim = (unsigned) atoi(argv[1]);
-	unsigned step_dim = (unsigned) atoi(argv[2]);
-	unsigned stop_dim = (unsigned) atoi(argv[3]);
-	unsigned nb_tests = (unsigned) atoi(argv[4]);
-	unsigned nsclicesx = (unsigned) atoi(argv[5]);
-	unsigned nsclicesy = (unsigned) atoi(argv[6]);
+	unsigned start_dim = 16*STRIDE;
+	unsigned step_dim = 4*STRIDE;
+	unsigned stop_dim = 4096;
+	unsigned nb_tests = 10;
+	unsigned nsclicesx = 2;
+	unsigned nsclicesy = 2;
 
 	display_times(start_dim, step_dim, stop_dim, nb_tests, nsclicesx, nsclicesy);
 

+ 146 - 0
julia/mult/mult.jl

@@ -0,0 +1,146 @@
+import Libdl
+using StarPU
+using LinearAlgebra
+
+# Should be the same as STRIDE in the makefile
+const STRIDE = 72
+
+# StarPU codelet computing the single-precision product m3 = m1 * m2.
+#
+# m1 and m2 are only read; m3 is zeroed first and then accumulated into, so
+# its previous contents are discarded. The @target line lists both
+# STARPU_CPU and STARPU_CUDA.
+#
+# NOTE(review): the tiled loops always walk complete STRIDE x STRIDE tiles and
+# the unrolled body reads rows kk..kk+3 and columns jj, jj+1 without any bounds
+# handling, so this appears to require every matrix dimension to be a multiple
+# of STRIDE and STRIDE to be a multiple of 4 -- confirm against the callers.
+@target STARPU_CPU+STARPU_CUDA
+@codelet function matrix_mult(m1 :: Matrix{Float32}, m2 :: Matrix{Float32}, m3 :: Matrix{Float32}) :: Float32
+
+    width_m2 :: Int32 = width(m2)
+    height_m1 :: Int32 = height(m1)
+    width_m1 :: Int32 = width(m1)
+    # Naive version (kept for reference, disabled)
+    #@parallel for j in (1 : width_m2)
+    #    @parallel for i in (1 : height_m1)
+    #
+    #          sum :: Float32 = 0.
+
+    #          for k in (1 : width_m1)
+    #              sum = sum + m1[i, k] * m2[k, j]
+    #          end
+    
+    #          m3[i, j] = sum
+    #      end
+    #  end
+    ##### Tiled and unrolled version 
+    # Clear the output first: the tiled loops below only accumulate into m3.
+    for l in (1 : width_m2)
+        for m in (1 : height_m1)
+            m3[m,l] = 0
+        end
+    end
+    # i, k and j select the first row/column of a STRIDE-sized tile along
+    # height_m1, width_m1 and width_m2 respectively.
+    @parallel for i in (1 : STRIDE : height_m1)
+        for k in (1 : STRIDE : width_m1 )
+            for j in (1 : STRIDE : width_m2  )
+                # Inside a tile, kk advances by 4 and jj by 2: each iteration
+                # handles a 4x2 block of m2.
+                for kk in (k : 4 : k+STRIDE-1)
+                    for jj in (j : 2 : j+STRIDE-1)
+                        # Keep the 4x2 block of m2 in scalars so it is reused
+                        # for every row ii of the tile.
+                        alpha00 :: Float32 =m2[kk,jj]
+                        alpha01 :: Float32 =m2[kk,jj+1]
+                        alpha10 :: Float32 =m2[kk+1,jj]
+                        alpha11 :: Float32 =m2[kk+1,jj+1]
+                        alpha20 :: Float32 =m2[kk+2,jj]
+                        alpha21 :: Float32 =m2[kk+2,jj+1]
+                        alpha30 :: Float32 =m2[kk+3,jj]
+                        alpha31 :: Float32 =m2[kk+3,jj+1]
+                        # Add the contribution of rows kk..kk+3 of m2 to
+                        # columns jj and jj+1 of m3, for each row of the tile.
+                        for ii in (i : 1 : i+STRIDE-1) 
+                            m3[ii, jj] = m3[ii, jj] + m1[ii, kk] * alpha00 + m1[ii, kk+1] * alpha10 + m1[ii, kk+2] * alpha20 + m1[ii,kk+3]*alpha30
+                            m3[ii, jj+1] = m3[ii, jj+1] + m1[ii, kk] * alpha01 + m1[ii, kk+1] * alpha11 + m1[ii, kk+2]*alpha21 + m1[ii,kk+3]*alpha31 
+                        end
+                    end
+                end
+            end
+        end
+    end
+
+    return 0. :: Float32
+end
+
+
+# Start the StarPU runtime before the benchmark code below uses it.
+@debugprint "starpu_init"
+starpu_init()
+
+# Multiply A by B into C through StarPU tasks and return the best elapsed time.
+#
+# The handle of B is partitioned with a vertical-block filter into `nslicesx`
+# parts, the handle of A with a block filter into `nslicesy` parts, and the
+# handle of C with both filters. One task is submitted per (taskx, tasky) pair.
+# The whole submission is repeated 10 times and the smallest elapsed time of a
+# repetition, in nanoseconds as measured by time_ns(), is returned.
+function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
+    # Best time so far; starts at the largest UInt64 so the variable keeps the
+    # type returned by time_ns() instead of switching from Int to UInt64.
+    tmin = typemax(UInt64)
+    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx)
+    horiz = StarpuDataFilter(STARPU_MATRIX_FILTER_BLOCK, nslicesy)
+    @starpu_block let
+        hA,hB,hC = starpu_data_register(A, B, C)
+        starpu_data_partition(hB, vert)
+        starpu_data_partition(hA, horiz)
+        starpu_data_map_filters(hC, vert, horiz)
+        perfmodel = StarpuPerfmodel(
+            perf_type = STARPU_HISTORY_BASED,
+            symbol = "history_perf"
+        )
+        cl = StarpuCodelet(
+            cpu_func = CPU_CODELETS["matrix_mult"],
+            #cuda_func = "matrix_mult",
+            #opencl_func="ocl_matrix_mult",
+            modes = [STARPU_R, STARPU_R, STARPU_W],
+            perfmodel = perfmodel
+        )
+
+        for _ in (1 : 10 )
+            t=time_ns()
+            # The timed region covers task submission and the wait for all
+            # submitted tasks of this repetition.
+            @starpu_sync_tasks begin
+                for taskx in (1 : nslicesx)
+                    for tasky in (1 : nslicesy)
+                        handles = [hA[tasky], hB[taskx], hC[taskx, tasky]]
+                        task = StarpuTask(cl = cl, handles = handles)
+                        starpu_task_submit(task)
+                        #@starpu_async_cl matrix_mult(hA[tasky], hB[taskx], hC[taskx, tasky])
+                    end
+                end
+            end
+            t=time_ns()-t
+            tmin = min(tmin, t)
+        end
+    end
+    return tmin
+end
+
+
+# Compare two Cfloat matrices element by element with a relative tolerance.
+#
+# Two entries differ when the absolute value of their difference exceeds
+# `eps` times the larger of their absolute values. The first differing pair is
+# printed and `false` is returned; `true` is returned when no pair differs.
+# The iteration bounds are taken from `A`.
+function approximately_equals(
+    A :: Matrix{Cfloat},
+    B :: Matrix{Cfloat},
+    eps = 1e-2
+)
+    nrows, ncols = size(A)
+
+    # Column-major traversal: j is the outer index, i the inner one.
+    for j in 1:ncols, i in 1:nrows
+        a = A[i,j]
+        b = B[i,j]
+        differs = abs(a - b) > eps * max(abs(b), abs(a))
+        if differs
+            println("A[$i,$j] : $(A[i,j]), B[$i,$j] : $(B[i,j])")
+            return false
+        end
+    end
+
+    return true
+end
+
+# Benchmark multiply_with_starpu for every dimension in
+# start_dim : step_dim : stop_dim, using square dim x dim matrices.
+#
+# For each dimension one line "<size> <flops>" is written to `io` and echoed
+# on standard output. <size> is dim*dim*4*3/1024/1024 and <flops> is
+# (2*dim-1)*dim*dim divided by the time returned by multiply_with_starpu.
+function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy)
+    dims = start_dim : step_dim : stop_dim
+    for dim in dims
+        # Random inputs, zero-initialised output.
+        A = rand(Cfloat, dim, dim)
+        B = rand(Cfloat, dim, dim)
+        C = zeros(Float32, dim, dim)
+
+        elapsed = multiply_with_starpu(A, B, C, nslicesx, nslicesy)
+
+        size = dim*dim*4*3/1024/1024
+        flops = (2*dim-1)*dim*dim/elapsed
+        line = "$size $flops"
+        println(io, line)
+        println(line)
+    end
+end
+
+
+# Run the benchmark and write its results to the file named by the first
+# command-line argument. The do-block form of open closes the file even when
+# the benchmark throws.
+open(ARGS[1],"w") do io
+    compute_times(io,16*STRIDE,4*STRIDE,4096,2,2)
+end
+@debugprint "starpu_shutdown"
+starpu_shutdown()
+

+ 4 - 0
julia/mult/mult.plot

@@ -0,0 +1,4 @@
+# Select the terminal before opening the output file, as the gnuplot
+# documentation recommends when both are set.
+set term pdf
+set output "comparison.pdf"
+# Column 1 of each .dat file is dim*dim*4*3/1024/1024 (three float matrices,
+# in MB); column 2 is floating-point operations per nanosecond, i.e. GFlop/s.
+set xlabel "Size of the three matrices (MB)"
+set ylabel "GFlop/s"
+plot "julia_native.dat" w l,"cstarpu.dat" w l,"julia_generatedc.dat" w l,"julia_calllib.dat" w l
+quit

+ 44 - 0
julia/mult/mult_native.jl

@@ -0,0 +1,44 @@
+import Libdl
+using StarPU
+using LinearAlgebra
+
+# Should be the same as STRIDE in the makefile
+const STRIDE = 72
+
+# Start the StarPU runtime; it is shut down again at the end of this script.
+@debugprint "starpu_init"
+starpu_init()
+
+# Time the native Julia product of A and B, storing the result in C.
+#
+# The product is computed 10 times with LinearAlgebra.mul!, which writes into
+# the caller's preallocated C. The previous `C = A * B` only rebound the local
+# name, left the caller's matrix untouched and timed a fresh allocation on
+# every repetition. The smallest elapsed time of one product, in nanoseconds
+# as measured by time_ns(), is returned.
+#
+# nslicesx and nslicesy are unused; they are kept so that the signature
+# matches multiply_with_starpu in mult.jl.
+function multiply_without_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
+    # Best time so far; starts at the largest UInt64 so the variable keeps the
+    # type returned by time_ns().
+    tmin = typemax(UInt64)
+    for _ in (1 : 10 )
+        t=time_ns()
+        mul!(C, A, B)
+        t=time_ns() - t
+        tmin = min(tmin, t)
+    end
+    return tmin
+end
+
+
+# Benchmark multiply_without_starpu for every dimension in
+# start_dim : step_dim : stop_dim, using square dim x dim matrices.
+#
+# For each dimension one line "<size> <flops>" is written to `io` and echoed
+# on standard output. <size> is dim*dim*4*3/1024/1024 and <flops> is
+# (2*dim-1)*dim*dim divided by the time returned by multiply_without_starpu.
+function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy)
+    dims = start_dim : step_dim : stop_dim
+    for dim in dims
+        # Random inputs, zero-initialised output.
+        A = rand(Cfloat, dim, dim)
+        B = rand(Cfloat, dim, dim)
+        C = zeros(Float32, dim, dim)
+
+        elapsed = multiply_without_starpu(A, B, C, nslicesx, nslicesy)
+
+        size = dim*dim*4*3/1024/1024
+        flops = (2*dim-1)*dim*dim/elapsed
+        line = "$size $flops"
+        println(io, line)
+        println(line)
+    end
+end
+
+
+# Run the benchmark and write its results to the file named by the first
+# command-line argument. The do-block form of open closes the file even when
+# the benchmark throws.
+open(ARGS[1],"w") do io
+    compute_times(io,16*STRIDE,4*STRIDE,4096,2,2)
+end
+@debugprint "starpu_shutdown"
+starpu_shutdown()
+

+ 11 - 0
julia/mult/res/mult_cstarpu_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.187500 62.469505
+23.730469 65.145783
+34.171875 64.252904
+46.511719 64.856261
+60.750000 61.807355
+76.886719 64.395970
+94.921875 63.789615
+114.855469 64.782028
+136.687500 61.598611
+160.417969 63.266065
+186.046875 62.610491

+ 29 - 0
julia/mult/res/mult_gen_gcc9_1x4.dat

@@ -0,0 +1,29 @@
+3.0 35.095536494941854
+4.6875 41.989376626414035
+6.75 38.862414203751754
+9.1875 42.8149332353783
+12.0 27.869356812008537
+15.1875 40.90318016042991
+18.75 35.225143587305226
+22.6875 40.2855672797496
+27.0 30.428316600694394
+31.6875 39.31411328357
+36.75 32.661610849261855
+42.1875 37.556932447413935
+48.0 23.52181133492872
+54.1875 38.220688331426885
+60.75 30.30144442185247
+67.6875 35.7931848447804
+75.0 27.503675217337065
+82.6875 34.26004394537202
+90.75 30.28248032967485
+99.1875 34.80402996973707
+108.0 23.410338740869793
+117.1875 33.816995170067365
+126.75 28.575495002257274
+136.6875 31.938845714722646
+147.0 26.579028306224597
+157.6875 31.257291229640458
+168.75 29.40371998523363
+180.1875 31.20791496656832
+192.0 22.27977322138876

+ 29 - 0
julia/mult/res/mult_gen_gcc9_4x1.dat

@@ -0,0 +1,29 @@
+3.0 38.576241402554224
+4.6875 45.31099640039931
+6.75 41.33116523247586
+9.1875 41.36870326327288
+12.0 30.726913279900813
+15.1875 40.97070044185806
+18.75 38.00711660927796
+22.6875 41.41500237348445
+27.0 35.60049383550015
+31.6875 39.3274866273195
+36.75 35.89547776075037
+42.1875 40.148418048157694
+48.0 25.328919716005114
+54.1875 38.31273888508681
+60.75 33.40185421821584
+67.6875 36.933804237397716
+75.0 30.424047628770715
+82.6875 35.555311026464885
+90.75 33.520292128428736
+99.1875 36.15390985285586
+108.0 24.69723232685782
+117.1875 35.10082819198454
+126.75 31.92453002780272
+136.6875 35.218675088153915
+147.0 28.57496209979794
+157.6875 34.30832202391309
+168.75 32.28312311135674
+180.1875 33.95765540289391
+192.0 21.87223921165241

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s100_4x1.dat

@@ -0,0 +1,7 @@
+29.296875 40.03209610690769
+45.7763671875 39.7782462738071
+65.91796875 40.919412460071406
+89.7216796875 39.86310075239485
+117.1875 38.57853081024218
+148.3154296875 38.365551586369726
+183.10546875 36.36952308316503

+ 17 - 0
julia/mult/res/mult_gen_gcc9_s50_4x1.dat

@@ -0,0 +1,17 @@
+7.32421875 40.71065910284983
+11.444091796875 38.65943468999069
+16.4794921875 38.47969086769767
+22.430419921875 36.651013048059674
+29.296875 36.83417153820277
+37.078857421875 36.39675792194737
+45.7763671875 35.95397397739203
+55.389404296875 35.01271643062472
+65.91796875 36.615975568007045
+77.362060546875 35.37884613184124
+89.7216796875 35.47361924291173
+102.996826171875 35.19272426462016
+117.1875 33.20635359174189
+132.293701171875 33.85292024547706
+148.3154296875 34.259771254715574
+165.252685546875 33.696634276143286
+183.10546875 32.65563984287101

+ 4 - 0
julia/mult/res/mult_gen_gcc9_s64_16x16_b4x2.dat

@@ -0,0 +1,4 @@
+12.0 36.97738316279322
+48.0 41.92478434152502
+108.0 44.782019377342586
+192.0 43.31174213073912

+ 13 - 0
julia/mult/res/mult_gen_gcc9_s64_4x4_b4x2.dat

@@ -0,0 +1,13 @@
+12.0 46.97244046946638
+18.75 58.871349999359005
+27.0 53.82855830537721
+36.75 56.32008723976445
+48.0 44.67174556090053
+60.75 56.37242731373687
+75.0 50.7326915732461
+90.75 55.4718476922033
+108.0 42.2282159789089
+126.75 53.75672849956793
+147.0 49.58574052835205
+168.75 54.37042916069095
+192.0 41.03783426797551

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s64_8x1_b4x2.dat

@@ -0,0 +1,7 @@
+12.0 44.99985202955367
+27.0 52.4356051129035
+48.0 43.98871775598096
+75.0 49.80617536221089
+108.0 41.49118338442519
+147.0 48.56364996087375
+192.0 40.62654517348534

+ 7 - 0
julia/mult/res/mult_gen_gcc9_s64_8x8_b4x2.dat

@@ -0,0 +1,7 @@
+12.0 42.39120888947927
+27.0 54.550913222094984
+48.0 45.49392373655385
+75.0 51.36208021061481
+108.0 43.63013488867403
+147.0 49.68419805912138
+192.0 42.5457713890938

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_16x18_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 47.91450640786182
+23.73046875 70.18415000835407
+34.171875 130.5930513278052
+46.51171875 175.40249695912559
+60.75 55.633587692486856
+76.88671875 69.98450438218035
+94.921875 84.29515755163186
+114.85546875 105.0701279194339
+136.6875 59.010534668180654
+160.41796875 62.33164645892831
+186.046875 71.89788996838325

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_16x8_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 58.34035046809785
+23.73046875 90.94876643488134
+34.171875 93.57888433733667
+46.51171875 115.24941633539966
+60.75 57.91620969415693
+76.88671875 69.01285319590752
+94.921875 80.1731339374351
+114.85546875 81.88695877919552
+136.6875 59.81485837027026
+160.41796875 62.627542548656216
+186.046875 70.92430858573593

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2.dat

@@ -0,0 +1,11 @@
+15.1875 49.541265548365025
+23.73046875 52.23451345494309
+34.171875 50.51716648682395
+46.51171875 50.85574646979497
+60.75 42.70677762521372
+76.88671875 47.844317018396175
+94.921875 46.812449783935435
+114.85546875 47.63133362143659
+136.6875 42.58453806625135
+160.41796875 43.38142416008992
+186.046875 42.10597070167543

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 62.85531352216577
+23.73046875 64.18226106386828
+34.171875 63.154717935466564
+46.51171875 63.785224988311604
+60.75 56.23769206133714
+76.88671875 59.67937135482873
+94.921875 58.723057009657836
+114.85546875 59.64876377318175
+136.6875 56.179455752285136
+160.41796875 56.25355249047638
+186.046875 55.86750451373739

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b4x4.dat

@@ -0,0 +1,11 @@
+15.1875 13.064278868379871
+23.73046875 14.756026074641067
+34.171875 13.83982386947793
+46.51171875 14.613968952238817
+60.75 11.783711558597256
+76.88671875 13.931105963251506
+94.921875 12.855632000014735
+114.85546875 13.249002597722258
+136.6875 12.572965056671936
+160.41796875 14.480218135848462
+186.046875 14.040858387720379

+ 5 - 0
julia/mult/res/mult_gen_gcc9_s72_2x2_b8x2.dat

@@ -0,0 +1,5 @@
+15.1875 14.099898524592977
+23.73046875 14.550933539697333
+34.171875 14.349410926140271
+46.51171875 14.415904334550417
+60.75 13.503051187017753

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_4x1.dat

@@ -0,0 +1,11 @@
+15.1875 45.92147955991763
+23.73046875 51.36107050546614
+34.171875 49.61930926641356
+46.51171875 49.45181909243622
+60.75 45.15796272895741
+76.88671875 48.36073354687536
+94.921875 44.24184559337953
+114.85546875 47.18178855961836
+136.6875 43.159720253646896
+160.41796875 42.67165627290124
+186.046875 38.537576648017776

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_4x4_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 58.120354593587074
+23.73046875 64.57108971168358
+34.171875 63.38082696595577
+46.51171875 62.249708571678504
+60.75 54.032191569244006
+76.88671875 58.202057221774425
+94.921875 61.440907627769874
+114.85546875 59.41214640096639
+136.6875 56.7554113833207
+160.41796875 59.19922367910147
+186.046875 58.85342461418237

+ 11 - 0
julia/mult/res/mult_gen_gcc9_s72_8x8_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 60.852193218742244
+23.73046875 90.34368571742002
+34.171875 64.88259115865708
+46.51171875 75.93301804962653
+60.75 56.570334556680905
+76.88671875 69.56856256378995
+94.921875 61.32231327890172
+114.85546875 65.01978117265061
+136.6875 59.34455850319351
+160.41796875 62.64459678873513
+186.046875 60.73581790609317

+ 9 - 0
julia/mult/res/mult_gen_gcc9_s80_4x1.dat

@@ -0,0 +1,9 @@
+18.75 33.04933833660189
+29.296875 36.73797053791907
+42.1875 38.63857483141544
+57.421875 37.41446097176627
+75.0 37.29798074973393
+94.921875 47.78484008020491
+117.1875 46.920043928079984
+141.796875 44.932797054801505
+168.75 44.2566320371221

+ 3 - 0
julia/mult/res/mult_gen_icc_s72_2x1_b4x2.dat

@@ -0,0 +1,3 @@
+15.1875 32.90035486674719
+23.73046875 33.41217720558453
+34.171875 32.65678472416923

+ 11 - 0
julia/mult/res/mult_gen_icc_s72_4x4_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 62.840292498287134
+23.73046875 65.60736049798648
+34.171875 63.71211730973094
+46.51171875 64.52944091137051
+60.75 57.21012623951768
+76.88671875 64.30553029147484
+94.921875 63.023853024696905
+114.85546875 61.571602782198134
+136.6875 58.78441267918305
+160.41796875 60.1701272099038
+186.046875 58.911378284423975

+ 62 - 0
julia/mult/res/mult_native.dat

@@ -0,0 +1,62 @@
+0.046875 62.74933655006031
+0.1875 81.76128691363823
+0.421875 56.62036966486339
+0.75 61.59670394349617
+1.171875 78.30779265074588
+1.6875 75.18164244785089
+2.296875 86.16277240336612
+3.0 88.7175896320434
+3.796875 89.94675752869234
+4.6875 91.81258734297542
+5.671875 91.07763358860382
+6.75 89.73871714190392
+7.921875 93.94105347752168
+9.1875 96.0986938110649
+10.546875 86.81427208540494
+12.0 90.48478005441814
+13.546875 87.99680611063411
+15.1875 84.31631390960176
+16.921875 91.48325697666799
+18.75 89.95690606810193
+20.671875 81.49676622230874
+22.6875 81.45233686971325
+24.796875 85.66539502122194
+27.0 87.78572232818597
+29.296875 83.52321886514454
+31.6875 84.0009753910359
+34.171875 86.80205838654649
+36.75 86.99073757030344
+39.421875 81.17955694509793
+42.1875 80.44760372225159
+45.046875 80.56510057628367
+48.0 81.65642955552222
+51.046875 82.11316716249475
+54.1875 82.5995656333009
+57.421875 82.17046140404119
+60.75 82.15768363601998
+64.171875 81.85343346990497
+67.6875 83.50739871185147
+71.296875 81.53545433745259
+75.0 81.48249578388855
+78.796875 83.7802422875762
+82.6875 82.91995474794902
+86.671875 84.37396962418724
+90.75 83.27206282643111
+94.921875 84.42083617832927
+99.1875 84.41380826745248
+103.546875 83.76666519036874
+108.0 83.15578835025194
+112.546875 82.68544528819217
+117.1875 82.44413764522827
+121.921875 75.20336455312317
+126.75 78.74206622501798
+131.671875 78.30429756099845
+136.6875 77.07009004287404
+141.796875 76.79451754647009
+147.0 77.188762171773
+152.296875 83.71882981918472
+157.6875 81.25155945091102
+163.171875 82.77719540968279
+168.75 82.091876711701
+174.421875 80.13244240187723
+180.1875 83.15669561921781

+ 11 - 0
julia/mult/res/mult_nogen_gcc9_s72_2x2_b2x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.12811008957249
+23.73046875 60.33779880359854
+34.171875 58.65251355824629
+46.51171875 59.39753610863093
+60.75 51.71969005913254
+76.88671875 60.41770576441039
+94.921875 55.244712365625155
+114.85546875 59.08425492245039
+136.6875 50.57086001690063
+160.41796875 54.04522394983553
+186.046875 52.97679192814275

+ 11 - 0
julia/mult/res/mult_nogen_gcc9_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 54.13411070454958
+23.73046875 61.69285331765543
+34.171875 59.001001016663984
+46.51171875 60.24465674394535
+60.75 52.541458398044604
+76.88671875 62.0774861675393
+94.921875 56.20749571716199
+114.85546875 53.17550437757629
+136.6875 47.70744414255635
+160.41796875 49.931147163084646
+186.046875 47.38560406825662

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72-36_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.39992483045233
+23.73046875 61.25439146348626
+34.171875 58.97664123460709
+46.51171875 58.949173678252
+60.75 52.78987632022571
+76.88671875 61.569889782842495
+94.921875 56.0234167726132
+114.85546875 59.25765213366246
+136.6875 50.44001190797859
+160.41796875 55.423350523540556
+186.046875 54.219079530491165

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 52.58835344153732
+23.73046875 61.299696065186076
+34.171875 59.10944402276785
+46.51171875 60.240571021372645
+60.75 52.66550013928241
+76.88671875 61.30385865566083
+94.921875 58.08324611362879
+114.85546875 62.11036688036624
+136.6875 51.309289298267664
+160.41796875 54.50754182628601
+186.046875 52.16594203007848

+ 11 - 0
julia/mult/res/mult_nogen_icc_s72x2_2x2_b4x2.dat

@@ -0,0 +1,11 @@
+15.1875 54.225066705273136
+23.73046875 62.03919972007252
+34.171875 57.31552099956256
+46.51171875 59.10290100154662
+60.75 52.5567601559296
+76.88671875 62.05760119221998
+94.921875 56.85760151243153
+114.85546875 59.990114344500874
+136.6875 51.0662836859927
+160.41796875 54.41003890332101
+186.046875 53.02269691247011

+ 0 - 57
julia/src/Compiler/C/add_for_loop_declarations.jl

@@ -1,57 +0,0 @@
-
-
-"""
-    Returns the list of instruction that will be added before for loop of shape
-        "for for_index_var in set ..."
-"""
-function interval_evaluation_declarations(set :: StarpuExprInterval, for_index_var :: Symbol)
-
-    const decl_pattern = @parse € :: Int64
-    const affect_pattern = @parse € :: Int64 = €
-    const interv_size_affect_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
-
-    id = set.id
-
-    start_var = starpu_parse(Symbol(:start_, id))
-    start_decl = replace_pattern(affect_pattern, start_var, set.start)
-
-    index_var = starpu_parse(for_index_var)
-    index_decl = replace_pattern(decl_pattern, index_var)
-
-    if isa(set.step, StarpuExprValue)
-
-        stop_var = starpu_parse(Symbol(:stop_, id))
-        stop_decl = replace_pattern(affect_pattern, stop_var, set.stop)
-
-        return StarpuExpr[start_decl, stop_decl, index_decl]
-    end
-
-    step_var = starpu_parse(Symbol(:step_, id))
-    step_decl = replace_pattern(affect_pattern, step_var, set.step)
-
-    dim_var = starpu_parse(Symbol(:dim_, id))
-    dim_decl = replace_pattern(interv_size_affect_pattern, dim_var, start_var, step_var, set.stop)
-
-    iter_var = starpu_parse(Symbol(:iter_, id))
-    iter_decl = replace_pattern(decl_pattern, iter_var)
-
-
-    return StarpuExpr[start_decl, step_decl, dim_decl, iter_decl, index_decl]
-end
-
-
-function add_for_loop_declarations(expr :: StarpuExpr)
-
-    function func_to_apply(x :: StarpuExpr)
-
-        if !isa(x, StarpuExprFor)
-            return x
-        end
-
-        interval_decl = interval_evaluation_declarations(x.set, x.iter)
-
-        return StarpuExprFor(x.iter, x.set, x.body, x.is_independant, interval_decl)
-    end
-
-    return apply(func_to_apply, expr)
-end

+ 0 - 15
julia/src/Compiler/C/create_cpu_kernel.jl

@@ -1,15 +0,0 @@
-
-
-
-
-
-function transform_to_cpu_kernel(expr :: StarpuExprFunction)
-
-    output = add_for_loop_declarations(expr)
-    output = substitute_args(output)
-    output = substitute_func_calls(output)
-    output = substitute_indexing(output)
-    output = flatten_blocks(output)
-
-    return output
-end

+ 0 - 27
julia/src/Compiler/C/flatten_blocks.jl

@@ -1,27 +0,0 @@
-
-
-
-function flatten_blocks(expr :: StarpuExpr)
-
-    function func_to_run(x :: StarpuExpr)
-
-        if !isa(x, StarpuExprBlock)
-            return x
-        end
-
-        instrs = StarpuExpr[]
-
-        for sub_expr in x.exprs
-
-            if isa(sub_expr, StarpuExprBlock)
-                push!(instrs, sub_expr.exprs...)
-            else
-                push!(instrs, sub_expr)
-            end
-        end
-
-        return StarpuExprBlock(instrs)
-    end
-
-    return apply(func_to_run, expr)
-end

+ 0 - 76
julia/src/Compiler/C/substitute_args.jl

@@ -1,76 +0,0 @@
-
-
-function substitute_argument_usage(expr :: StarpuExpr, arg_index, buffer_name :: Symbol, arg_name :: Symbol, ptr_name :: Symbol)
-
-    function func_to_apply(x :: StarpuExpr)
-
-        if x == StarpuExprVar(arg_name)
-            return StarpuExprVar(ptr_name)
-        end
-
-        if !(isa(x, StarpuExprCall) && x.func in keys(func_substitution))
-            return x
-        end
-
-        if (length(x.args) != 1)
-            error("Invalid arrity for function $(x.func)")
-        end
-
-        if (x.args[1] != StarpuExprVar(ptr_name))
-            return x
-        end
-
-        new_func = func_substitution[x.func]
-        new_arg = starpu_parse(:($buffer_name[$arg_index]))
-
-        return StarpuExprCall(new_func, [new_arg])
-    end
-
-    return apply(func_to_apply, expr)
-end
-
-
-
-function substitute_args(expr :: StarpuExprFunction)
-
-    new_body = expr.body
-    func_id = rand_string()
-    buffer_arg_name = Symbol("buffers_", func_id)
-    cl_arg_name = Symbol("cl_arg_", func_id)
-
-    function_start_affectations = StarpuExpr[]
-
-    for i in (1 : length(expr.args))
-
-        var_id = rand_string()
-        ptr = Symbol(:ptr_, var_id)
-
-        if (expr.args[i].typ <: Vector)
-            func_interface = :STARPU_VECTOR_GET_PTR
-
-        elseif (expr.args[i].typ <: Matrix)
-            func_interface = :STARPU_MATRIX_GET_PTR
-            ld_name = Symbol("ld_", var_id)
-            new_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$i])) )
-            push!(function_start_affectations, new_affect)
-
-        else
-            error("Task arguments must be either vector or matrix (got $(expr.args[i].typ))") #TODO : cl_args, variable ?
-        end
-
-        type_in_arg = eltype(expr.args[i].typ)
-        new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$i])) )
-        push!(function_start_affectations, new_affect)
-
-        new_body = substitute_argument_usage(new_body, i, buffer_arg_name, expr.args[i].name, ptr)
-    end
-
-
-    new_args = [
-                    starpu_parse(:($buffer_arg_name :: Matrix{Void})),
-                    starpu_parse(:($cl_arg_name :: Vector{Void}))
-                ]
-    new_body = StarpuExprBlock([function_start_affectations..., new_body.exprs...])
-
-    return StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
-end

+ 0 - 25
julia/src/Compiler/C/substitute_func_calls.jl

@@ -1,25 +0,0 @@
-
-
-
-func_substitution = Dict(
-    :width => :STARPU_MATRIX_GET_NY,
-    :height => :STARPU_MATRIX_GET_NX,
-
-    :length => :STARPU_VECTOR_GET_NX
-)
-
-
-
-function substitute_func_calls(expr :: StarpuExpr)
-
-    function func_to_apply(x :: StarpuExpr)
-
-        if !isa(x, StarpuExprCall) || !(x.func in keys(func_substitution))
-            return x
-        end
-
-        return StarpuExprCall(func_substitution[x.func], x.args)
-    end
-
-    return apply(func_to_apply, expr)
-end

+ 0 - 52
julia/src/Compiler/C/substitute_indexing.jl

@@ -1,52 +0,0 @@
-
-
-# Translates array accesses from Julia conventions to C ones.
-#
-# Every StarpuExprRef found in `expr` must index a plain variable and is
-# rewritten with a single 0-based offset :
-#   - v[i]     becomes  v[(i) - (1)]
-#   - m[i, j]  becomes  m[(i + (j - 1) * ld_<id>) - 1], where the indexed
-#     variable is named `ptr_<id>` and `ld_<id>` is the variable holding
-#     its leading dimension.
-# References without any index are returned unchanged. Indexing something
-# else than a variable, using 3 indexes or more, or using 2 indexes on a
-# variable which is not named `ptr_<id>` raises an error.
-function substitute_indexing(expr :: StarpuExpr)
-
-    function func_to_run(x :: StarpuExpr)
-
-        if !isa(x, StarpuExprRef)
-            return x
-        end
-
-        if !isa(x.ref, StarpuExprVar)
-            error("Only variable indexing is allowed") #TODO allow more ?
-        end
-
-
-        nb_indexes = length(x.indexes)
-
-        if (nb_indexes >= 3)
-            error("Indexing with more than 2 indexes is not allowed") # TODO : blocks
-        end
-
-        if (nb_indexes == 0)
-            return x
-
-        elseif nb_indexes == 1
-            new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)])  #TODO : add field "offset" from STARPU_VECTOR_GET interface
-                                                                            #TODO : detect when it is a matrix used with one index only
-            return StarpuExprRef(x.ref, [new_index])
-
-        elseif nb_indexes == 2
-
-            var_name = String(x.ref.name)
-
-            # The prefix must be at the very beginning of the name : the
-            # identifier is extracted as "everything after the 4 first
-            # characters", so a match elsewhere in the name would produce
-            # a wrong `ld_` variable.
-            if !startswith(var_name, "ptr_") || isempty(var_name[5:end])
-                error("Invalid variable ($var_name) for multiple index dereferencing")
-            end
-
-            var_id = var_name[5:end]
-            ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix)
-
-            # column-major linearisation : (i + (j - 1) * ld) - 1
-            new_index = x.indexes[2]
-            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
-            new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)])
-            new_index = StarpuExprCall(:(+), [x.indexes[1], new_index])
-            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
-
-            return StarpuExprRef(x.ref, [new_index])
-        end
-    end
-
-    return apply(func_to_run, expr)
-end

+ 0 - 179
julia/src/Compiler/Cuda/create_cuda_kernel.jl

@@ -1,179 +0,0 @@
-
-
-# Tells whether `x` is a for loop flagged as independent.
-is_indep_for_expr(x :: StarpuExpr) = isa(x, StarpuExprFor) && x.is_independant
-
-
-# Splits `expr` around its independent for loop and returns the triple
-# (instructions before the loop, loop as a StarpuIndepFor, instructions
-# after the loop). The second element is `nothing` when no independent
-# loop is found at top level.
-#
-# TODO : this is not a correct extraction : only the top level of a block
-# is inspected (example : if (cond) {@indep for ...} else {return} would
-# not work). It would be better to use apply() to find every for loop
-# recursively (NOTE : assert_no_indep_for already exists).
-function extract_init_indep_finish(expr :: StarpuExpr)
-
-    if is_indep_for_expr(expr)
-        return StarpuExpr[], StarpuIndepFor(expr), StarpuExpr[]
-    end
-
-    if !isa(expr, StarpuExprBlock)
-        return [expr], nothing, StarpuExpr[]
-    end
-
-    instructions = expr.exprs
-
-    for (position, instruction) in enumerate(instructions)
-
-        is_indep_for_expr(instruction) || continue
-
-        before_loop = instructions[1 : position-1]
-        loop = StarpuIndepFor(instruction)
-        after_loop = instructions[position+1 : end]
-
-        # same remark : it may be tricked by a Block(Indep_for(...))
-        if any(is_indep_for_expr, after_loop)
-            error("Sequence of several independant loops is not allowed")
-        end
-
-        return before_loop, loop, after_loop
-    end
-
-    return instructions, nothing, StarpuExpr[]
-end
-
-
-
-
-# Lists the variables declared and the variables used without declaration
-# in `expr`.
-#
-# `already_defined` holds the variables which are declared before `expr`
-# (typically function arguments) ; it is not modified.
-#
-# Returns (defined, undefined) where
-#   - defined :: Vector{StarpuExprTypedVar} contains `already_defined`
-#     followed by every variable declared in `expr`, either by a typed
-#     affectation (x :: T = ...) or as the iterator of a for loop (typed
-#     Int64),
-#   - undefined :: Vector{Symbol} contains the names used in `expr` which
-#     have no declaration at all.
-#
-# An error is raised if `expr` contains a function declaration or if a
-# variable is declared twice.
-function analyse_variable_declarations(expr :: StarpuExpr, already_defined :: Vector{StarpuExprTypedVar} = StarpuExprTypedVar[])
-
-    # Element types are given explicitly : they must not depend on what
-    # inference guesses for an empty input, and the type vector must accept
-    # any kind of type.
-    undefined_variables = Symbol[]
-    defined_variable_names = Symbol[x.name for x in already_defined]
-    defined_variable_types = Type[x.typ for x in already_defined]
-
-    function func_to_apply(x :: StarpuExpr)
-
-        if isa(x, StarpuExprFunction)
-            error("No function declaration allowed in this section")
-        end
-
-        if isa(x, StarpuExprVar) || isa(x, StarpuExprTypedVar)
-
-            if !(x.name in defined_variable_names) && !(x.name in undefined_variables)
-                push!(undefined_variables, x.name)
-            end
-
-            return x
-        end
-
-        if isa(x, StarpuExprAffect)
-
-            # only a typed left hand side is a declaration
-            if !isa(x.var, StarpuExprTypedVar)
-                return x
-            end
-
-            name = x.var.name
-            typ = x.var.typ
-
-        elseif isa(x, StarpuExprFor)
-            name = x.iter
-            typ = Int64
-
-        else
-            return x
-        end
-
-        if name in defined_variable_names
-            error("Multiple definition of variable $name")
-        end
-
-        # apply() visits children before their parent : the declared
-        # variable has already been recorded as undefined when it was met
-        # as a plain variable, hence the removal.
-        filter!((sym -> sym != name), undefined_variables)
-        push!(defined_variable_names, name)
-        push!(defined_variable_types, typ)
-
-        return x
-    end
-
-    apply(func_to_apply, expr)
-    defined_variable = StarpuExprTypedVar[StarpuExprTypedVar(n, t) for (n, t) in zip(defined_variable_names, defined_variable_types)]
-
-    return defined_variable, undefined_variables
-end
-
-
-
-# Returns the first variable of `vars` which is called `name`, or `nothing`
-# when there is none.
-function find_variable(name :: Symbol, vars :: Vector{StarpuExprTypedVar})
-
-    candidates = filter((v -> v.name == name), vars)
-
-    return isempty(candidates) ? nothing : candidates[1]
-end
-
-
-
-# Replaces every call to jlstarpu_interval_size found in `expr` by a call
-# to jlstarpu_interval_size__device, with the same arguments.
-function add_device_to_interval_call(expr :: StarpuExpr)
-
-    to_device_call = function (node :: StarpuExpr)
-
-        if !isa(node, StarpuExprCall) || node.func != :jlstarpu_interval_size
-            return node
-        end
-
-        return StarpuExprCall(:jlstarpu_interval_size__device, node.args)
-    end
-
-    return apply(to_device_call, expr)
-end
-
-
-
-# Turns the function `func` into a pair (prekernel, kernel) :
-#   - `kernel` keeps the name of `func` ; its body is the body of the
-#     independent for loop of `func`, preceded by the computation of the
-#     loop iterators from the thread identifier,
-#   - `prekernel` is named CUDA_<name of func> ; it runs the instructions
-#     preceding the loop, launches `kernel`, waits for the local CUDA
-#     stream and then runs the instructions following the loop.
-# Variables declared before the loop and used inside of it are passed to
-# the kernel as arguments.
-#
-# An error is raised when no independent for loop is found.
-function transform_to_cuda_kernel(func :: StarpuExprFunction)
-
-    cpu_func = transform_to_cpu_kernel(func)
-
-    init, indep, finish = extract_init_indep_finish(cpu_func.body)
-
-    if indep === nothing
-        error("No independant for loop has been found") # TODO can fail because extraction is not correct yet
-    end
-
-    prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
-
-    prekernel_instr = vcat(init, prekernel_instr)
-    kernel_instr = vcat(kernel_instr, indep.body)
-
-    # Both calls are also needed for the checks they perform (multiple
-    # definitions, function declarations), even when a result is unused.
-    indep_for_def, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
-    prekernel_def, prekernel_undef = analyse_variable_declarations(StarpuExprBlock(prekernel_instr), cpu_func.args)
-
-    for undef_var in indep_for_undef
-
-        found_var = find_variable(undef_var, prekernel_def)
-
-        if found_var === nothing # TODO : error then ?
-            continue
-        end
-
-        push!(kernel_args, found_var)
-    end
-
-    call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
-    cuda_call = StarpuExprCudaCall(func.func, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
-    push!(prekernel_instr, cuda_call)
-    push!(prekernel_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
-    prekernel_instr = vcat(prekernel_instr, finish)
-
-    prekernel_name = Symbol("CUDA_", func.func)
-    prekernel = StarpuExprFunction(Void, prekernel_name, cpu_func.args, StarpuExprBlock(prekernel_instr))
-    prekernel = flatten_blocks(prekernel)
-
-    kernel = StarpuExprFunction(Void, func.func, kernel_args, StarpuExprBlock(kernel_instr))
-    kernel = add_device_to_interval_call(kernel)
-    kernel = flatten_blocks(kernel)
-
-    return prekernel, kernel
-end

+ 0 - 49
julia/src/Compiler/Cuda/indep_for.jl

@@ -1,49 +0,0 @@
-
-
-# Nest of independent for loops, flattened : iters[k] is the iterator of
-# the k-th loop (outermost first) and sets[k] the interval it runs over.
-struct StarpuIndepFor
-
-    iters :: Vector{Symbol}
-    sets :: Vector{StarpuExprInterval}
-
-    # body of the innermost loop
-    body :: StarpuExpr
-end
-
-
-# Raises an error if `expr` contains an independent for loop ; otherwise
-# returns the result of the traversal of `expr`.
-function assert_no_indep_for(expr :: StarpuExpr)
-
-    reject_indep_for = function (node :: StarpuExpr)
-
-        if !isa(node, StarpuExprFor) || !node.is_independant
-            return node
-        end
-
-        error("Invalid usage of intricated @indep for loops")
-    end
-
-    return apply(reject_indep_for, expr)
-end
-
-
-# Builds a StarpuIndepFor from an independent for loop : directly nested
-# independent loops are collected (going through blocks made of a single
-# instruction), what remains is the body, which must not contain any other
-# independent loop.
-function StarpuIndepFor(expr :: StarpuExprFor)
-
-    expr.is_independant || error("For expression must be prefixed by @indep")
-
-    iters = Symbol[]
-    sets = StarpuExprInterval[]
-    current = expr
-
-    while isa(current, StarpuExprFor) && current.is_independant
-
-        push!(iters, current.iter)
-        push!(sets, current.set)
-        current = current.body
-
-        # unwrap blocks which only hold one instruction
-        while isa(current, StarpuExprBlock) && length(current.exprs) == 1
-            current = current.exprs[1]
-        end
-    end
-
-    return StarpuIndepFor(iters, sets, assert_no_indep_for(current))
-end

+ 0 - 121
julia/src/Compiler/Cuda/indep_for_kernel_ids.jl

@@ -1,121 +0,0 @@
-
-
-# Builds the kernel instructions which decompose the flat thread identifier
-# THREAD_ID into one index per loop dimension.
-#
-# `dims` holds one variable per dimension ; an empty vector is an error.
-# The returned vector contains, in this order :
-#   - the declaration of THREAD_ID followed by an early return for the
-#     threads whose identifier is not below the product of all dimensions,
-#   - for k = 1 .. ndims, the declaration of kernel_ids__index_<k>, built
-#     from THREAD_ID, the product of the dimensions following the k-th one
-#     and the k-th dimension.
-#
-# NOTE(review) : replace_pattern is defined elsewhere ; it presumably
-# substitutes the given expressions, in order, for the `€` placeholders of
-# the pattern - to be confirmed.
-function translate_index_code(dims :: Vector{StarpuExprVar})
-
-    ndims = length(dims)
-
-    if ndims == 0
-        error("No dimension specified")
-    end
-
-    prod = StarpuExprValue(1)
-    output = StarpuExpr[]
-    reversed_dim = reverse(dims)
-    thread_index_patern = @parse € :: Int64 = (€ / €) % €
-    thread_id = @parse THREAD_ID
-
-    # Dimensions are walked from the last one to the first one, `prod`
-    # being the product of the dimensions already walked.
-    for i in (1 : ndims)
-        index_lvalue = StarpuExprVar(Symbol(:kernel_ids__index_, ndims - i + 1))
-        expr = replace_pattern(thread_index_patern, index_lvalue, thread_id, prod, reversed_dim[i])
-        push!(output, expr)
-
-        prod = StarpuExprCall(:(*), [prod, reversed_dim[i]])
-    end
-
-    thread_id_pattern = @parse begin
-
-        € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x
-
-        if (€ >= €)
-            return
-        end
-    end
-
-    # here `prod` is the product of all dimensions
-    bound_verif = replace_pattern(thread_id_pattern, thread_id, thread_id, prod)
-    push!(output, bound_verif)
-
-    # everything was pushed backwards : the bound check has to come first
-    return reverse(output)
-end
-
-
-
-
-
-
-
-# Prepares, for each loop of `ind_for`, the variables describing its
-# interval.
-#
-# For the k-th loop, the variables kernel_ids__start_<k>,
-# kernel_ids__step_<k> and kernel_ids__dim_<k> are declared in the
-# instructions to run before the kernel and are added to the kernel
-# arguments (typed Int64). The declaration of the loop iterator, built from
-# the start, the index kernel_ids__index_<k> and the step, is put aside.
-#
-# Returns (dims, ker_instr_to_add_later_on, pre_kernel_instr, kernel_args,
-# kernel_instr) ; kernel_instr is returned empty by this function.
-#
-# NOTE(review) : replace_pattern is defined elsewhere ; it presumably
-# substitutes the given expressions, in order, for the `€` placeholders of
-# the pattern - to be confirmed.
-function kernel_index_declarations(ind_for :: StarpuIndepFor)
-
-    pre_kernel_instr = StarpuExpr[]
-    kernel_args = StarpuExprTypedVar[]
-    kernel_instr = StarpuExpr[]
-
-    decl_pattern = @parse € :: Int64 = €
-    interv_size_decl_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
-    iter_pattern = @parse € :: Int64 = € + € * €
-
-    dims = StarpuExprVar[]
-    ker_instr_to_add_later_on = StarpuExpr[]
-
-    for k in (1 : length(ind_for.sets))
-
-        set = ind_for.sets[k]
-
-        start_var = starpu_parse(Symbol(:kernel_ids__start_, k))
-        start_decl = replace_pattern(decl_pattern, start_var, set.start)
-
-        step_var = starpu_parse(Symbol(:kernel_ids__step_, k))
-        step_decl = replace_pattern(decl_pattern, step_var, set.step)
-
-        dim_var = starpu_parse(Symbol(:kernel_ids__dim_, k))
-        dim_decl = replace_pattern(interv_size_decl_pattern, dim_var, start_var, step_var, set.stop)
-
-        push!(dims, dim_var)
-
-        push!(pre_kernel_instr, start_decl, step_decl, dim_decl)
-        push!(kernel_args, StarpuExprTypedVar(start_var.name, Int64))
-        push!(kernel_args, StarpuExprTypedVar(step_var.name, Int64))
-        push!(kernel_args, StarpuExprTypedVar(dim_var.name, Int64))
-
-        iter_var = starpu_parse(ind_for.iters[k])
-        index_var = starpu_parse(Symbol(:kernel_ids__index_, k))
-        iter_decl = replace_pattern(iter_pattern, iter_var, start_var, index_var, step_var)
-
-        push!(ker_instr_to_add_later_on, iter_decl)
-    end
-
-
-    return dims, ker_instr_to_add_later_on, pre_kernel_instr , kernel_args, kernel_instr
-end
-
-
-
-# Computes what is needed to run the loops of `ind_for` as a CUDA kernel.
-#
-# Returns (pre_kernel_instr, kernel_args, kernel_instr) :
-#   - pre_kernel_instr : declarations made before the kernel launch, i.e.
-#     the interval variables of each loop, `nthreads` (product of the
-#     dimensions) and `nblocks` (computed from nthreads and
-#     THREADS_PER_BLOCK),
-#   - kernel_args : arguments to give to the kernel,
-#   - kernel_instr : first instructions of the kernel, i.e. the index
-#     decomposition followed by the declaration of the loop iterators.
-function analyse_sets(ind_for :: StarpuIndepFor)
-
-
-    decl_pattern = @parse € :: Int64 = €
-    nblocks_decl_pattern = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK
-
-    dims, ker_instr_to_add, pre_kernel_instr, kernel_args, kernel_instr  = kernel_index_declarations(ind_for)
-
-    dim_prod = @parse 1
-
-    for d in dims
-        dim_prod = StarpuExprCall(:(*), [dim_prod, d])
-    end
-
-    nthreads_var = @parse nthreads
-    nthreads_decl = replace_pattern(decl_pattern, nthreads_var, dim_prod)
-    push!(pre_kernel_instr, nthreads_decl)
-
-    nblocks_var = @parse nblocks
-    nblocks_decl = replace_pattern(nblocks_decl_pattern, nblocks_var, nthreads_var)
-    push!(pre_kernel_instr, nblocks_decl)
-
-
-    index_decomposition = translate_index_code(dims)
-
-    # iterators are declared after the indexes they are computed from
-    push!(kernel_instr, index_decomposition...)
-    push!(kernel_instr, ker_instr_to_add...)
-
-    return pre_kernel_instr, kernel_args, kernel_instr
-end

+ 0 - 60
julia/src/Compiler/Expressions/affect.jl

@@ -1,60 +0,0 @@
-
-#======================================================
-                AFFECTATION
-======================================================#
-
-
-# Affectation `var = expr`.
-struct StarpuExprAffect <: StarpuExpr
-    var :: StarpuExpr   # left hand side
-    expr :: StarpuExpr  # right hand side
-end
-
-# Builds a StarpuExprAffect from a Julia `=` expression ; any other kind of
-# expression is an error.
-function starpu_parse_affect(x :: Expr)
-
-    x.head == :(=) || error("Invalid \"affectation\" expression")
-
-    left_side = starpu_parse(x.args[1])
-    right_side = starpu_parse(x.args[2])
-
-    return StarpuExprAffect(left_side, right_side)
-end
-
-
-# Two affectations are equal when both their sides are equal ; an
-# affectation is never equal to another kind of expression.
-function equals(x :: StarpuExprAffect, y :: StarpuExpr)
-
-    isa(y, StarpuExprAffect) || return false
-
-    return equals(x.var, y.var) && equals(x.expr, y.expr)
-end
-
-
-# Prints the affectation as `var = expr`. When the left hand side is a
-# typed variable, the right hand side is cast to that type, to avoid
-# warnings (or errors for cuda) at compilation time.
-function print(io :: IO, x :: StarpuExprAffect ; indent = 0)
-
-    print(io, x.var, indent = indent)
-    print(io, " = ")
-
-    if isa(x.var, StarpuExprTypedVar)
-        print(io, "(", starpu_type_traduction(x.var.typ), ") (")
-        print(io, x.expr, indent = indent)
-        print(io, ")")
-    else
-        print(io, x.expr, indent = indent)
-    end
-
-end
-
-# Applies `func` to the left hand side, then to the right hand side, then
-# to the affectation rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprAffect)
-
-    new_var = apply(func, expr.var)
-    new_value = apply(func, expr.expr)
-    rebuilt = StarpuExprAffect(new_var, new_value)
-
-    return func(rebuilt)
-end

+ 0 - 68
julia/src/Compiler/Expressions/block.jl

@@ -1,68 +0,0 @@
-
-#======================================================
-                BLOCK
-(series of instruction, not C variable scoping block)
-======================================================#
-
-
-# Sequence of instructions, printed one after the other, each of them
-# followed by a semicolon.
-struct StarpuExprBlock <: StarpuExpr
-    exprs :: Vector{StarpuExpr}
-end
-
-
-# Tells whether an element of a parsed Julia block has to be dropped before
-# translation. Only line number annotations are dropped, whatever form the
-# parser gives them (`:line` expression or LineNumberNode). Anything else,
-# including literal values, is kept.
-is_unwanted(x :: Symbol) = false
-
-is_unwanted(x :: Expr) = (x.head == :line)
-
-is_unwanted(x :: LineNumberNode) = true
-
-# fallback : literals (numbers, strings, ...) may appear in a block
-is_unwanted(x) = false
-
-
-# Builds a StarpuExprBlock from a Julia `block` expression : unwanted
-# elements are removed, the other ones are parsed in order.
-# TODO : let 1 instruction blocks be returned as this single instruction ?
-function starpu_parse_block(x :: Expr)
-
-    x.head == :block || error("Invalid \"block\" expression")
-
-    kept_instructions = filter(!is_unwanted, x.args)
-
-    return StarpuExprBlock(map(starpu_parse, kept_instructions))
-end
-
-
-# Prints every instruction followed by ";", with a line break between two
-# instructions (none after the last one).
-function print(io :: IO, x :: StarpuExprBlock ; indent = 0)
-    last_position = length(x.exprs)
-
-    for (position, instruction) in enumerate(x.exprs)
-        print(io, instruction, indent = indent)
-        print(io, ";")
-        position == last_position || print_newline(io, indent)
-    end
-end
-
-
-
-
-# Applies `func` to every instruction, in order, then to the block rebuilt
-# from the results.
-function apply(func :: Function, expr :: StarpuExprBlock)
-
-    new_instructions = map((instruction -> apply(func, instruction)), expr.exprs)
-
-    return func(StarpuExprBlock(new_instructions))
-end

+ 0 - 75
julia/src/Compiler/Expressions/call.jl

@@ -1,75 +0,0 @@
-
-#======================================================
-                FUNCTION CALL
-======================================================#
-
-
-# Call of the function (or operator) named `func` on `args`.
-struct StarpuExprCall <: StarpuExpr
-    func :: Symbol
-    args :: Vector{StarpuExpr}
-end
-
-
-# Builds a StarpuExprCall from a Julia `call` expression. The called
-# function has to be designated by a plain name.
-function starpu_parse_call(x :: Expr)
-
-    x.head == :call || error("Invalid \"call\" expression")
-
-    callee = starpu_parse(x.args[1])
-
-    if !isa(callee, StarpuExprVar)
-        error("Invalid \"call\" expression : function must be a variable")
-    end
-
-    parsed_args = map(starpu_parse, x.args[2:end])
-
-    return StarpuExprCall(callee.name, parsed_args)
-end
-
-
-# Operators which are printed between their operands when they are called
-# with 2 arguments or more ; any other call is printed as `name(args...)`.
-# `==` and `!=` belong to the list : printed as prefix calls they would
-# not be valid C.
-starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(%), :(==), :(!=))
-
-
-# Prints the call as `func(arg1, arg2, ...)`.
-function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0)
-
-    print(io, x.func, "(")
-
-    for (position, arg) in enumerate(x.args)
-        position == 1 || print(io, ", ")
-        print(io, arg, indent = indent)
-    end
-
-    print(io, ")")
-end
-
-
-# Prints the call as `(arg1) func (arg2) func ...` ; every operand is put
-# between parentheses.
-function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0)
-    for (position, operand) in enumerate(x.args)
-        position == 1 || print(io, " ", x.func, " ")
-        print(io, "(")
-        print(io, operand, indent = indent)
-        print(io, ")")
-    end
-end
-
-# Prints the call, as an infix operation if `func` is an infix operator
-# used with at least 2 arguments, as a regular call otherwise.
-function print(io :: IO, x :: StarpuExprCall ; indent = 0)
-
-    is_infix = length(x.args) >= 2 && x.func in starpu_infix_operators
-    printer = is_infix ? print_infix : print_prefix
-
-    printer(io, x, indent = indent)
-end
-
-
-
-
-# Applies `func` to every argument, in order, then to the call rebuilt
-# from the results (the function name is kept).
-function apply(func :: Function, expr :: StarpuExprCall)
-
-    new_args = map((arg -> apply(func, arg)), expr.args)
-
-    return func(StarpuExprCall(expr.func, new_args))
-end

+ 0 - 60
julia/src/Compiler/Expressions/cuda_call.jl

@@ -1,60 +0,0 @@
-
-
-#======================================================
-                CUDA KERNEL CALL
-======================================================#
-
-
-
-# Launch of the CUDA kernel `ker_name`. It is printed as
-#   ker_name <<< nblocks, threads_per_block, 0, starpu_cuda_get_local_stream() >>> (args...);
-struct StarpuExprCudaCall <: StarpuExpr
-
-    ker_name :: Symbol
-
-    nblocks :: StarpuExpr
-    threads_per_block :: StarpuExpr
-
-    args :: Vector{StarpuExpr}
-
-end
-
-
-# Prints the kernel launch on several lines : the kernel name, the launch
-# configuration between <<< and >>>, then the arguments, with a line break
-# before the 5th, the 9th, ... argument.
-function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0)
-
-    config_indent = indent + starpu_indent_size
-    arg_indent = indent + 2 * starpu_indent_size
-
-    print_newline(io, indent)
-    print(io, expr.ker_name)
-    print_newline(io, config_indent)
-    print(io, "<<< ")
-    print(io, expr.nblocks, indent = arg_indent)
-    print(io, ", ")
-    print(io, expr.threads_per_block, indent = arg_indent)
-    print(io, ", 0, starpu_cuda_get_local_stream()")
-    print_newline(io, config_indent)
-    print(io, ">>> (")
-
-    for (position, arg) in enumerate(expr.args)
-
-        if position > 1
-            print(io, ", ")
-            position % 4 == 1 && print_newline(io, arg_indent + 1)
-        end
-
-        print(io, arg, indent = arg_indent)
-    end
-
-    print(io, ");")
-    print_newline(io, indent)
-
-end
-
-
-# Applies `func` to the whole content of the launch configuration, to the
-# whole content of every argument, then to the kernel call rebuilt from the
-# results, as it is done for every other kind of expression. The kernel
-# name is kept.
-function apply(func :: Function, expr :: StarpuExprCudaCall)
-
-    # apply() and not func() : sub-expressions must be traversed as well
-    nblocks = apply(func, expr.nblocks)
-    threads_per_block = apply(func, expr.threads_per_block)
-    args = map((x -> apply(func, x)), expr.args)
-
-    return func(StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args))
-end

+ 0 - 44
julia/src/Compiler/Expressions/field.jl

@@ -1,44 +0,0 @@
-
-
-#======================================================
-                STRUCTURE FIELDS
-======================================================#
-
-
-
-# Access to the field `field` of the expression `left`.
-struct StarpuExprField <: StarpuExpr
-
-    left :: StarpuExpr
-    field :: Symbol
-
-    # printed as `(left)->field` when true, as `(left).field` otherwise
-    is_an_arrow :: Bool
-end
-
-
-# Builds a StarpuExprField from a Julia `a.b` expression, where `b` has to
-# be a quoted symbol. The result is never an arrow access.
-function starpu_parse_field(x :: Expr)
-
-    if !(x.head == :(.) && length(x.args) == 2)
-        error("Invalid parsing of dot expression")
-    end
-
-    left = starpu_parse(x.args[1])
-    quoted_field = x.args[2]
-
-    if !(isa(quoted_field, QuoteNode) && isa(quoted_field.value, Symbol))
-        error("Invalid parsing of dot expression")
-    end
-
-    return StarpuExprField(left, quoted_field.value, false)
-end
-
-
-# Prints `(left).field`, or `(left)->field` for an arrow access.
-function print(io :: IO, x :: StarpuExprField ; indent = 0)
-    accessor = x.is_an_arrow ? "->" : '.'
-
-    print(io, "(")
-    print(io, x.left, indent = indent)
-    print(io, ")", accessor, x.field)
-end
-
-
-
-# Applies `func` to the whole content of the left expression, then to the
-# field access rebuilt from the result.
-function apply(func :: Function, expr :: StarpuExprField)
-
-    # apply() and not func() : the left expression must be traversed as well
-    left = apply(func, expr.left)
-
-    return func(StarpuExprField(left, expr.field, expr.is_an_arrow))
-end

+ 0 - 100
julia/src/Compiler/Expressions/for.jl

@@ -1,100 +0,0 @@
-
-#======================================================
-                FOR LOOPS
-======================================================#
-
-
-# For loop : `iter` takes its values in the interval `set`.
-struct StarpuExprFor <: StarpuExpr
-
-    iter :: Symbol
-    set:: StarpuExprInterval
-    body :: StarpuExpr
-
-    is_independant :: Bool
-    # instructions printed right before the loop ; the parser leaves it empty
-    set_declarations :: Vector{StarpuExpr}
-
-end
-
-
-
-# Builds a StarpuExprFor from a Julia `for` expression. The iterator has to
-# be a plain variable and the set it runs over an interval.
-# `is_independant` is stored as is in the result.
-function starpu_parse_for(x :: Expr; is_independant = false)
-
-    x.head == :for || error("Invalid \"for\" expression")
-
-    iteration = x.args[1]
-
-    iteration.head == :(=) || error("Invalid \"for\" iterator affectation")
-
-    iter = starpu_parse(iteration.args[1])
-
-    isa(iter, StarpuExprVar) || error("Invalid \"for\" iterator")
-
-    set = starpu_parse(iteration.args[2])
-
-    isa(set, StarpuExprInterval) || error("Set of values in \"for\" loop must be an interval")
-
-    body = starpu_parse(x.args[2])
-
-    return StarpuExprFor(iter.name, set, body, is_independant, StarpuExpr[])
-end
-
-
-
-
-
-# Prints the set declarations, then the loop as a C for loop. The loop
-# header relies on variables named after the identifier of the interval
-# (start_<id>, stop_<id>, step_<id>, dim_<id>, iter_<id>) :
-#   - when the step is a literal value, the iterator goes from start_<id>
-#     to stop_<id>, the comparison depending on the sign of the step,
-#   - otherwise the counter iter_<id> goes from 0 to dim_<id> (excluded)
-#     while the iterator is increased by step_<id>.
-function print(io :: IO, x :: StarpuExprFor ; indent = 0)
-
-    print_newline(io, indent)
-    print(io, StarpuExprBlock(x.set_declarations), indent = indent)
-
-    suffix = x.set.id
-    start = "start_" * suffix
-
-    print_newline(io, indent, 2)
-
-    if isa(x.set.step, StarpuExprValue)
-        stop = "stop_" * suffix
-
-        print(io, "for (", x.iter, " = ", start, " ; ")
-        comparison_op = (x.set.step.value >= 0) ? "<=" : ">="
-        print(io, x.iter, " ", comparison_op, " ", stop, " ; ")
-        print(io, x.iter, " += ", x.set.step.value, ")")
-
-    else
-        step = "step_" * suffix
-        dim = "dim_" * suffix
-        iter = "iter_" * suffix
-
-        print(io, "for (", iter, " = 0, ", x.iter, " = ", start, " ; ")
-        print(io, iter, " < ", dim, " ; ")
-        print(io, iter, " += 1, ", x.iter, " += ", step, ")")
-
-    end
-
-    body_indent = indent + starpu_indent_size
-
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, body_indent)
-    print(io, x.body, indent = body_indent)
-    print_newline(io, indent)
-    print(io, "}")
-    print_newline(io, indent)
-
-end
-
-
-
-# Applies `func` to the set declarations, to the interval and to the body
-# (in this order), then to the loop rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprFor)
-
-    new_declarations = map( (decl -> apply(func, decl)), expr.set_declarations)
-    new_set = apply(func, expr.set)
-    new_body = apply(func, expr.body)
-
-    rebuilt = StarpuExprFor(expr.iter, new_set, new_body, expr.is_independant, new_declarations)
-
-    return func(rebuilt)
-end

+ 0 - 85
julia/src/Compiler/Expressions/function.jl

@@ -1,85 +0,0 @@
-
-
-#======================================================
-                FUNCTION DECLARATION
-======================================================#
-
-
-# Declaration of the function `func`, with its return type, its typed
-# arguments and its body.
-struct StarpuExprFunction <: StarpuExpr
-    ret_type :: Type
-    func :: Symbol
-    args :: Vector{StarpuExprTypedVar}
-    body :: StarpuExpr
-end
-
-
-# Builds a StarpuExprFunction from a Julia `function` expression. The
-# prototype must be of the form `name(arg :: Type, ...) :: ReturnType` :
-# the return type and the type of every argument are mandatory.
-function starpu_parse_function(x :: Expr)
-
-    x.head == :function || error("Invalid \"function\" expression")
-
-    typed_decl = starpu_parse(x.args[1])
-
-    if !isa(typed_decl, StarpuExprTypedExpr)
-        error("Invalid \"function\" prototype : a return type must me explicited")
-    end
-
-    prototype = typed_decl.expr
-
-    isa(prototype, StarpuExprCall) || error("Invalid \"function\" prototype")
-
-    if !all((arg -> isa(arg, StarpuExprTypedVar)), prototype.args)
-        error("Invalid \"function\" argument list")
-    end
-
-    arg_list = StarpuExprTypedVar[arg for arg in prototype.args]
-    body = starpu_parse(x.args[2])
-
-    return StarpuExprFunction(typed_decl.typ, prototype.func, arg_list, body)
-end
-
-
-
-# Prints the function as a C definition : return type, name, arguments
-# (with a line break before the 5th, the 9th, ... argument), then the body
-# between braces, followed by empty lines.
-function print(io :: IO, x :: StarpuExprFunction ; indent = 0)
-
-    inner_indent = indent + starpu_indent_size
-
-    print(io, starpu_type_traduction(x.ret_type), " ")
-    print(io, x.func, '(')
-
-    for (position, arg) in enumerate(x.args)
-
-        if position > 1
-            print(io, ", ")
-            position % 4 == 1 && print_newline(io, inner_indent + length(String(x.func)) + 13)
-        end
-
-        print(io, arg, indent = inner_indent)
-    end
-
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, inner_indent)
-    print(io, x.body, indent = inner_indent)
-    print_newline(io, indent)
-    print(io, "}\n\n")
-    print_newline(io, indent)
-end
-
-
-
-# Applies `func` to the arguments, then to the body, then to the function
-# rebuilt from the results (name and return type are kept).
-function apply(func :: Function, expr :: StarpuExprFunction)
-
-    new_args = map((arg -> apply(func, arg)), expr.args)
-    new_body = apply(func, expr.body)
-    rebuilt = StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
-
-    return func(rebuilt)
-end

+ 0 - 94
julia/src/Compiler/Expressions/if.jl

@@ -1,94 +0,0 @@
-
-
-#======================================================
-                IF STATEMENT
-======================================================#
-
-
-
-# Conditional without else branch.
-struct StarpuExprIf <: StarpuExpr
-    cond :: StarpuExpr
-    then_statement :: StarpuExpr
-end
-
-
-# Conditional with an else branch.
-struct StarpuExprIfElse <: StarpuExpr
-    cond :: StarpuExpr
-    then_statement :: StarpuExpr
-    else_statement :: StarpuExpr
-end
-
-
-# Builds a StarpuExprIf, or a StarpuExprIfElse when there is an else
-# branch, from a Julia `if` expression.
-function starpu_parse_if(x :: Expr)
-
-    x.head == :if || error("Invalid \"if\" expression")
-
-    nb_parts = length(x.args)
-
-    nb_parts >= 2 || error("Invalid \"if\" statement")
-
-    cond = starpu_parse(x.args[1])
-    then_statement = starpu_parse(x.args[2])
-
-    if nb_parts != 2
-        else_statement = starpu_parse(x.args[3])
-        return StarpuExprIfElse(cond, then_statement, else_statement)
-    end
-
-    return StarpuExprIf(cond, then_statement)
-end
-
-
-# Prints the conditional as a C if statement, branches being put between
-# braces. The else branch is only printed for a StarpuExprIfElse.
-function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0)
-
-    inner_indent = indent + starpu_indent_size
-
-    print_newline(io, indent)
-    print(io, "if (")
-    print(io, x.cond, indent = inner_indent)
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, inner_indent)
-    print(io, x.then_statement, indent = inner_indent)
-    print_newline(io, indent)
-    print(io, "}")
-
-    if isa(x, StarpuExprIfElse)
-        print(io, " else")
-        print_newline(io, indent)
-        print(io, "{")
-        print_newline(io, inner_indent)
-        print(io, x.else_statement, indent = inner_indent)
-        print_newline(io, indent)
-        print(io, "}")
-        print_newline(io, indent)
-    end
-
-    return
-end
-
-
-
-# Applies `func` to the condition, then to the branch, then to the
-# conditional rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprIf)
-
-    new_cond = apply(func, expr.cond)
-    new_then = apply(func, expr.then_statement)
-
-    return func(StarpuExprIf(new_cond, new_then))
-end
-
-
-
-# Applies `func` to the condition, then to both branches (then first),
-# then to the conditional rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprIfElse)
-
-    new_cond = apply(func, expr.cond)
-    new_then = apply(func, expr.then_statement)
-    new_else = apply(func, expr.else_statement)
-
-    return func(StarpuExprIfElse(new_cond, new_then, new_else))
-end

+ 0 - 48
julia/src/Compiler/Expressions/interval.jl

@@ -1,48 +0,0 @@
-
-#======================================================
-                INTERVALS
-======================================================#
-
-
-# Interval `start : step : stop`.
-struct StarpuExprInterval <: StarpuExpr
-    start :: StarpuExpr
-    step :: StarpuExpr
-    stop :: StarpuExpr
-
-    # identifier of the interval ; it is used as a suffix for the names of
-    # the variables of the for loop running over the interval
-    id :: String
-
-    # `id` is drawn by rand_string() unless it is given
-    function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string())
-        return new(start, step, stop, id)
-    end
-
-end
-
-
-# Builds a StarpuExprInterval from a Julia `:` expression, which is either
-# `start : stop` (the step is then the value 1) or `start : step : stop`.
-function starpu_parse_interval(x :: Expr)
-
-    x.head == :(:) || error("Invalid \"interval\" expression")
-
-    start = starpu_parse(x.args[1])
-    second_bound = starpu_parse(x.args[2])   # the stop, or the step if 3 bounds are given
-
-    if length(x.args) != 2
-        stop = starpu_parse(x.args[3])
-        return StarpuExprInterval(start, second_bound, stop)
-    end
-
-    return StarpuExprInterval(start, StarpuExprValue(1), second_bound)
-end
-
-
-
-# Applies `func` to the start, the step and the stop (in this order), then
-# to the interval rebuilt from the results. The identifier is kept.
-function apply(func :: Function, expr :: StarpuExprInterval)
-
-    new_start = apply(func, expr.start)
-    new_step = apply(func, expr.step)
-    new_stop = apply(func, expr.stop)
-    rebuilt = StarpuExprInterval(new_start, new_step, new_stop, id = expr.id)
-
-    return func(rebuilt)
-end

+ 0 - 70
julia/src/Compiler/Expressions/ref.jl

@@ -1,70 +0,0 @@
-
-#======================================================
-                ARRAYS AND REFERENCES
-======================================================#
-
-
-# Indexed access `ref[indexes...]` ; every index is printed between its own
-# pair of brackets.
-struct StarpuExprRef <: StarpuExpr
-    ref :: StarpuExpr
-    indexes :: Vector{StarpuExpr}
-end
-
-
-# Builds a StarpuExprRef from a Julia `ref` expression : the first element
-# is the indexed expression, the other ones are the indexes.
-function starpu_parse_ref(x :: Expr)
-
-    x.head == :ref || error("Invalid \"reference\" expression")
-
-    indexed = starpu_parse(x.args[1])
-    parsed_indexes = map(starpu_parse, x.args[2:end])
-
-    return StarpuExprRef(indexed, parsed_indexes)
-end
-
-
-
-# Two indexed accesses are equal when the indexed expressions are equal and
-# the indexes are pairwise equal ; an indexed access is never equal to
-# another kind of expression.
-function equals(x :: StarpuExprRef, y :: StarpuExpr)
-
-    isa(y, StarpuExprRef) || return false
-
-    if !(equals(x.ref, y.ref) && length(x.indexes) == length(y.indexes))
-        return false
-    end
-
-    index_comparisons = map(equals, x.indexes, y.indexes)
-
-    return all(index_comparisons)
-end
-
-
-
-
-# Prints the indexed expression followed by every index between brackets.
-function print(io :: IO, x :: StarpuExprRef ; indent = 0)
-
-    print(io, x.ref, indent = indent)
-
-    for index in x.indexes
-        print(io, "[")
-        print(io, index, indent = indent)
-        print(io, "]")
-    end
-
-end
-
-
-
-# Applies `func` to the indexed expression, then to every index, then to
-# the indexed access rebuilt from the results.
-function apply(func :: Function, expr :: StarpuExprRef)
-
-    new_ref = apply(func, expr.ref)
-    new_indexes = map((index -> apply(func, index)), expr.indexes)
-
-    return func(StarpuExprRef(new_ref, new_indexes))
-end

+ 0 - 33
julia/src/Compiler/Expressions/return.jl

@@ -1,33 +0,0 @@
-
-#======================================================
-                RETURN EXPRESSION
-======================================================#
-
-
-# Return statement ; `value` is the returned expression.
-struct StarpuExprReturn <: StarpuExpr
-    value :: StarpuExpr
-end
-
-# Builds a StarpuExprReturn from a Julia `return` expression.
-function starpu_parse_return(x :: Expr)
-
-    x.head == :return || error("Invalid \"return\" expression")
-
-    return StarpuExprReturn(starpu_parse(x.args[1]))
-end
-
-
-# Prints `return ` followed by the returned expression.
-function print(io :: IO, x :: StarpuExprReturn ; indent = 0)
-    returned = x.value
-
-    print(io, "return ")
-    print(io, returned, indent = indent)
-end
-
-
-
-# Applies `func` to the returned expression, then to the return statement
-# rebuilt from the result.
-function apply(func :: Function, expr :: StarpuExprReturn)
-
-    new_value = apply(func, expr.value)
-
-    return func(StarpuExprReturn(new_value))
-end

+ 0 - 63
julia/src/Compiler/Expressions/simple_expressions.jl

@@ -1,63 +0,0 @@
-
-
-# Supertype of every node of the expression tree.
-abstract type StarpuExpr end
-
-
-# Default traversal : `func` is directly applied to the expression. Kinds
-# of expression holding sub-expressions have their own method.
-apply(func :: Function, expr :: StarpuExpr) = func(expr)
-
-
-
-
-# Variable, designated by its name.
-struct StarpuExprVar <: StarpuExpr
-    name :: Symbol
-end
-
-# A variable is printed as its name.
-function print(io :: IO, x :: StarpuExprVar ; indent = 0)
-    return print(io, x.name)
-end
-
-
-
-# Literal value ; this is what starpu_parse() produces for anything which
-# has no dedicated method.
-struct StarpuExprValue <: StarpuExpr
-    value :: Any
-end
-
-
-# Prints the value as a C literal : nothing at all for `nothing`, strings
-# between double quotes, characters between single quotes, any other value
-# as Julia prints it. Strings and characters are not escaped.
-function print(io :: IO, x :: StarpuExprValue ; indent = 0)
-
-    value = x.value
-
-    # identity test : `==` can be overloaded and may not return a Bool
-    if value === nothing
-        return
-    end
-
-    if isa(value, AbstractString)
-        print(io, '"', value, '"')
-        return
-    end
-
-    if isa(value, Char)
-        print(io, '\'', value, '\'')
-        return
-    end
-
-    print(io, value)
-end
-
-
-
-
-# Placeholder for an invalid expression.
-struct StarpuExprInvalid <: StarpuExpr
-end
-
-# An invalid expression is printed as INVALID.
-function print(io :: IO, x :: StarpuExprInvalid ; indent = 0)
-    return print(io, "INVALID")
-end
-
-
-
-# Fallback of the parser : anything without a dedicated method is a
-# literal value.
-starpu_parse(raw_value :: Any) = StarpuExprValue(raw_value)
-
-# A symbol is a variable.
-starpu_parse(sym :: Symbol) = StarpuExprVar(sym)

+ 0 - 109
julia/src/Compiler/Expressions/typed.jl

@@ -1,109 +0,0 @@
-
-#======================================================
-                TYPED EXPRESSION
-======================================================#
-
-
-
-
-
-
-
-# Supertype of the expressions which carry a type annotation.
-abstract type StarpuExprTyped <: StarpuExpr end
-
-
-# Variable annotated with a type (`name :: typ`).
-struct StarpuExprTypedVar <: StarpuExprTyped
-    name :: Symbol
-    typ :: Type
-end
-
-# Any other expression annotated with a type (`expr :: typ`).
-struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ?
-    expr :: StarpuExpr
-    typ :: Type
-end
-
-
-# Builds a typed expression from a Julia `::` expression. The annotation is
-# evaluated and has to be a type. The result is a StarpuExprTypedVar when
-# the annotated expression is a variable, a StarpuExprTypedExpr otherwise.
-function starpu_parse_typed(x :: Expr)
-
-    x.head == :(::) || error("Invalid type assigned expression")
-
-    expr = starpu_parse(x.args[1])
-
-    typ = try
-        eval(x.args[2]) :: Type
-    catch
-        error("Invalid type in type assigned expression")
-    end
-
-    if !isa(expr, StarpuExprVar)
-        return StarpuExprTypedExpr(expr, typ)
-    end
-
-    return StarpuExprTypedVar(expr.name, typ)
-end
-
-
-
-
-
-# C name of every supported Julia scalar type (see starpu_type_traduction()
-# for arrays and pointers).
-starpu_type_traduction_dict = Dict(
-    Void => "void",
-    Int32 => "int32_t",
-    UInt32 => "uint32_t",
-    Float32 => "float",
-    Int64 => "int64_t",
-    UInt64 => "uint64_t",
-    Float64 => "double"
-)
-
-
-
-# Returns the C name of the Julia type `x` : arrays are handled by
-# starpu_type_traduction_array(), a pointer is the translation of the
-# pointed type followed by "*", any other type is looked up in
-# starpu_type_traduction_dict.
-function starpu_type_traduction(x)
-
-    x <: Array && return starpu_type_traduction_array(x)
-
-    x <: Ptr && return starpu_type_traduction(eltype(x)) * "*"
-
-    return starpu_type_traduction_dict[x]
-end
-
-
-# Returns the C name of an array type : the translation of the element
-# type followed by one "*" per dimension.
-function starpu_type_traduction_array(x :: Type{Array{T,N}}) where {T,N}
-
-    return starpu_type_traduction(T) * ("*" ^ N)
-end
-
-
-
-# A typed variable is printed as a C declaration (`type name`) ; for any
-# other typed expression, only the expression is printed.
-function print(io :: IO, x :: StarpuExprTyped ; indent = 0)
-
-    if !isa(x, StarpuExprTypedVar)
-        print(io, x.expr, indent = indent)
-        return
-    end
-
-    print(io, starpu_type_traduction(x.typ), " ")
-    print(io, x.name)
-end
-
-
-
-# Applies `func` to the annotated expression, then to the typed expression
-# rebuilt from the result (the type is kept).
-function apply(func :: Function, expr :: StarpuExprTypedExpr)
-
-    rebuilt = StarpuExprTypedExpr(apply(func, expr.expr), expr.typ)
-
-    return func(rebuilt)
-end

+ 0 - 53
julia/src/Compiler/Expressions/while.jl

@@ -1,53 +0,0 @@
-
-#======================================================
-                While loop
-======================================================#
-
-
-struct StarpuExprWhile <: StarpuExpr
-    cond :: StarpuExpr
-    body :: StarpuExpr
-end
-
-function starpu_parse_while(x :: Expr)
-
-    if (x.head != :while)
-        error("Invalid \"while\" loop")
-    end
-
-    len = length(x.args)
-
-    if (len < 2)
-        error("Invalid \"while\" loop")
-    end
-
-    cond = starpu_parse(x.args[1])
-    body = starpu_parse(x.args[2])
-
-    return StarpuExprWhile(cond, body)
-end
-
-
-function print(io :: IO, x :: StarpuExprWhile ; indent = 0)
-    print_newline(io, indent)
-    print(io, "while (")
-    print(io, x.cond, indent = indent + starpu_indent_size)
-    print(io, ")")
-    print_newline(io, indent)
-    print(io, "{")
-    print_newline(io, indent + starpu_indent_size)
-    print(io, x.body, indent = indent + starpu_indent_size)
-    print_newline(io, indent)
-    print(io, "}")
-    print_newline(io, indent)
-end
-
-
-
-function apply(func :: Function, expr :: StarpuExprWhile)
-
-    cond = apply(func, expr.cond)
-    body = apply(func, expr.body)
-
-    return func(StarpuExprWhile(cond, body))
-end

+ 0 - 69
julia/src/Compiler/Generate_files/c_files.jl

@@ -1,69 +0,0 @@
-
-
-
-global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
-
-const cpu_kernel_file_start = "#include <stdio.h>
-#include <stdint.h>
-#include <starpu.h>
-
-static inline long long jlstarpu_max(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
-{
-    if (stop >= start){
-            return jlstarpu_max(0, (stop - start + 1) / step);
-    } else {
-            return jlstarpu_max(0, (stop - start - 1) / step);
-    }
-}
-
-"
-
-
-"""
-	Opens a new C source file, where generated CPU kernels will be written
-"""
-function starpu_new_cpu_kernel_file(file_name :: String)
-
-    global generated_cpu_kernel_file_name = file_name
-
-    kernel_file = open(file_name, "w")
-    print(kernel_file, cpu_kernel_file_start)
-    close(kernel_file)
-
-    return nothing
-end
-
-
-"""
-	Executes the StarPU C compiler to the following function declaration.
-	If no call to starpu_new_cpu_kernel_file has been made before, it only
-	prints the reulting function. Otherwise, it writes into the source file
-	specified when starpu_new_cpu_kernel_file was called.
-"""
-macro cpu_kernel(x)
-
-    starpu_expr = transform_to_cpu_kernel(starpu_parse(x))
-
-    return quote
-
-        to_stdout = ($(esc(generated_cpu_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-			println("\nNo specified CPU kernel file to write into : writting to STDOUT instead\n")
-            kernel_file = STDOUT
-        else
-            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, $starpu_expr)
-
-        if (!to_stdout)
-            close(kernel_file)
-        end
-    end
-end

+ 0 - 134
julia/src/Compiler/Generate_files/cuda_files.jl

@@ -1,134 +0,0 @@
-
-
-
-global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
-
-const cuda_kernel_file_start = "#include <stdio.h>
-#include <stdint.h>
-#include <starpu.h>
-
-#define THREADS_PER_BLOCK 64
-
-static inline long long jlstarpu_max(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
-{
-    if (stop >= start){
-            return jlstarpu_max(0, (stop - start + 1) / step);
-    } else {
-            return jlstarpu_max(0, (stop - start - 1) / step);
-    }
-}
-
-
-__device__ static inline long long jlstarpu_max__device(long long a, long long b)
-{
-	return (a > b) ? a : b;
-}
-
-__device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
-{
-	if (stop >= start){
-		return jlstarpu_max__device(0, (stop - start + 1) / step);
-	} else {
-		return jlstarpu_max__device(0, (stop - start - 1) / step);
-	}
-}
-
-
-"
-
-"""
-	Opens a new Cuda source file, where generated GPU kernels will be written
-"""
-function starpu_new_cuda_kernel_file(file_name :: String)
-
-    global generated_cuda_kernel_file_name = file_name
-
-    kernel_file = open(file_name, "w")
-    print(kernel_file, cuda_kernel_file_start)
-    close(kernel_file)
-
-    return nothing
-end
-
-
-"""
-	Executes the StarPU Cuda compiler to the following function declaration.
-	If no call to starpu_new_cuda_kernel_file has been made before, it only
-	prints the reulting function. Otherwise, it writes into the source file
-	specified when starpu_new_cuda_kernel_file was called.
-"""
-macro cuda_kernel(x)
-
-    prekernel, kernel = transform_to_cuda_kernel(starpu_parse(x))
-
-    return quote
-
-        to_stdout = ($(esc(generated_cuda_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-			println("\nNo specified CUDA kernel file to write into : writting to STDOUT instead\n")
-            kernel_file = STDOUT
-        else
-            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, "__global__ ", $kernel)
-        print(kernel_file, "\nextern \"C\" ", $prekernel)
-
-        if (!to_stdout)
-            close(kernel_file)
-        end
-    end
-end
-
-
-
-"""
-	Executes @cuda_kernel and @cpu_kernel
-"""
-macro cpu_cuda_kernel(x)
-
-	parsed = starpu_parse(x)
-	cpu_expr = transform_to_cpu_kernel(parsed)
-	prekernel, kernel = transform_to_cuda_kernel(parsed)
-
-	return quote
-
-		to_stdout = ($(esc(generated_cpu_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-            kernel_file = STDOUT
-			println("\nNo specified CPU kernel file to write into : writting to STDOUT instead\n")
-        else
-            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, $cpu_expr)
-
-        if (!to_stdout)
-            close(kernel_file)
-        end
-
-
-		to_stdout = ($(esc(generated_cuda_kernel_file_name)) == "PRINT TO STDOUT")
-
-        if to_stdout
-            kernel_file = STDOUT
-			println("\nNo specified CUDA kernel file to write into : writting to STDOUT instead\n")
-        else
-            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "a+")
-        end
-
-        print(kernel_file, "__global__ ", $kernel)
-        print(kernel_file, "\nextern \"C\" ", $prekernel)
-
-        if (!to_stdout)
-            close(kernel_file)
-        end
-	end
-end

+ 0 - 54
julia/src/Compiler/Generate_files/so_files.jl

@@ -1,54 +0,0 @@
-
-
-"""
-	Compiles C source file opened by starpu_new_cpu_kernel_file
-    and filled by @cpu_kernel declarations.
-    Output file is a shared library which can be provided to starpu_init() in
-    order to find kernel.
-"""
-function compile_cpu_kernels(output_file :: String)
-
-    starpu_cflags = readstring(`pkg-config --cflags starpu-1.3`)[1:end-1]
-    starpu_libs = readstring(`pkg-config --libs starpu-1.3`)[1:end-1]
-    options = "-O3 -shared -fPIC"
-
-    system("gcc $generated_cpu_kernel_file_name $options $starpu_cflags $starpu_libs -o $output_file")
-
-    global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
-
-    return nothing
-end
-
-
-"""
-	Compiles Cuda source file opened by starpu_new_cuda_kernel_file
-    and filled by @cuda_kernel declarations.
-    Output file is a shared library which can be provided to starpu_init() in
-    order to find kernel.
-"""
-function compile_cuda_kernels(output_file :: String)
-
-    starpu_cflags = readstring(`pkg-config --cflags starpu-1.3`)[1:end-1]
-    starpu_libs = readstring(`pkg-config --libs starpu-1.3`)[1:end-1]
-    options = " -O3 --shared --compiler-options \'-fPIC\' "
-
-    system("nvcc $generated_cuda_kernel_file_name $options $starpu_cflags $starpu_libs -o $output_file")
-
-    global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
-
-    return nothing
-end
-
-
-"""
-    Combines several shared library into a new one.
-    Can be used to have both CPU and Cuda kernels (from compile_cpu_kernels
-    compile_cuda_kernels) accessible from the same library.
-"""
-function combine_kernel_files(output_file :: String, input_files :: Vector{String})
-
-    input_str = (*)(map((x -> x * " "), input_files)...)
-
-    system("gcc -shared -fPIC $input_str -o $output_file")
-
-end

+ 0 - 39
julia/src/Compiler/include.jl

@@ -1,39 +0,0 @@
-
-
-
-include("utils.jl")
-
-include("Expressions/simple_expressions.jl")
-include("Expressions/affect.jl")
-include("Expressions/block.jl")
-include("Expressions/call.jl")
-include("Expressions/cuda_call.jl")
-include("Expressions/field.jl")
-include("Expressions/interval.jl")
-include("Expressions/for.jl")
-include("Expressions/typed.jl")
-include("Expressions/function.jl")
-include("Expressions/if.jl")
-include("Expressions/ref.jl")
-include("Expressions/return.jl")
-include("Expressions/while.jl")
-
-include("parsing.jl")
-
-include("expression_manipulation.jl")
-
-include("C/substitute_args.jl")
-include("C/substitute_func_calls.jl")
-include("C/substitute_indexing.jl")
-include("C/add_for_loop_declarations.jl")
-include("C/flatten_blocks.jl")
-include("C/create_cpu_kernel.jl")
-
-include("Cuda/indep_for.jl")
-include("Cuda/indep_for_kernel_ids.jl")
-include("Cuda/create_cuda_kernel.jl")
-
-
-include("Generate_files/c_files.jl")
-include("Generate_files/cuda_files.jl")
-include("Generate_files/so_files.jl")

+ 0 - 146
julia/src/Wrapper/Julia/starpu_codelet.jl

@@ -1,146 +0,0 @@
-
-
-export StarpuDataAccessMode
-export STARPU_NONE,STARPU_R,STARPU_W,STARPU_RW, STARPU_SCRATCH
-export STARPU_REDUX,STARPU_COMMUTE, STARPU_SSEND, STARPU_LOCALITY
-export STARPU_ACCESS_MODE_MAX
-
-@enum(StarpuDataAccessMode,
-
-    STARPU_NONE = 0,
-    STARPU_R = (1 << 0),
-    STARPU_W = (1 << 1),
-    STARPU_RW = ((1 << 0) | (1 << 1)),
-    STARPU_SCRATCH = (1 << 2),
-    STARPU_REDUX = (1 << 3),
-    STARPU_COMMUTE = (1 << 4),
-    STARPU_SSEND = (1 << 5),
-    STARPU_LOCALITY = (1 << 6),
-    STARPU_ACCESS_MODE_MAX = (1 << 7)
-
-)
-
-
-export StarpuCodelet
-struct StarpuCodelet
-
-    where_to_execute :: UInt32
-
-    cpu_func :: String
-    gpu_func :: String
-
-    modes :: Vector{StarpuDataAccessMode}
-
-    perfmodel :: StarpuPerfmodel
-
-    c_codelet :: Ptr{Void}
-
-
-    function StarpuCodelet(;
-        cpu_func :: String = "",
-        gpu_func :: String = "",
-        modes :: Vector{StarpuDataAccessMode} = StarpuDataAccessMode[],
-        perfmodel :: StarpuPerfmodel = StarpuPerfmodel(),
-        where_to_execute :: Union{Void, UInt32} = nothing
-    )
-
-        if (length(modes) > STARPU_NMAXBUFS)
-            error("Codelet has too much buffers ($(length(modes)) but only $STARPU_NMAXBUFS are allowed)")
-        end
-
-        real_c_codelet_ptr = @starpucall jlstarpu_new_codelet Ptr{Void} ()
-        push!(jlstarpu_allocated_structures, real_c_codelet_ptr)
-
-        if (where_to_execute == nothing)
-            real_where = ((cpu_func != "") * STARPU_CPU) | ((gpu_func != "") * STARPU_CUDA)
-        else
-            real_where = where_to_execute
-        end
-
-        output = new(real_where, cpu_func, gpu_func, modes, perfmodel, real_c_codelet_ptr)
-
-        starpu_c_codelet_update(output)
-
-        return output
-    end
-end
-
-
-
-function starpu_c_codelet_update(cl :: StarpuCodelet)
-
-    translating_cl = StarpuCodeletTranslator(cl)
-
-    @starpucall(jlstarpu_codelet_update,
-                Void, (Ptr{Void}, Ptr{Void}),
-                Ref{StarpuCodeletTranslator}(translating_cl),
-                cl.c_codelet
-            )
-end
-
-
-
-function load_starpu_function_pointer(func_name :: String)
-
-    if (isempty(func_name))
-        return C_NULL
-    end
-
-    func_pointer = Libdl.dlsym(starpu_tasks_library_handle, func_name)
-
-    if (func_pointer == C_NULL)
-        error("Couldn't find function symbol $func_name into extern library file $starpu_tasks_library")
-    end
-
-    return func_pointer
-end
-
-
-
-mutable struct StarpuCodeletTranslator
-
-    where_to_execute :: UInt32
-
-    cpu_func :: Ptr{Void}
-    cpu_func_name :: Cstring
-
-    gpu_func :: Ptr{Void}
-
-    nbuffers :: Cint
-    modes :: Ptr{Void}
-
-    perfmodel :: Ptr{Void}
-
-
-
-    function StarpuCodeletTranslator(cl :: StarpuCodelet)
-
-        output = new()
-
-        if (iszero(cl.where_to_execute))
-            error("StarpuCodelet field \"where_to_execute\" is empty")
-        end
-
-        output.where_to_execute = cl.where_to_execute
-
-        cpu_func_ptr = load_starpu_function_pointer(cl.cpu_func)
-        gpu_func_ptr = load_starpu_function_pointer(cl.gpu_func)
-
-        if (cpu_func_ptr == C_NULL && gpu_func_ptr == C_NULL)
-            error("No function specified inside codelet")
-        end
-
-        output.cpu_func = cpu_func_ptr
-        output.cpu_func_name = Cstring_from_String(cl.cpu_func)
-
-        output.gpu_func = gpu_func_ptr
-
-        output.nbuffers = Cint(length(cl.modes))
-        output.modes = pointer(cl.modes)
-
-        output.perfmodel = cl.perfmodel.c_perfmodel
-
-        return output
-    end
-
-end

+ 0 - 234
julia/src/Wrapper/Julia/starpu_data_handle.jl

@@ -1,234 +0,0 @@
-
-
-STARPU_MAIN_RAM = 0 #TODO: ENUM
-
-
-const StarpuDataHandlePointer = Ptr{Void}
-
-
-
-StarpuDataHandle = StarpuDestructible{StarpuDataHandlePointer}
-
-
-
-function StarpuNewDataHandle(ptr :: StarpuDataHandlePointer, destr :: Function...) :: StarpuDataHandle
-    return StarpuDestructible(ptr, destr...)
-end
-
-
-
-function starpu_data_unregister_pointer(ptr :: StarpuDataHandlePointer)
-    @starpucall(starpu_data_unregister, Void, (Ptr{Void},), ptr)
-end
-
-
-export starpu_data_unregister
-function starpu_data_unregister(handles :: StarpuDataHandle...)
-    for h in handles
-        starpu_execute_destructor!(h, starpu_data_unregister_pointer)
-    end
-end
-
-
-
-export starpu_data_register
-
-function starpu_data_register(v :: Vector{T}) where T
-
-    output = Ref{Ptr{Void}}(0)
-    data_pointer = pointer(v)
-
-    @starpucall(starpu_vector_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void}, UInt32, Csize_t),
-                output, STARPU_MAIN_RAM, data_pointer,
-                length(v), sizeof(T)
-            )
-
-    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer])
-end
-
-
-function starpu_data_register(m :: Matrix{T}) where T
-
-    output = Ref{Ptr{Void}}(0)
-    data_pointer = pointer(m)
-    (height, width) = size(m)
-
-    @starpucall(starpu_matrix_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void},
-                    UInt32, UInt32, UInt32, Csize_t),
-                output, STARPU_MAIN_RAM, data_pointer,
-                height, height, width, sizeof(T)
-            )
-
-    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer])
-end
-
-
-function starpu_data_register(block :: Array{T,3}) where T
-
-    output = Ref{Ptr{Void}}(0)
-    data_pointer = pointer(block)
-    (height, width, depth) = size(block)
-
-    @starpucall(starpu_block_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void},
-                    UInt32, UInt32, UInt32, UInt32,
-                    UInt32, Csize_t),
-                output, STARPU_MAIN_RAM, data_pointer,
-                height, height * width,
-                height, width, depth,
-                sizeof(T)
-            )
-
-    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)
-end
-
-
-
-function starpu_data_register(ref :: Ref{T}) where T
-
-    output = Ref{Ptr{Void}}(0)
-
-    @starpucall(starpu_variable_data_register,
-                Void,
-                (Ptr{Void}, Cint, Ptr{Void}, Csize_t),
-                output, STARPU_MAIN_RAM, ref, sizeof(T)
-            )
-
-    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)
-end
-
-
-
-function starpu_data_register(x1, x2, next_args...)
-
-    handle_1 = starpu_data_register(x1)
-    handle_2 = starpu_data_register(x2)
-
-    next_handles = map(starpu_data_register, next_args)
-
-    return [handle_1, handle_2, next_handles...]
-end
-
-
-
-
-export starpu_data_get_sub_data
-
-function starpu_data_get_sub_data(root_data :: StarpuDataHandle, id)
-
-    output = @starpucall(starpu_data_get_sub_data,
-                        Ptr{Void}, (Ptr{Void}, Cuint, Cuint),
-                        root_data.object, 1, id - 1
-                    )
-
-    return StarpuNewDataHandle(output)
-end
-
-
-function starpu_data_get_sub_data(root_data :: StarpuDataHandle, idx, idy)
-
-    output = @starpucall(starpu_data_get_sub_data,
-                        Ptr{Void}, (Ptr{Void}, Cuint, Cuint, Cuint),
-                        root_data.object, 2, idx - 1, idy - 1
-                    )
-
-    return StarpuNewDataHandle(output)
-end
-
-import Base.getindex
-
-
-
-function Base.getindex(handle :: StarpuDataHandle, indexes...)
-     starpu_data_get_sub_data(handle, indexes...)
- end
-
-
-
-
-export StarpuDataFilterFunc
-export STARPU_MATRIX_FILTER_VERTICAL_BLOCK, STARPU_MATRIX_FILTER_BLOCK
-
-@enum(StarpuDataFilterFunc,
-
-    STARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
-    STARPU_MATRIX_FILTER_BLOCK = 1
-)
-
-export StarpuDataFilter
-"""
-    TODO : use real function pointers loaded from starpu shared library
-"""
-mutable struct StarpuDataFilter
-
-    filter_func :: StarpuDataFilterFunc
-    nchildren :: Cuint
-
-    function StarpuDataFilter(filter_func, nchildren)
-        output = new()
-        output.filter_func = filter_func
-        output.nchildren = Cuint(nchildren)
-        return output
-    end
-
-end
-
-
-function starpu_data_unpartition_pointer(ptr :: StarpuDataHandlePointer)
-    @starpucall(starpu_data_unpartition, Void, (Ptr{Void}, Cuint), ptr, STARPU_MAIN_RAM)
-end
-
-export starpu_data_partition
-function starpu_data_partition(handle :: StarpuDataHandle, filter :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    @starpucall(jlstarpu_data_partition,
-            Void, (Ptr{Void}, Ptr{Void}),
-            handle.object, Ref{StarpuDataFilter}(filter)
-        )
-end
-
-
-export starpu_data_unpartition
-function starpu_data_unpartition(handles :: StarpuDataHandle...)
-
-    for h in handles
-        starpu_execute_destructor!(h, starpu_data_unpartition_pointer)
-    end
-
-    return nothing
-end
-
-
-
-export starpu_data_map_filters
-
-function starpu_data_map_filters(handle :: StarpuDataHandle, filter :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    @starpucall(jlstarpu_data_map_filters_1_arg,
-            Void, (Ptr{Void}, Ptr{Void}),
-            handle.object, Ref{StarpuDataFilter}(filter)
-    )
-end
-
-
-function starpu_data_map_filters(handle :: StarpuDataHandle, filter_1 :: StarpuDataFilter, filter_2 :: StarpuDataFilter)
-
-    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
-
-    @starpucall(jlstarpu_data_map_filters_2_arg,
-            Void, (Ptr{Void}, Ptr{Void}, Ptr{Void}),
-            handle.object,
-            Ref{StarpuDataFilter}(filter_1),
-            Ref{StarpuDataFilter}(filter_2)
-    )
-
-end

+ 0 - 49
julia/src/Wrapper/Julia/starpu_define.jl

@@ -1,49 +0,0 @@
-
-
-
-
-STARPU_MAXIMPLEMENTATIONS = 1 # TODO : These must be the same values as defined in C macros !
-STARPU_NMAXBUFS = 8 # TODO : find a way to make it automatically match
-
-
-STARPU_CPU = 1 << 1
-STARPU_CUDA = 1 << 3
-
-macro starpufunc(symbol)
-    :($symbol, "libjlstarpu_c_wrapper")
-end
-
-"""
-    Used to call a StarPU function compiled inside "libjlstarpu_c_wrapper.so"
-    Works as ccall function
-"""
-macro starpucall(func, ret_type, arg_types, args...)
-    return Expr(:call, :ccall, (func, "libjlstarpu_c_wrapper"), esc(ret_type), esc(arg_types), map(esc, args)...)
-end
-
-
-export @debugprint
-macro debugprint(x...)
-
-    expr = Expr(:call, :println, "\x1b[32m", map(esc, x)..., "\x1b[0m")
-
-    quote
-        $expr
-        flush(STDOUT)
-    end
-end
-
-
-
-function Cstring_from_String(str :: String)
-    return Cstring(pointer(str))
-end
-
-
-
-function jlstarpu_set_to_zero(x :: T) :: Ptr{Void} where {T}
-    @starpucall(memset,
-          Ptr{Void}, (Ptr{Void}, Cint, Csize_t),
-          Ref{T}(x), 0, sizeof(x)
-        )
-end

+ 0 - 125
julia/src/Wrapper/Julia/starpu_destructible.jl

@@ -1,125 +0,0 @@
-
-
-
-"""
-    Object used to store a lost of function which must
-    be applied to and object
-"""
-mutable struct StarpuDestructible{T}
-
-    object :: T
-    destructors :: LinkedList{Function}
-
-end
-
-starpu_block_list = Vector{LinkedList{StarpuDestructible}}()
-
-
-
-function StarpuDestructible(obj :: T, destructors :: Function...) where T
-
-    if (isempty(starpu_block_list))
-        error("Creation of a StarpuDestructible object while not beeing in a @starpu_block")
-    end
-
-    l = LinkedList{Function}()
-
-    for destr in destructors
-        add_to_tail!(l, destr)
-    end
-
-    output = StarpuDestructible{T}(obj, l)
-    add_to_head!(starpu_block_list[end], output)
-
-    return output
-end
-
-
-
-
-function starpu_enter_new_block()
-
-    push!(starpu_block_list, LinkedList{StarpuDestructible}())
-end
-
-"""
-    Applies every stored destructores to the StarpuDestructible stored object
-"""
-function starpu_destruct!(x :: StarpuDestructible)
-
-    for destr in x.destructors
-        destr(x.object)
-    end
-
-    empty!(x.destructors)
-
-    return nothing
-end
-
-
-function starpu_exit_block()
-
-    destr_list = pop!(starpu_block_list)
-
-    for x in destr_list
-        starpu_destruct!(x)
-    end
-
-end
-
-"""
-    Adds new destructors to the list of function. They will be executed before
-        already stored ones when calling starpu_destruct!
-"""
-function starpu_add_destructor!(x :: StarpuDestructible, destrs :: Function...)
-
-    for d in destrs
-        add_to_head!(x.destructors, d)
-    end
-
-    return nothing
-end
-
-"""
-    Removes detsructor without executing it
-"""
-function starpu_remove_destructor!(x :: StarpuDestructible, destr :: Function)
-
-    @foreach_asc x.destructors lnk begin
-
-        if (lnk.data == destr)
-            remove_link!(lnk)
-            break
-        end
-    end
-
-    return nothing
-end
-
-
-"""
-    Executes "destr" function. If it was one of the stored destructors, it
-    is removed.
-    This function can be used to allow user to execute a specific action manually
-        (ex : explicit call to starpu_data_unpartition() without unregistering)
-"""
-function starpu_execute_destructor!(x :: StarpuDestructible, destr :: Function)
-
-    starpu_remove_destructor!(x, destr)
-    return destr(x.object)
-end
-
-
-export @starpu_block
-
-"""
-    Declares a block of code. Every declared StarpuDestructible in this code
-    will execute its destructors on its object, once the block is exited
-"""
-macro starpu_block(expr)
-    quote
-        starpu_enter_new_block()
-        $(esc(expr))
-        starpu_exit_block()
-    end
-end

+ 0 - 20
julia/src/Wrapper/Julia/starpu_include.jl

@@ -1,20 +0,0 @@
-
-__precompile__()
-module StarPU
-
-
-    include("starpu_define.jl")
-    include("static_structures.jl")
-    include("starpu_simple_functions.jl")
-    include("starpu_perfmodel.jl")
-    include("starpu_codelet.jl")
-
-    include("linked_list.jl")
-    include("starpu_destructible.jl")
-    include("starpu_data_handle.jl")
-
-    include("starpu_task.jl")
-    include("starpu_task_submit.jl")
-    include("starpu_init_shutdown.jl")
-
-end

+ 0 - 35
julia/src/Wrapper/Julia/starpu_init_shutdown.jl

@@ -1,35 +0,0 @@
-
-export starpu_init
-
-"""
-    Must be called before any other starpu function. Field extern_task_path is the
-    shared library path which will be used to find StarpuCodelet
-    cpu and gpu function names
-"""
-function starpu_init(; extern_task_path = "")
-
-    if (!isempty(extern_task_path))
-        global starpu_tasks_library_handle = Libdl.dlopen(extern_task_path)
-    else
-        global starpu_tasks_library_handle = Libdl.dlopen("libjlstarpu_c_wrapper.so")
-    end
-
-    output = @starpucall jlstarpu_init Cint ()
-
-    starpu_enter_new_block()
-
-    return output
-end
-
-
-export starpu_shutdown
-
-"""
-    Must be called at the end of the program
-"""
-function starpu_shutdown()
-    starpu_exit_block()
-    @starpucall starpu_shutdown Void ()
-    jlstarpu_free_allocated_structures()
-    return nothing
-end

+ 0 - 90
julia/src/Wrapper/Julia/starpu_perfmodel.jl

@@ -1,90 +0,0 @@
-
-export StarpuPerfmodelType
-export STARPU_PERFMODEL_INVALID, STARPU_PER_ARCH, STARPU_COMMON
-export STARPU_HISTORY_BASED, STARPU_REGRESSION_BASED
-export STARPU_NL_REGRESSION_BASED, STARPU_MULTIPLE_REGRESSION_BASED
-
-@enum(StarpuPerfmodelType,
-    STARPU_PERFMODEL_INVALID = 0,
-	STARPU_PER_ARCH = 1,
-	STARPU_COMMON = 2,
-	STARPU_HISTORY_BASED = 3,
-	STARPU_REGRESSION_BASED = 4,
-	STARPU_NL_REGRESSION_BASED = 5,
-	STARPU_MULTIPLE_REGRESSION_BASED = 6
-)
-
-
-mutable struct StarpuPerfmodel_c
-
-    perf_type :: StarpuPerfmodelType
-
-    cost_function :: Ptr{Void}
-    arch_cost_function :: Ptr{Void}
-
-    size_base :: Ptr{Void}
-    footprint :: Ptr{Void}
-
-    symbol :: Cstring
-
-    is_loaded :: Cuint
-    benchmarking :: Cuint
-    is_init :: Cuint
-
-    parameters :: Ptr{Void}
-    parameters_names :: Ptr{Void}
-    nparameters :: Cuint
-    combinations :: Ptr{Void}
-    ncombinations :: Cuint
-
-    state :: Ptr{Void}
-
-
-    function StarpuPerfmodel_c()
-
-        output = new()
-        jlstarpu_set_to_zero(output)
-
-        return output
-    end
-
-end
-
-
-
-export StarpuPerfmodel
-struct StarpuPerfmodel
-
-    perf_type :: StarpuPerfmodelType
-    symbol :: String
-
-    c_perfmodel :: Ptr{StarpuPerfmodel_c}
-end
-
-
-
-
-function StarpuPerfmodel(; perf_type = STARPU_PERFMODEL_INVALID, symbol = "")
-
-    if (perf_type == STARPU_PERFMODEL_INVALID)
-        return StarpuPerfmodel(perf_type, symbol, Ptr{StarpuPerfmodel_c}(C_NULL))
-    end
-
-    if (isempty(symbol))
-        error("Field \"symbol\" can't be empty when creating a StarpuPerfmodel")
-    end
-
-    c_perfmodel = StarpuPerfmodel_c()
-    c_perfmodel.perf_type = perf_type
-    c_perfmodel.symbol = Cstring_from_String(symbol)
-
-    c_perfmodel_ptr = jlstarpu_allocate_and_store(c_perfmodel)
-
-    return StarpuPerfmodel(perf_type, symbol, c_perfmodel_ptr)
-end
-
-
-function show_c_perfmodel(x :: StarpuPerfmodel)
-    x_c = unsafe_load(x.c_perfmodel)
-    println(x_c)
-end

+ 0 - 28
julia/src/Wrapper/Julia/starpu_simple_functions.jl

@@ -1,28 +0,0 @@
-
-"""
-    Declares a Julia function wich is just calling the StarPU function
-    having the same name.
-"""
-macro starpu_noparam_function(func_name, ret_type)
-
-    func = Symbol(func_name)
-
-    quote
-        export $func
-        global $func() = ccall(($func_name, "libjlstarpu_c_wrapper"),
-                                $ret_type, ()) :: $ret_type
-    end
-end
-
-
-global starpu_tasks_library_handle = C_NULL
-
-
-
-@starpu_noparam_function "starpu_is_initialized" Cint
-
-
-
-@starpu_noparam_function "starpu_cublas_init" Void
-@starpu_noparam_function "starpu_cublas_set_stream" Void
-@starpu_noparam_function "starpu_cublas_shutdown" Void

+ 0 - 184
julia/src/Wrapper/Julia/starpu_task.jl

@@ -1,184 +0,0 @@
-
-export StarpuTask
-mutable struct StarpuTask
-
-    cl :: StarpuCodelet
-    handles :: Vector{StarpuDataHandle}
-    handle_pointers :: Vector{StarpuDataHandlePointer}
-    synchronous :: Bool
-    cl_arg :: Union{Ref, Void}
-
-    c_task :: Ptr{Void}
-
-
-    """
-        StarpuTask(; cl :: StarpuCodelet, handles :: Vector{StarpuDataHandle}, cl_arg :: Ref)
-
-        Creates a new task which will run the specified codelet on handle buffers and cl_args data
-    """
-    function StarpuTask(; cl :: Union{Void, StarpuCodelet} = nothing, handles :: Vector{StarpuDataHandle} = StarpuDataHandle[], cl_arg :: Union{Ref, Void} = nothing)
-
-        if (cl == nothing)
-            error("\"cl\" field can't be empty when creating a StarpuTask")
-        end
-
-        output = new()
-
-        output.cl = cl
-        output.handles = handles
-        output.cl_arg = cl_arg
-
-        output.synchronous = false
-        output.handle_pointers = StarpuDataHandlePointer[]
-
-        c_task = @starpucall starpu_task_create Ptr{Void} ()
-
-        if (c_task == C_NULL)
-            error("Couldn't create new task: starpu_task_create() returned NULL")
-        end
-
-        output.c_task = c_task
-
-        starpu_c_task_update(output)
-
-        return output
-    end
-
-end
-
-
-"""
-    Updates fields of the real C structures stored at "c_task" field
-"""
-function starpu_c_task_update(task :: StarpuTask)
-
-    task_translator = StarpuTaskTranslator(task)
-
-    @starpucall(jlstarpu_task_update,
-                Void, (Ptr{Void}, Ptr{Void}),
-                Ref{StarpuTaskTranslator}(task_translator),
-                task.c_task
-            )
-end
-
-
-"""
-    Structure used to update fields of the real C task structure 
-"""
-mutable struct StarpuTaskTranslator
-
-    cl :: Ptr{Void}
-    handles :: Ptr{Void}
-    synchronous :: Cuint
-
-    cl_arg :: Ptr{Void}
-    cl_arg_size :: Csize_t
-
-    function StarpuTaskTranslator(task :: StarpuTask)
-
-        output = new()
-
-        output.cl = task.cl.c_codelet
-
-        task.handle_pointers = map((x -> x.object), task.handles)
-        output.handles = pointer(task.handle_pointers)
-        output.synchronous = Cuint(task.synchronous)
-
-        if (task.cl_arg == nothing)
-            output.cl_arg = C_NULL
-            output.cl_arg_size = 0
-        else
-            output.cl_arg = pointer_from_objref(task.cl_arg) #TODO : Libc.malloc and cl_arg_free set to 1 ? but it should be done only when submitting
-            output.cl_arg_size = sizeof(eltype(task.cl_arg))
-        end
-
-        return output
-    end
-
-end
-
-
-export StarpuTag
-const StarpuTag = UInt64
-
-
-export starpu_tag_declare_deps
-function starpu_tag_declare_deps(id :: StarpuTag, dep :: StarpuTag, other_deps :: StarpuTag...)
-
-    v = [dep, other_deps...]
-
-    @starpucall(starpu_tag_declare_deps_array,
-                Void, (StarpuTag, Cuint, Ptr{StarpuTag}),
-                id, length(v), pointer(v)
-        )
-end
-
-
-export starpu_iteration_push
-function starpu_iteration_push(iteration)
-
-    @starpucall(starpu_iteration_push,
-                Void, (Culong,), iteration
-        )
-end
-
-
-export starpu_iteration_pop
-function starpu_iteration_pop()
-    @starpucall starpu_iteration_pop Void ()
-end
-
-
-export starpu_tag_wait
-function starpu_tag_wait(id :: StarpuTag)
-    @starpucall starpu_tag_wait Cint (StarpuTag,) id
-end
-
-
-function starpu_tag_wait(ids :: Vector{StarpuTag})
-
-    @starpucall(starpustarpu_tag_wait_array,
-                Cint, (Cuint, Ptr{StarpuTag}),
-                length(ids), pointer(ids)
-        )
-end
-
-
-export starpu_task_destroy
-function starpu_task_destroy(task :: StarpuTask)
-    @starpucall starpu_task_destroy Void (Ptr{Void},) task.c_task
-end
-
-
-export starpu_task_wait_for_n_submitted
-
-"""
-    Block until there are n submitted tasks left (to the current context or the global one if there is no current context) to
-    be executed. It does not destroy these tasks.
-"""
-function starpu_task_wait_for_n_submitted(n)
-    @starpucall starpu_task_wait_for_n_submitted Cint (Cuint,) n
-end
-
-
-export starpu_task_declare_deps
-
-"""
-    starpu_task_declare_deps(task :: StarpuTask, dep :: StarpuTask [, other_deps :: StarpuTask...])
-
-    Declare task dependencies between a task and the following provided ones. This function must be called
-    prior to the submission of the task, but it may called after the submission or the execution of the tasks in the array,
-    provided the tasks are still valid (i.e. they were not automatically destroyed). Calling this function on a task that was
-    already submitted or with an entry of task_array that is no longer a valid task results in an undefined behaviour.
-"""
-function starpu_task_declare_deps(task :: StarpuTask, dep :: StarpuTask, other_deps :: StarpuTask...)
-
-    task_array = [dep.c_task, map((t -> t.c_task), other_deps)...]
-
-    @starpucall(starpu_task_declare_deps_array,
-                Void, (Ptr{Void}, Cuint, Ptr{Void}),
-                task.c_task,
-                length(task_array),
-                pointer(task_array)
-            )
-end

+ 0 - 72
julia/src/Wrapper/Julia/starpu_task_submit.jl

@@ -1,72 +0,0 @@
-
-
-
-export starpu_task_submit
-
-"""
-    Launches task execution, if "synchronous" task field is set to "false", call
-    returns immediately
-"""
-function starpu_task_submit(task :: StarpuTask)
-
-    nb_handles = length(task.handles)
-    nb_modes = length(task.cl.modes)
-
-    # A task must carry exactly one data handle per access mode of its codelet.
-    if nb_handles != nb_modes
-        error("Invalid number of handles for task : $(nb_handles) were given while codelet has $(nb_modes) modes")
-    end
-
-    # NOTE(review): starpu_c_task_update is defined elsewhere; presumably it
-    # copies the Julia-side fields into task.c_task before submission — confirm.
-    starpu_c_task_update(task)
-
-    # Returns the Cint status of the C function starpu_task_submit.
-    @starpucall starpu_task_submit Cint (Ptr{Void},) task.c_task
-end
-
-
-export @starpu_async_cl
-
-"""
-    Creates and submits an asynchronous task running cl Codelet function.
-    Ex : @starpu_async_cl cl(handle1, handle2)
-"""
-macro starpu_async_cl(expr)
-
-    # Only a call expression `cl(handle1, handle2, ...)` is accepted.
-    is_call = isa(expr, Expr) && expr.head == :call
-    if !is_call
-        error("Invalid task submit syntax")
-    end
-
-    # First element is the codelet, the remaining ones become a vector literal
-    # of handles. Both are escaped so they are evaluated in the caller's scope.
-    codelet = esc(expr.args[1])
-    handle_vect = esc(Expr(:vect, expr.args[2:end]...))
-
-    return quote
-        task = StarpuTask(cl = $codelet, handles = $handle_vect)
-        starpu_task_submit(task)
-    end
-end
-
-
-export starpu_task_wait_for_all
-"""
-    Blocks until every submitted task has finished.
-"""
-function starpu_task_wait_for_all()
-    # Issued through @threadcall rather than a plain ccall, so the blocking
-    # C function runs on a separate thread.
-    @threadcall(@starpufunc(:starpu_task_wait_for_all), Cint, ())
-end
-
-
-export @starpu_sync_tasks
-
-"""
-    Blocks until every submitted task has finished.
-    Ex : @starpu_sync_tasks begin
-                [...]
-                starpu_task_submit(task)
-                [...]
-        end
-
-    TODO : Make the macro only wait for tasks declared inside the following expression.
-            (similar mechanism as @starpu_block)
-"""
-macro starpu_sync_tasks(expr)
-    # The user expression is escaped so that it runs in the caller's scope;
-    # the wait for every submitted task is appended right after it.
-    body = esc(expr)
-    return quote
-        $body
-        starpu_task_wait_for_all()
-    end
-end

+ 0 - 35
julia/src/Wrapper/Julia/static_structures.jl

@@ -1,35 +0,0 @@
-
-
-
-const jlstarpu_allocated_structures = Vector{Ptr{Void}}([])
-
-
-"""
-    Copies x_c to a new allocated memory zone.
-    Returns the pointer toward the copied object. Every pointer
-    returned by this function will be freed after a call to
-    jlstarpu_free_allocated_structures
-"""
-function jlstarpu_allocate_and_store(x_c :: T) where {T}
-
-    # Raw C allocation large enough for one value of type T.
-    raw = Libc.malloc(sizeof(T))
-    raw == C_NULL && error("Base.Libc.malloc returned NULL")
-
-    # Copy the value into the new memory zone.
-    typed = Ptr{T}(raw)
-    unsafe_store!(typed, x_c)
-
-    # Remember the untyped pointer so that it can be freed later by
-    # jlstarpu_free_allocated_structures.
-    push!(jlstarpu_allocated_structures, Ptr{Void}(typed))
-
-    return typed
-end
-
-
-"""
-    Frees every pointer allocated by jlstarpu_allocate_and_store
-"""
-function jlstarpu_free_allocated_structures()
-    # Release every recorded pointer, then forget them all.
-    for ptr in jlstarpu_allocated_structures
-        Libc.free(ptr)
-    end
-    empty!(jlstarpu_allocated_structures)
-    return nothing
-end

+ 0 - 68
julia/tst/Makefile.mk

@@ -1,68 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2018-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
-# Copyright (C) 2018       Alexis Juven
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-
-
-
-
-
-
-CC = gcc
-CFLAGS += $(shell pkg-config --cflags starpu-1.3)
-LDFLAGS += $(shell pkg-config --libs starpu-1.3)
-
-# `all` and `clean` are commands, not files to build.
-.PHONY: all clean
-
-all: libjlstarpu_c_wrapper.so build/mult build/extern_tasks.so build/generated_tasks.so
-
-
-# C wrapper library built from the jlstarpu_*.c sources.
-libjlstarpu_c_wrapper.so: ../src/Wrapper/C/jlstarpu_task_submit.c ../src/Wrapper/C/jlstarpu_simple_functions.c ../src/Wrapper/C/jlstarpu_data_handles.c
-	$(CC) -O3 -shared -fPIC $(CFLAGS) $^ -o $@ $(LDFLAGS)
-
-
-# Native C/CUDA executable.
-build/mult: mult.c build/cpu_mult.o build/gpu_mult.o
-	$(CC) $(CFLAGS) -O3 $^ -o $@ $(LDFLAGS)
-
-build/gpu_mult.o: gpu_mult.cu
-	nvcc -c -O3 $(CFLAGS) $^ -o $@
-
-# Compile-only step: linker flags are not needed here.
-build/cpu_mult.o: cpu_mult.c
-	$(CC) -c $(CFLAGS) -O3 $^ -o $@
-
-
-# Shared library combining the hand-written CPU and GPU kernels.
-build/extern_tasks.so: build/cpu_mult.so build/gpu_mult.so
-	$(CC) -shared -fPIC $^ -o $@
-
-build/cpu_mult.so: cpu_mult.c
-	$(CC) -O3 -shared -fPIC $(CFLAGS) $^ -o $@ $(LDFLAGS)
-
-build/gpu_mult.so: gpu_mult.cu
-	nvcc -O3 $(CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@
-
-
-# Running the Julia file produces the target.
-build/generated_tasks.so: cpu_cuda_mult.jl
-	julia $^
-
-
-# -f: do not fail when there is nothing to remove.
-clean:
-	rm -f build/* libjlstarpu_c_wrapper.so

+ 0 - 41
julia/tst/README

@@ -1,41 +0,0 @@
-
-
-(Rename Makefile.mk to Makefile)
-
-Command to compile the tests and the libjlstarpu_c_wrapper.so file (needed in the current directory to launch StarPU from Julia), and to run cpu_cuda_mult.jl, a Julia file which uses the C/CUDA compiler to generate kernels from Julia code:
-	 
-	make		
-
-
-
-Then you can run the Julia matrix block multiplication tests. These tests run the StarPU multiplication algorithm on square matrices of several sizes, and display, for each matrix size, the median execution time (format: "width ; time"):
-
-     "julia test_file start_dim step_dim stop_dim nb_tests nslicesx nslicesy"
-
-     - test_file : one of the julia files used for test. Can be 
-     	 - "mult_extern.jl" to make Julia code run, but using C and Cuda written Kernel.
-	 - "mult_generated.jl" to make Julia code run, with compiled C and Cuda kernels from Julia code
-
-	 - start_dim, step_dim, stop_dim : The test will run with square matrices with a side length starting at start_dim, and increasing by step_dim until reaching stop_dim.
-
-	 - nb_tests : number of tests for each matrix size. The median value obtained is displayed.
-
-	 - nslicesx, nslicesy : The matrix will be cut into nslicesx * nslicesy slices; one StarPU task per slice will run in each test.
-
-	 Example : julia mult_generated.jl 64 64 256 10 4 4
-
-	 You can define environment variables to make StarPU use a suitable scheduling policy.
-	 Example : STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia mult_generated.jl 64 64 256 10 4 4
-	 
-
-
-You can run the same test with same arguments, but with the C/Cuda version:
-
-    STARPU_SCHED=dmda STARPU_CALIBRATE=1 build/mult 64 64 256 10 4 4
-
-
-
-
-
-
-

+ 0 - 241
julia/tst/black_scholes/black_scholes.c

@@ -1,241 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2019       Mael Keryell
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <starpu.h>
-#include <math.h>
-#include "../includes/sorting.h"
-
-
-
-void cpu_black_scholes(void **, void *);
-void gpu_black_scholes(void **, void *);
-
-static struct starpu_codelet cl =
-{
-	.cpu_funcs = {cpu_black_scholes},
-	.cuda_funcs = {gpu_black_scholes},
-	.nbuffers = 7,
-	.modes = {STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_W, STARPU_W}
-};
-
-void black_scholes_with_starpu(double *S, double *K, double *R, double *T, double *sig, double *call_res, double *put_res, unsigned nbr_data, unsigned nslices)
-{
-	/* The seven buffers are handled uniformly, in the order expected by the
-	 * codelet `cl`: S, K, R, T, sig, call_res, put_res. */
-	enum { NB_BUFFERS = 7 };
-	double *buffers[NB_BUFFERS] = {S, K, R, T, sig, call_res, put_res};
-	starpu_data_handle_t handles[NB_BUFFERS];
-	unsigned b, slice;
-
-	/* Register every buffer as a vector of nbr_data doubles. */
-	for (b = 0; b < NB_BUFFERS; b++)
-		starpu_vector_data_register(&handles[b], STARPU_MAIN_RAM, (uintptr_t)buffers[b], nbr_data, sizeof(double));
-
-	/* Block filter cutting each vector into nslices children. */
-	struct starpu_data_filter block_filter =
-	{
-		.filter_func = starpu_vector_filter_block,
-		.nchildren = nslices
-	};
-
-	for (b = 0; b < NB_BUFFERS; b++)
-		starpu_data_partition(handles[b], &block_filter);
-
-	/* One task per slice, working on the matching child of every vector. */
-	for (slice = 0; slice < nslices; slice++){
-		struct starpu_task *task = starpu_task_create();
-
-		task->cl = &cl;
-		for (b = 0; b < NB_BUFFERS; b++)
-			task->handles[b] = starpu_data_get_sub_data(handles[b], 1, slice);
-
-		starpu_task_submit(task);
-	}
-
-	starpu_task_wait_for_all();
-
-	/* Gather the children back into main memory, then unregister. */
-	for (b = 0; b < NB_BUFFERS; b++)
-		starpu_data_unpartition(handles[b], STARPU_MAIN_RAM);
-
-	for (b = 0; b < NB_BUFFERS; b++)
-		starpu_data_unregister(handles[b]);
-}
-
-static void init_S(double *S, unsigned nbr_data)
-{
-	/* Fill S with nbr_data pseudo-random values in [0, 100]. */
-	double *end = S + nbr_data;
-	for (; S != end; S++)
-		*S = 100. * rand() / (double) RAND_MAX;
-}
-
-static void init_K(double *K, unsigned nbr_data)
-{
-	/* Fill K with nbr_data pseudo-random values in [0, 100]. */
-	double *end = K + nbr_data;
-	for (; K != end; K++)
-		*K = 100. * rand() / (double) RAND_MAX;
-}
-
-static void init_R(double *R, unsigned nbr_data)
-{
-	/* Fill R with nbr_data pseudo-random values in [0, 1]. */
-	double *end = R + nbr_data;
-	for (; R != end; R++)
-		*R = rand() / (double) RAND_MAX;
-}
-
-static void init_T(double *T, unsigned nbr_data)
-{
-	/* Fill T with nbr_data pseudo-random values in [0, 10]. */
-	double *end = T + nbr_data;
-	for (; T != end; T++)
-		*T = 10. * rand() / (double) RAND_MAX;
-}
-
-static void init_sig(double *sig, unsigned nbr_data)
-{
-	/* Fill sig with nbr_data pseudo-random values in [0, 10]. */
-	double *end = sig + nbr_data;
-	for (; sig != end; sig++)
-		*sig = 10. * rand() / (double) RAND_MAX;
-}
-
-
-/* Run the StarPU Black-Scholes computation nbr_tests times on freshly
- * randomised inputs of nbr_data elements cut into nslices slices, and return
- * the median wall-clock time (measured value divided by 1e6).
- * Returns 0 when nbr_tests is 0; exits the program when memory cannot be
- * allocated. */
-double median_time(unsigned nbr_data, unsigned nslices, unsigned nbr_tests)
-{
-	/* Nothing to measure: avoids a zero-length array and an out-of-bounds read. */
-	if (nbr_tests == 0)
-		return 0.;
-
-	/* Heap allocation: nbr_tests comes from the command line and is unbounded. */
-	double *exec_times = malloc(nbr_tests * sizeof(double));
-
-	double *S = malloc(nbr_data * sizeof(double));
-	double *K = malloc(nbr_data * sizeof(double));
-	double *R = malloc(nbr_data * sizeof(double));
-	double *T = malloc(nbr_data * sizeof(double));
-	double *sig = malloc(nbr_data * sizeof(double));
-
-	double *call_res = calloc(nbr_data, sizeof(double));
-	double *put_res = calloc(nbr_data, sizeof(double));
-
-	/* malloc(0) may legitimately return NULL, hence the nbr_data test. */
-	if (!exec_times || (nbr_data > 0 && (!S || !K || !R || !T || !sig || !call_res || !put_res))){
-		fprintf(stderr, "median_time: out of memory\n");
-		exit(EXIT_FAILURE);
-	}
-
-	double start, stop, median;
-	unsigned i;
-	for (i = 0; i < nbr_tests; i++){
-
-		init_S(S,nbr_data);
-		init_K(K,nbr_data);
-		init_R(R,nbr_data);
-		init_T(T,nbr_data);
-		init_sig(sig,nbr_data);
-
-		/* Only the StarPU part is timed, not the data initialisation. */
-		start = starpu_timing_now();
-		black_scholes_with_starpu(S, K, R, T, sig, call_res, put_res, nbr_data, nslices);
-		stop = starpu_timing_now();
-
-		exec_times[i] = (stop - start) / 1.e6;
-	}
-
-	free(S);
-	free(K);
-	free(R);
-	free(T);
-	free(sig);
-	free(call_res);
-	free(put_res);
-
-	quicksort(exec_times, 0, nbr_tests - 1);
-
-	median = exec_times[nbr_tests/2];
-	free(exec_times);
-
-	return median;
-}
-	
-
-
-/* For every data size from start_nbr to stop_nbr (inclusive) by steps of
- * step_nbr, print the median execution time and append it to
- * DAT/black_scholes_c_times.dat (one value per line).
- * Does nothing but report an error when step_nbr is 0 or when the output
- * file cannot be opened. */
-void display_times(unsigned start_nbr, unsigned step_nbr, unsigned stop_nbr, unsigned nslices, unsigned nbr_tests)
-{
-	FILE *myfile;
-	unsigned nbr_data;
-
-	/* A zero step would never reach stop_nbr. */
-	if (step_nbr == 0){
-		fprintf(stderr, "display_times: step_nbr must be greater than 0\n");
-		return;
-	}
-
-	myfile = fopen("DAT/black_scholes_c_times.dat", "w");
-	if (myfile == NULL){
-		perror("DAT/black_scholes_c_times.dat");
-		return;
-	}
-
-	for (nbr_data = start_nbr; nbr_data <= stop_nbr; nbr_data += step_nbr){
-		double t = median_time(nbr_data, nslices, nbr_tests);
-		printf("nbr_data:\n%u\nTime:\n%f\n", nbr_data, t);
-		fprintf(myfile, "%f\n", t);
-	}
-	fclose(myfile);
-}
-
-int main(int argc, char *argv[])
-{
-	/* params: start_nbr, step_nbr, stop_nbr, nslices, nbr_tests */
-	unsigned params[5];
-	int k;
-
-	if (argc != 6){
-		printf("Usage: %s start_nbr step_nbr stop_nbr nslices nbr_tests\n", argv[0]);
-		return 1;
-	}
-
-	if (starpu_init(NULL) != EXIT_SUCCESS){
-		fprintf(stderr, "ERROR\n");
-		return 77;
-	}
-
-	for (k = 0; k < 5; k++)
-		params[k] = (unsigned) atoi(argv[k + 1]);
-
-	/* Different random inputs on every run. */
-	srand(time(NULL));
-
-	display_times(params[0], params[1], params[2], params[3], params[4]);
-
-	starpu_shutdown();
-
-	return 0;
-}
-		

+ 0 - 81
julia/tst/black_scholes/black_scholes_def.jl

@@ -1,81 +0,0 @@
-# Runs the codelet `cl` on `data` and `res`, both cut into `nslices` vertical
-# blocks, with one asynchronous task per block.
-function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64)
-    # Filter used to partition both matrices into `nslices` vertical blocks.
-    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices)
-
-    # NOTE(review): @starpu_block is defined elsewhere; presumably it releases
-    # the handles registered inside the block when the block ends — confirm.
-    @starpu_block let
-        dat_handle, res_handle = starpu_data_register(data, res)
-
-        starpu_data_partition(dat_handle, vert)
-        starpu_data_partition(res_handle, vert)
-        
-        #Compute the price of call and put option in the res matrix
-        # One task per slice; @starpu_sync_tasks waits for all of them.
-        @starpu_sync_tasks for task in (1:nslices)
-            @starpu_async_cl cl(dat_handle[task], res_handle[task])
-        end
-    end
-end
-
-
-function init_data(data, data_nbr)
-    # Upper bound of the random range of each of the five rows.
-    scales = (100, 100, 1, 10, 10)
-    for col in 1:data_nbr
-        for (row, scale) in enumerate(scales)
-            data[row, col] = rand(Float64) * scale
-        end
-    end
-    return data
-end
-        
-
-
-# Runs black_scholes_starpu `nbr_tests` times on freshly randomised inputs of
-# `data_nbr` columns and returns the median execution time in seconds.
-function median_times(data_nbr, nslices, nbr_tests)
-
-    data ::Matrix{Float64} = zeros(5, data_nbr)
-    res ::Matrix{Float64} = zeros(2, data_nbr)
-
-    exec_times ::Vector{Float64} = [0. for i in 1:nbr_tests]
-
-    for i = 1:nbr_tests
-
-        init_data(data, data_nbr)
-
-        # @elapsed times only this call (in seconds) and, unlike tic()/toq(),
-        # does not depend on a global timer stack.
-        exec_times[i] = @elapsed black_scholes_starpu(data, res, nslices)
-    end
-    sort!(exec_times)
-
-    # Lower median of the sorted timings.
-    return exec_times[1 + div(nbr_tests - 1, 2)]
-end
-
-function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests)
-
-    # Timings previously written by the two C programs, one value per line.
-    mtc = map(x -> parse(Float64, x), readlines("../DAT/black_scholes_c_times.dat"))
-    mtcgen = map(x -> parse(Float64, x), readlines("../DAT/black_scholes_c_generated_times.dat"))
-
-    open("../DAT/black_scholes_times.dat", "w") do f
-        # The i-th data size is matched with the i-th line of each C timing file.
-        for (i, data_nbr) in enumerate(start_nbr : step_nbr : stop_nbr)
-            t = median_times(data_nbr, nslices, nbr_tests)
-            println("Number of data:\n$data_nbr\nTimes:\njl: $t\nC: $(mtc[i])\nGen: $(mtcgen[i])")
-            write(f, "$data_nbr $(t) $(mtcgen[i]) $(mtc[i])\n")
-        end
-    end
-end

+ 0 - 35
julia/tst/black_scholes/black_scholes_generated.jl

@@ -1,35 +0,0 @@
-# Command-line driver: checks the arguments, initialises StarPU with the
-# generated kernels, declares the codelet and runs the timing loop.
-
-# Invalid invocations terminate with a non-zero status.
-if length(ARGS) != 5
-    println("Usage: julia prog.jl start_data_nbr step_data_nbr stop_data_nbr nslices nbr_tests")
-    exit(1)
-end
-
-
-# ARGS[1] is start_data_nbr and ARGS[4] is nslices.
-if (parse(Int64,ARGS[1]) < parse(Int64,ARGS[4]))
-    println("The number of slices must be smaller than the number of data")
-    exit(1)
-end
-
-include("../../src/Wrapper/Julia/starpu_include.jl")
-using StarPU
-
-@debugprint "starpu_init"
-starpu_init(extern_task_path = "../build/generated_tasks_black_scholes")
-
-perfmodel = StarpuPerfmodel(
-    perf_type = STARPU_HISTORY_BASED,
-    symbol = "history_perf"
-)
-
-# Codelet used by black_scholes_def.jl: two buffers, both accessed in RW mode.
-cl = StarpuCodelet(
-cpu_func = "black_scholes",
-gpu_func = "CUDA_black_scholes",
-modes = [STARPU_RW, STARPU_RW],
-perfmodel = perfmodel
-)
-
-include("black_scholes_def.jl")
-
-display_times(map( (x->parse(Int64, x)), ARGS)...)
-
-@debugprint "starpu_shutdown"
-starpu_shutdown()

+ 0 - 170
julia/tst/black_scholes/black_scholes_with_generated.c

@@ -1,170 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2019       Mael Keryell
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <starpu.h>
-#include "../includes/sorting.h"
-
-void black_scholes(void **, void *);
-void CUDA_black_scholes(void **, void*);
-
-static struct starpu_perfmodel model =
-{
-	.type = STARPU_HISTORY_BASED,
-	.symbol = "history_perf"
-};
-
-struct starpu_codelet cl =
-{
-	.cpu_funcs = {black_scholes},
-	.cuda_funcs = {CUDA_black_scholes},
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_RW},
-	.model = &model
-};
-
-void black_scholes_with_starpu(double *data, double *res, unsigned nslices, unsigned nbr_data)
-{
-	starpu_data_handle_t in_handle, out_handle;
-	unsigned slice;
-
-	/* data is registered with ld = nx = 5, res with ld = nx = 2;
-	 * ny = nbr_data for both. */
-	starpu_matrix_data_register(&in_handle, STARPU_MAIN_RAM, (uintptr_t)data, 5, 5, nbr_data, sizeof(double));
-	starpu_matrix_data_register(&out_handle, STARPU_MAIN_RAM, (uintptr_t)res, 2, 2, nbr_data, sizeof(double));
-
-	/* Vertical block filter cutting both matrices into nslices children. */
-	struct starpu_data_filter slicer =
-	{
-		.filter_func = starpu_matrix_filter_vertical_block,
-		.nchildren = nslices
-	};
-
-	starpu_data_partition(in_handle, &slicer);
-	starpu_data_partition(out_handle, &slicer);
-
-	/* One task per slice, pairing the matching children of both matrices. */
-	slice = 0;
-	while (slice < nslices){
-		struct starpu_task *task = starpu_task_create();
-
-		task->cl = &cl;
-		task->handles[0] = starpu_data_get_sub_data(in_handle, 1, slice);
-		task->handles[1] = starpu_data_get_sub_data(out_handle, 1, slice);
-
-		starpu_task_submit(task);
-		slice++;
-	}
-
-	starpu_task_wait_for_all();
-
-	/* Gather the children back into main memory, then unregister. */
-	starpu_data_unpartition(in_handle, STARPU_MAIN_RAM);
-	starpu_data_unpartition(out_handle, STARPU_MAIN_RAM);
-
-	starpu_data_unregister(in_handle);
-	starpu_data_unregister(out_handle);
-}
-	
-
-void init_data(double *data, unsigned nbr_data)
-{
-	/* Each group of five consecutive doubles is filled with pseudo-random
-	 * values in [0,100], [0,100], [0,1], [0,10] and [0,10] respectively. */
-	double *group = data;
-	unsigned remaining;
-	for (remaining = nbr_data; remaining > 0; remaining--, group += 5){
-		group[0] = 100. * rand() / (double) RAND_MAX;
-		group[1] = 100. * rand() / (double) RAND_MAX;
-		group[2] = rand() / (double) RAND_MAX;
-		group[3] = 10. * rand() / (double) RAND_MAX;
-		group[4] = 10. * rand() / (double) RAND_MAX;
-	}
-}
-
-/* Run the StarPU Black-Scholes computation nbr_tests times on freshly
- * randomised inputs of nbr_data elements cut into nslices slices, and return
- * the median wall-clock time (measured value divided by 1e6).
- * Returns 0 when nbr_tests is 0; exits the program when memory cannot be
- * allocated. */
-double median_time(unsigned nbr_data, unsigned nslices, unsigned nbr_tests)
-{
-	/* Nothing to measure: avoids a zero-length array and an out-of-bounds read. */
-	if (nbr_tests == 0)
-		return 0.;
-
-	double *data = malloc(5 * (size_t) nbr_data * sizeof(double));
-	double *res = calloc(2 * (size_t) nbr_data, sizeof(double));
-	/* Heap allocation: nbr_tests comes from the command line and is unbounded. */
-	double *exec_times = malloc(nbr_tests * sizeof(double));
-	double median;
-
-	/* malloc(0) may legitimately return NULL, hence the nbr_data test. */
-	if (!exec_times || (nbr_data > 0 && (!data || !res))){
-		fprintf(stderr, "median_time: out of memory\n");
-		exit(EXIT_FAILURE);
-	}
-
-	unsigned i;
-	for (i = 0; i < nbr_tests; i++){
-
-		init_data(data, nbr_data);
-
-		/* Only the StarPU part is timed, not the data initialisation. */
-		double start = starpu_timing_now();
-		black_scholes_with_starpu(data, res, nslices, nbr_data);
-		double stop = starpu_timing_now();
-
-		exec_times[i] = (stop-start)/1.e6;
-	}
-
-	free(data);
-	free(res);
-
-	quicksort(exec_times, 0, nbr_tests - 1);
-
-	median = exec_times[nbr_tests/2];
-	free(exec_times);
-
-	return median;
-}
-	
-
-
-/* For every data size from start_nbr to stop_nbr (inclusive) by steps of
- * step_nbr, print the median execution time and append it to
- * DAT/black_scholes_c_generated_times.dat (one value per line).
- * Does nothing but report an error when step_nbr is 0 or when the output
- * file cannot be opened. */
-void display_times(unsigned start_nbr, unsigned step_nbr, unsigned stop_nbr, unsigned nslices, unsigned nbr_tests){
-
-	double t;
-	unsigned nbr_data;
-	FILE *myfile;
-
-	/* A zero step would never reach stop_nbr. */
-	if (step_nbr == 0){
-		fprintf(stderr, "display_times: step_nbr must be greater than 0\n");
-		return;
-	}
-
-	myfile = fopen("DAT/black_scholes_c_generated_times.dat", "w");
-	if (myfile == NULL){
-		perror("DAT/black_scholes_c_generated_times.dat");
-		return;
-	}
-
-	for (nbr_data = start_nbr; nbr_data <= stop_nbr; nbr_data+=step_nbr){
-		t = median_time(nbr_data, nslices, nbr_tests);
-		printf("Number of data: %u\nTime: %f\n", nbr_data, t);
-		fprintf(myfile, "%f\n", t);
-	}
-	fclose(myfile);
-}
-
-int main(int argc, char *argv[])
-{
-	/* params: start_nbr, step_nbr, stop_nbr, nslices, nbr_tests */
-	unsigned params[5];
-	int k;
-
-	if (argc != 6){
-		printf("Usage: %s start_nbr step_nbr stop_nbr nslices nbr_tests\n", argv[0]);
-		return 1;
-	}
-
-	if (starpu_init(NULL) != EXIT_SUCCESS){
-		fprintf(stderr, "ERROR\n");
-		return 77;
-	}
-
-	for (k = 0; k < 5; k++)
-		params[k] = (unsigned) atoi(argv[k + 1]);
-
-	display_times(params[0], params[1], params[2], params[3], params[4]);
-
-	starpu_shutdown();
-
-	return 0;
-}

+ 0 - 54
julia/tst/black_scholes/cpu_black_scholes.c

@@ -1,54 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2019       Mael Keryell
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdint.h>
-#include <starpu.h>
-#include <math.h>
-
-
-static inline double normcdf(double x)
-{
-	/* (1 + erf(x / sqrt(2))) / 2 */
-	const double erf_term = erf(x / sqrt(2.0));
-	return 0.5 * (1.0 + erf_term);
-}
-
-/* CPU implementation of the Black-Scholes codelet.
- * descr[0..4]: input vectors S, K, R, T, SIG; descr[5..6]: output vectors
- * CRES (call price) and PRES (put price). arg is unused.
- * The buffers are read through the vector interface accessors, matching the
- * starpu_vector_data_register calls in black_scholes.c. */
-void cpu_black_scholes(void *descr[], void *arg)
-{
-	double *S, *K, *R, *T, *SIG, *CRES, *PRES;
-	uint32_t nxS;
-	uint32_t i;
-
-	(void) arg;
-
-	S = (double *)STARPU_VECTOR_GET_PTR(descr[0]);
-	K = (double *)STARPU_VECTOR_GET_PTR(descr[1]);
-	R = (double *)STARPU_VECTOR_GET_PTR(descr[2]);
-	T = (double *)STARPU_VECTOR_GET_PTR(descr[3]);
-	SIG = (double *)STARPU_VECTOR_GET_PTR(descr[4]);
-	CRES = (double *)STARPU_VECTOR_GET_PTR(descr[5]);
-	PRES = (double *)STARPU_VECTOR_GET_PTR(descr[6]);
-
-	/* Number of elements of this slice, taken from the first buffer. */
-	nxS = STARPU_VECTOR_GET_NX(descr[0]);
-
-	for (i = 0; i < nxS; i++){
-		/* Sub-expressions shared by d1, d2 and both prices. */
-		double log_sk = log(S[i] / K[i]);
-		double half_var = pow(SIG[i], 2.0) * 0.5;
-		double vol_sqrt_t = SIG[i] * sqrt(T[i]);
-		double discounted_k = K[i] * exp(-R[i] * T[i]);
-
-		double d1 = (log_sk + (R[i] + half_var) * T[i]) / vol_sqrt_t;
-		double d2 = (log_sk + (R[i] - half_var) * T[i]) / vol_sqrt_t;
-
-		CRES[i] = S[i] * normcdf(d1) - discounted_k * normcdf(d2);
-		PRES[i] = -S[i] * normcdf(-d1) + discounted_k * normcdf(-d2);
-	}
-}

+ 0 - 124
julia/tst/black_scholes/cpu_cuda_black_scholes.jl

@@ -1,124 +0,0 @@
-include("../../src/Compiler/include.jl")
-
-starpu_new_cpu_kernel_file("../build/generated_cpu_black_scholes.c")
-starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
-
-
-
-
-
-# Kernel source handed to @cpu_cuda_kernel. For every column i of `data` it
-# writes the call price to res[1,i] and the put price to res[2,i]. The normal
-# CDF is evaluated with a degree-5 polynomial approximation.
-# NOTE(review): p and b1..b5 look like the Abramowitz & Stegun 26.2.17
-# constants — confirm.
-@cpu_cuda_kernel function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) ::Void
-    
-    widthn ::Int64 = width(data)
-        
-    # data[1,...] -> S
-    # data[2,...] -> K
-    # data[3,...] -> r
-    # data[4,...] -> T
-    # data[5,...] -> sig
-
-    # Constants of the polynomial approximation of the normal CDF.
-    p ::Float64 = 0.2316419
-    b1 ::Float64 = 0.31938153
-    b2 ::Float64 = -0.356563782
-    b3 ::Float64 = 1.781477937
-    b4 ::Float64 = -1.821255978
-    b5 ::Float64 = 1.330274428
-
-    
-    @indep for i = 1:widthn
-        
-
-        d1 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] + pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i]))
-        d2 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] - pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i]))
-        
-
-
-
-        # Scratch variables shared by the two normcdf evaluations below.
-        f ::Float64 = 0
-        ff ::Float64 = 0
-        s1 ::Float64 = 0
-        s2 ::Float64 = 0
-        s3 ::Float64 = 0
-        s4 ::Float64 = 0
-        s5 ::Float64 = 0
-        sz ::Float64 = 0
-        
-
-
-        
-        ######## Compute normcdf of d1
-
-        normd1p ::Float64 = 0
-        normd1n ::Float64 = 0
-
-        # boold1 is 2 only when both comparisons hold, i.e. when d1 == 0.
-        boold1 ::Int64 = (d1 >= 0) + (d1 <= 0)
-        
-        if (boold1 >= 2)
-            normd1p = 0.5
-            normd1n = 0.5
-        else
-            tmp1 ::Float64 = abs(d1)
-            f = 1 / sqrt(2 * M_PI)
-            ff = exp(-pow(tmp1, 2.0) / 2) * f
-            s1 = b1 / (1 + p * tmp1)
-            s2 = b2 / pow((1 + p * tmp1), 2.0)
-            s3 = b3 / pow((1 + p * tmp1), 3.0)
-            s4 = b4 / pow((1 + p * tmp1), 4.0)
-            s5 = b5 / pow((1 + p * tmp1), 5.0)
-            sz = ff * (s1 + s2 + s3 + s4 + s5)
-        
-            if (d1 > 0)
-                normd1p = 1 - sz # normcdf(d1)
-                normd1n = sz # normcdf(-d1)
-            else
-                normd1p = sz
-                normd1n = 1 - sz
-            end    
-        end
-        ########
-        
-
-        ######## Compute normcdf of d2
-        normd2p ::Float64 = 0
-        normd2n ::Float64 = 0
-
-        # boold2 is 2 only when both comparisons hold, i.e. when d2 == 0.
-        boold2 ::Int64 = (d2 >= 0) + (d2 <= 0)
-        
-        if (boold2 >= 2)
-            normd2p = 0.5
-            normd2n = 0.5
-        else
-            tmp2 ::Float64 = abs(d2)
-            f = 1 / sqrt(2 * M_PI)
-            ff = exp(-pow(tmp2, 2.0) / 2) * f
-            s1 = b1 / (1 + p * tmp2)
-            s2 = b2 / pow((1 + p * tmp2), 2.0)
-            s3 = b3 / pow((1 + p * tmp2), 3.0)
-            s4 = b4 / pow((1 + p * tmp2), 4.0)
-            s5 = b5 / pow((1 + p * tmp2), 5.0)
-            sz = ff * (s1 + s2 + s3 + s4 + s5)
-        
-        
-            if (d2 > 0)
-                normd2p = 1 - sz # normcdf(d2)
-                normd2n = sz # normcdf(-d2)
-            else
-                normd2p = sz
-                normd2n = 1 - sz
-            end
-        end
-        # normd1p = (1 + erf(d1/sqrt(2.0)))/2.0
-        # normd1n = (1 + erf(-d1/sqrt(2.0)))/2.0
-        
-        # normd2p = (1 + erf(d2/sqrt(2.0)))/2.0
-        # normd2n = (1 + erf(-d2/sqrt(2.0)))/2.0
-        
-        res[1,i] = data[1,i] * (normd1p) - data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2p) # S * N(d1) - K*exp(-r*T) * N(d2)
-        res[2,i] = -data[1,i] * (normd1n) + data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2n) # -S * N(-d1) + K*exp(-r*T) * N(-d2)
-        
-    end
-end
-
-compile_cpu_kernels("../build/generated_cpu_black_scholes.so")
-compile_cuda_kernels("../build/generated_cuda_black_scholes.so")
-combine_kernel_files("../build/generated_tasks_black_scholes.so", ["../build/generated_cpu_black_scholes.so", "../build/generated_cuda_black_scholes.so"])

+ 0 - 89
julia/tst/black_scholes/gpu_black_scholes.cu

@@ -1,89 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2019       Mael Keryell
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdio.h>
-#include <stdint.h>
-#include <math.h>
-#include <starpu.h>
-
-// __device__ inline double cndGPU(double d)
-// {
-//   const double A1 = 0.31938153f;
-//   const double A2 = -0.356563782f;
-//   const double A3 = 1.781477937f;
-//   const double A4 = -1.821255978f;
-//   const double A5 = 1.330274429f;
-//   const float RSQRT2PI = 0.39894228040143267793994605993438f;
-
-    
-//   double K = __fdividef(1.0f, (1.0f + 0.2316419f * fabsf(d)));
-
-    
-//   double cnd = RSQRT2PI * __expf(- 0.5f * d * d) * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));
-
-//     if (d > 0)
-//       cnd = 1.0f - cnd;
-
-//     return cnd;
-// }
-
-__device__ inline double cndGPU(double d)
-{
-  // (1 + erf(d / sqrt(2))) / 2
-  const double erf_term = erf(d / sqrt(2.0));
-  return 0.5 * (1.0 + erf_term);
-}
-
-// Computes one call price (CRES[i]) and one put price (PRES[i]) per thread.
-// Launch layout: 1D grid of 1D blocks providing at least nxS threads; threads
-// past the end of the data exit immediately. No shared memory is used.
-__global__ void gpuBlackScholesKernel(double *S, double *K, double *R, double *T, 
-				      double *SIG, double *CRES, double *PRES,
-				      uint32_t nxS)
-{
-  uint32_t i = blockIdx.x * blockDim.x + threadIdx.x;
-
-  // Tail guard: the grid rarely divides nxS evenly. Surplus threads must not
-  // wrap around and rewrite elements owned by other threads.
-  if (i >= nxS)
-    return;
-
-  // Sub-expressions shared by d1, d2 and both prices.
-  double log_sk = log(S[i] / K[i]);
-  double half_var = SIG[i] * SIG[i] * 0.5;
-  double vol_sqrt_t = SIG[i] * sqrt(T[i]);
-  double discounted_k = K[i] * exp(-R[i] * T[i]);
-
-  double d1 = (log_sk + (R[i] + half_var) * T[i]) / vol_sqrt_t;
-  double d2 = (log_sk + (R[i] - half_var) * T[i]) / vol_sqrt_t;
-
-  CRES[i] = S[i] * normcdf(d1) - discounted_k * normcdf(d2);
-  PRES[i] = -S[i] * normcdf(-d1) + discounted_k * normcdf(-d2);
-}
-
-#define THREADS_PER_BLOCK 64
-
-// CUDA implementation of the Black-Scholes codelet.
-// descr[0..4]: input vectors S, K, R, T, SIG; descr[5..6]: output vectors CRES
-// and PRES. args is unused. The buffers are read through the vector interface
-// accessors, matching the starpu_vector_data_register calls in black_scholes.c.
-// The kernel runs on StarPU's local stream and is synchronised before returning.
-extern "C" void gpu_black_scholes(void *descr[], void *args)
-{
-  (void) args;
-
-  double *S = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
-  double *K = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
-  double *R = (double *) STARPU_VECTOR_GET_PTR(descr[2]);
-  double *T = (double *) STARPU_VECTOR_GET_PTR(descr[3]);
-  double *SIG = (double *) STARPU_VECTOR_GET_PTR(descr[4]);
-  double *CRES = (double *) STARPU_VECTOR_GET_PTR(descr[5]);
-  double *PRES = (double *) STARPU_VECTOR_GET_PTR(descr[6]);
-
-  uint32_t nxS = STARPU_VECTOR_GET_NX(descr[0]);
-
-  // An empty slice would give a zero-block launch, which is an invalid configuration.
-  if (nxS == 0)
-    return;
-
-  // Ceil-division so that every element gets a thread.
-  uint32_t nblocks = (nxS + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
-  cudaStream_t stream = starpu_cuda_get_local_stream();
-
-  gpuBlackScholesKernel
-    <<< nblocks, THREADS_PER_BLOCK, 0, stream
-    >>> (S, K, R, T, SIG, CRES, PRES, nxS);
-
-  // Launch errors are only visible through cudaGetLastError().
-  cudaError_t status = cudaGetLastError();
-  if (status != cudaSuccess)
-    STARPU_CUDA_REPORT_ERROR(status);
-
-  // Execution errors surface at the synchronisation point.
-  status = cudaStreamSynchronize(stream);
-  if (status != cudaSuccess)
-    STARPU_CUDA_REPORT_ERROR(status);
-}

+ 0 - 0
julia/tst/cpu_cuda_mult.jl


Some files were not shown because too many files changed in this diff