Browse Source

merge branch 'master' into starpurm

Olivier Aumage 6 years ago
parent
commit
57fd37ebbb
69 changed files with 4544 additions and 8 deletions
  1. 8 1
      ChangeLog
  2. 1 0
      configure.ac
  3. 47 0
      julia/src/Compiler/C/add_for_loop_declarations.jl
  4. 15 0
      julia/src/Compiler/C/create_cpu_kernel.jl
  5. 27 0
      julia/src/Compiler/C/flatten_blocks.jl
  6. 79 0
      julia/src/Compiler/C/substitute_args.jl
  7. 25 0
      julia/src/Compiler/C/substitute_func_calls.jl
  8. 52 0
      julia/src/Compiler/C/substitute_indexing.jl
  9. 179 0
      julia/src/Compiler/Cuda/create_cuda_kernel.jl
  10. 49 0
      julia/src/Compiler/Cuda/indep_for.jl
  11. 121 0
      julia/src/Compiler/Cuda/indep_for_kernel_ids.jl
  12. 60 0
      julia/src/Compiler/Expressions/affect.jl
  13. 68 0
      julia/src/Compiler/Expressions/block.jl
  14. 75 0
      julia/src/Compiler/Expressions/call.jl
  15. 60 0
      julia/src/Compiler/Expressions/cuda_call.jl
  16. 44 0
      julia/src/Compiler/Expressions/field.jl
  17. 89 0
      julia/src/Compiler/Expressions/for.jl
  18. 85 0
      julia/src/Compiler/Expressions/function.jl
  19. 94 0
      julia/src/Compiler/Expressions/if.jl
  20. 48 0
      julia/src/Compiler/Expressions/interval.jl
  21. 70 0
      julia/src/Compiler/Expressions/ref.jl
  22. 33 0
      julia/src/Compiler/Expressions/return.jl
  23. 63 0
      julia/src/Compiler/Expressions/simple_expressions.jl
  24. 109 0
      julia/src/Compiler/Expressions/typed.jl
  25. 53 0
      julia/src/Compiler/Expressions/while.jl
  26. 62 0
      julia/src/Compiler/Generate_files/c_files.jl
  27. 125 0
      julia/src/Compiler/Generate_files/cuda_files.jl
  28. 39 0
      julia/src/Compiler/Generate_files/so_files.jl
  29. 91 0
      julia/src/Compiler/expression_manipulation.jl
  30. 39 0
      julia/src/Compiler/include.jl
  31. 50 0
      julia/src/Compiler/parsing.jl
  32. 50 0
      julia/src/Compiler/utils.jl
  33. 24 0
      julia/src/Wrapper/C/jlstarpu.h
  34. 162 0
      julia/src/Wrapper/C/jlstarpu_data_handles.c
  35. 16 0
      julia/src/Wrapper/C/jlstarpu_simple_functions.c
  36. 62 0
      julia/src/Wrapper/C/jlstarpu_task.h
  37. 192 0
      julia/src/Wrapper/C/jlstarpu_task_submit.c
  38. 52 0
      julia/src/Wrapper/C/jlstarpu_utils.h
  39. 304 0
      julia/src/Wrapper/Julia/linked_list.jl
  40. 146 0
      julia/src/Wrapper/Julia/starpu_codelet.jl
  41. 260 0
      julia/src/Wrapper/Julia/starpu_data_handle.jl
  42. 53 0
      julia/src/Wrapper/Julia/starpu_define.jl
  43. 147 0
      julia/src/Wrapper/Julia/starpu_destructible.jl
  44. 21 0
      julia/src/Wrapper/Julia/starpu_include.jl
  45. 25 0
      julia/src/Wrapper/Julia/starpu_init_shutdown.jl
  46. 90 0
      julia/src/Wrapper/Julia/starpu_perfmodel.jl
  47. 25 0
      julia/src/Wrapper/Julia/starpu_simple_functions.jl
  48. 198 0
      julia/src/Wrapper/Julia/starpu_task.jl
  49. 48 0
      julia/src/Wrapper/Julia/starpu_task_submit.jl
  50. 38 0
      julia/src/Wrapper/Julia/starpu_worker.jl
  51. 28 0
      julia/src/Wrapper/Julia/static_structures.jl
  52. 29 0
      julia/tst/cpu_cuda_mult.jl
  53. 54 0
      julia/tst/cpu_mult.c
  54. 68 0
      julia/tst/gpu_mult.cu
  55. 278 0
      julia/tst/mult.c
  56. 107 0
      julia/tst/mult_def.jl
  57. 30 0
      julia/tst/mult_extern.jl
  58. 34 0
      julia/tst/mult_generated.jl
  59. 9 4
      mpi/src/mpi/starpu_mpi_tag.c
  60. 2 1
      src/core/combined_workers.c
  61. 1 0
      src/core/dependencies/data_concurrency.c
  62. 3 0
      src/core/jobs.c
  63. 2 1
      src/core/perfmodel/perfmodel_bus.c
  64. 4 0
      src/core/sched_ctx.c
  65. 2 0
      src/core/sched_policy.c
  66. 7 0
      src/core/task.c
  67. 9 0
      src/core/topology.c
  68. 1 0
      src/core/workers.c
  69. 3 1
      src/drivers/driver_common/driver_common.c

+ 8 - 1
ChangeLog

@@ -124,7 +124,14 @@ Small changes:
     scheduler context
   * Fonction starpu_is_initialized() is moved to the public API.
 
-StarPU 1.2.5 (git revision xxx)
+StarPU 1.2.6 (git revision xxx)
+==============================================
+
+Small changes:
+  * Fix crash for lws scheduler
+  * Avoid making hwloc load PCI topology when CUDA is not enabled
+
+StarPU 1.2.5 (git revision 22f32916916d158e3420033aa160854d1dd341bd)
 ==============================================
 
 Small features:

+ 1 - 0
configure.ac

@@ -3238,6 +3238,7 @@ AS_IF([test "$have_valid_hwloc" = "yes"],
       ])
 
 AC_CHECK_FUNCS([hwloc_topology_dup])
+AC_CHECK_FUNCS([hwloc_topology_set_components])
 AM_CONDITIONAL(STARPU_HWLOC_HAVE_TOPOLOGY_DUP, test $ac_cv_func_hwloc_topology_dup = yes)
 
 LDFLAGS="${SAVED_LDFLAGS}"

+ 47 - 0
julia/src/Compiler/C/add_for_loop_declarations.jl

@@ -0,0 +1,47 @@
+
+
+
+function interval_evaluation_declarations(set :: StarpuExprInterval, for_index_var :: Symbol)
+    # Builds the Int64 declarations needed ahead of a `for` loop over `set`.
+    # The patterns are plain locals: `const` is only meaningful on globals and
+    # newer Julia releases reject it on local variables.
+    decl_pattern = @parse € :: Int64
+    affect_pattern = @parse € :: Int64 = €
+    interv_size_affect_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
+
+    id = set.id
+
+    start_var = starpu_parse(Symbol(:start_, id))
+    start_decl = replace_pattern(affect_pattern, start_var, set.start)
+
+    step_var = starpu_parse(Symbol(:step_, id))
+    step_decl = replace_pattern(affect_pattern, step_var, set.step)
+    # dim_<id> is initialised with jlstarpu_interval_size(start, step, stop).
+    dim_var = starpu_parse(Symbol(:dim_, id))
+    dim_decl = replace_pattern(interv_size_affect_pattern, dim_var, start_var, step_var, set.stop)
+    # iter_<id> and the loop index are declared without an initial value.
+    iter_var = starpu_parse(Symbol(:iter_, id))
+    iter_decl = replace_pattern(decl_pattern, iter_var)
+    index_var = starpu_parse(for_index_var)
+    index_decl = replace_pattern(decl_pattern, index_var)
+
+    return StarpuExpr[start_decl, step_decl, dim_decl, iter_decl, index_decl]
+end
+
+
+
+function add_for_loop_declarations(expr :: StarpuExpr)
+    # Rebuild every `for` node with the declarations its interval requires;
+    # all other nodes pass through unchanged.
+    function attach_declarations(node :: StarpuExpr)
+
+        if isa(node, StarpuExprFor)
+            decls = interval_evaluation_declarations(node.set, node.iter)
+            return StarpuExprFor(node.iter, node.set, node.body, node.is_independant, decls)
+        end
+
+        return node
+    end
+
+    return apply(attach_declarations, expr)
+end

+ 15 - 0
julia/src/Compiler/C/create_cpu_kernel.jl

@@ -0,0 +1,15 @@
+
+
+
+
+
+function transform_to_cpu_kernel(expr :: StarpuExprFunction)
+    # Chain the CPU lowering passes; each pass consumes the result of the
+    # previous one.
+    with_decls = add_for_loop_declarations(expr)
+    with_buffers = substitute_args(with_decls)
+    with_getters = substitute_func_calls(with_buffers)
+    with_flat_indexes = substitute_indexing(with_getters)
+
+    return flatten_blocks(with_flat_indexes)
+end

+ 27 - 0
julia/src/Compiler/C/flatten_blocks.jl

@@ -0,0 +1,27 @@
+
+
+
+function flatten_blocks(expr :: StarpuExpr)
+    # Splice the instructions of every block nested directly inside another
+    # block into the parent, so that a sequence is printed as one flat list.
+    function func_to_run(x :: StarpuExpr)
+        if !isa(x, StarpuExprBlock)
+            return x
+        end
+
+        instrs = StarpuExpr[]
+
+        for sub_expr in x.exprs
+            if isa(sub_expr, StarpuExprBlock)
+                # append! copes with an empty nested block, whereas splatting
+                # it into push! would call push! without any element.
+                append!(instrs, sub_expr.exprs)
+            else
+                push!(instrs, sub_expr)
+            end
+        end
+
+        return StarpuExprBlock(instrs)
+    end
+    return apply(func_to_run, expr)
+end

+ 79 - 0
julia/src/Compiler/C/substitute_args.jl

@@ -0,0 +1,79 @@
+
+
+function substitute_argument_usage(expr :: StarpuExpr, arg_index, buffer_name :: Symbol, arg_name :: Symbol, ptr_name :: Symbol)
+    # Rewrites the uses of argument `arg_name` found in `expr` (see the steps below).
+    function func_to_apply(x :: StarpuExpr)
+        # Any reference to the argument becomes a reference to `ptr_name`.
+        if x == StarpuExprVar(arg_name)
+            return StarpuExprVar(ptr_name)
+        end
+        # Besides that, only calls listed in func_substitution are rewritten.
+        if !(isa(x, StarpuExprCall) && x.func in keys(func_substitution))
+            return x
+        end
+
+        if (length(x.args) != 1)
+            error("Invalid arrity for function $(x.func)")
+        end
+        # The call is about another variable: leave it untouched.
+        if (x.args[1] != StarpuExprVar(ptr_name))
+            return x
+        end
+        # The substituted function receives `buffer_name[arg_index]` rather than the pointer variable.
+        new_func = func_substitution[x.func]
+        new_arg = starpu_parse(:($buffer_name[$arg_index]))
+
+        return StarpuExprCall(new_func, [new_arg])
+    end
+
+    return apply(func_to_apply, expr)
+end
+
+
+
+function substitute_args(expr :: StarpuExprFunction)
+    # Replaces the typed arguments of `expr` by a (buffers, cl_arg) pair and prepends the pointer declarations to the body.
+    new_body = expr.body
+    func_id = rand_string()
+    buffer_arg_name = Symbol("buffers_", func_id)
+    cl_arg_name = Symbol("cl_arg_", func_id)
+
+    function_start_affectations = StarpuExpr[]
+
+    for i in (1 : length(expr.args))
+        # ptr_<var_id> (and ld_<var_id> for matrices) share the same random suffix.
+        var_id = rand_string()
+        ptr = Symbol(:ptr_, var_id)
+        # The argument type selects the getter used to fetch the pointer.
+        if (expr.args[i].typ <: Vector)
+            func_interface = :STARPU_VECTOR_GET_PTR
+
+        elseif (expr.args[i].typ <: Matrix)
+            func_interface = :STARPU_MATRIX_GET_PTR
+            ld_name = Symbol("ld_", var_id)
+            new_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$i])) )
+            push!(function_start_affectations, new_affect)
+
+        else
+            error("Task arguments must be either vector or matrix (got $(expr.args[i].typ))") #TODO : cl_args, variable ?
+        end
+        # Declaration of the pointer, typed after the element type of the argument.
+        type_in_arg = eltype(expr.args[i].typ)
+        new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$i])) )
+        push!(function_start_affectations, new_affect)
+
+        #var_to_replace = starpu_parse(expr.args[i].name)
+        #replace_with = starpu_parse(ptr)
+        #new_body = substitute(new_body, var_to_replace, replace_with)
+        new_body = substitute_argument_usage(new_body, i, buffer_arg_name, expr.args[i].name, ptr)
+    end
+
+    # New signature: (buffers_<id>, cl_arg_<id>); the body must expose `.exprs`.
+    new_args = [
+                    starpu_parse(:($buffer_arg_name :: Matrix{Void})),
+                    starpu_parse(:($cl_arg_name :: Vector{Void}))
+                ]
+    new_body = StarpuExprBlock([function_start_affectations..., new_body.exprs...])
+
+    return StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
+end

+ 25 - 0
julia/src/Compiler/C/substitute_func_calls.jl

@@ -0,0 +1,25 @@
+
+
+
+func_substitution = Dict(                 # function name in the source => symbol used in the generated call
+    :width => :STARPU_MATRIX_GET_NY,
+    :height => :STARPU_MATRIX_GET_NX,
+    # vector counterpart
+    :length => :STARPU_VECTOR_GET_NX
+)
+
+
+
+function substitute_func_calls(expr :: StarpuExpr)
+    # Rename each call whose function is a key of func_substitution to the
+    # associated symbol; the arguments are kept as they are.
+    function rename_call(node :: StarpuExpr)
+
+        if isa(node, StarpuExprCall) && haskey(func_substitution, node.func)
+            return StarpuExprCall(func_substitution[node.func], node.args)
+        end
+
+        return node
+    end
+    return apply(rename_call, expr)
+end

+ 52 - 0
julia/src/Compiler/C/substitute_indexing.jl

@@ -0,0 +1,52 @@
+
+
+function substitute_indexing(expr :: StarpuExpr)
+    # Turns 1-based indexing of variables into 0-based, single-index references.
+    function func_to_run(x :: StarpuExpr)
+
+        if !isa(x, StarpuExprRef)
+            return x
+        end
+
+        if !isa(x.ref, StarpuExprVar)
+            error("Only variable indexing is allowed") #TODO allow more ?
+        end
+
+        # Zero, one or two indexes are supported.
+        nb_indexes = length(x.indexes)
+
+        if (nb_indexes >= 3)
+            error("Indexing with more than 2 indexes is not allowed") # TODO : blocks
+        end
+
+        if (nb_indexes == 0)
+            return x
+
+        elseif nb_indexes == 1
+            new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)])  #TODO : add field "offset" from STARPU_VECTOR_GET interface
+                                                                            #TODO : detect when it is a matrix used with one index only
+            return StarpuExprRef(x.ref, [new_index])
+
+        elseif nb_indexes == 2
+            # The leading dimension variable ld_<id> is derived from the name ptr_<id>.
+            var_name = String(x.ref.name)
+            # The prefix must be at the very start: the suffix is taken from character 5.
+            if !startswith(var_name, "ptr_") || isempty(var_name[5:end])
+                error("Invalid variable ($var_name) for multiple index dereferencing")
+            end
+
+            var_id = var_name[5:end]
+            ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix)
+            # Flat index: (i + (j - 1) * ld) - 1
+            new_index = x.indexes[2]
+            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
+            new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)])
+            new_index = StarpuExprCall(:(+), [x.indexes[1], new_index])
+            new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)])
+
+            return StarpuExprRef(x.ref, [new_index])
+        end
+    end
+
+    return apply(func_to_run, expr)
+end

+ 179 - 0
julia/src/Compiler/Cuda/create_cuda_kernel.jl

@@ -0,0 +1,179 @@
+
+
+function is_indep_for_expr(x :: StarpuExpr)
+    return isa(x, StarpuExprFor) ? x.is_independant : false   # only `for` nodes can be independent
+end
+
+
+function extract_init_indep_finish(expr :: StarpuExpr) # TODO : the extraction is incomplete (example : if (cond) {@indep for ...} else {return} would not work);
+                                                            # better use apply() (NOTE : assert_no_indep_for already exists) to find every for loop recursively
+    init = StarpuExpr[]
+    finish = StarpuExpr[]
+    # Returns (instructions before, independent loop or nothing, instructions after).
+    if is_indep_for_expr(expr)
+        return init, StarpuIndepFor(expr), finish
+    end
+    # A single non-block expression holds no independent loop at this level.
+    if !isa(expr, StarpuExprBlock)
+        return [expr], nothing, finish
+    end
+    # Only the direct children of the block are inspected.
+    for i in (1 : length(expr.exprs))
+
+        if !is_indep_for_expr(expr.exprs[i])
+            continue
+        end
+        # Split around the first independent loop found.
+        init = expr.exprs[1 : i-1]
+        indep = StarpuIndepFor(expr.exprs[i])
+        finish = expr.exprs[i+1 : end]
+
+        if any(is_indep_for_expr, finish)
+            error("Sequence of several independant loops is not allowed") # likewise, a Block(Indep_for(...)) would escape this check
+        end
+
+        return init, indep, finish
+    end
+    # No independent loop among the direct children.
+    return expr.exprs, nothing, finish
+end
+
+
+
+
+function analyse_variable_declarations(expr :: StarpuExpr, already_defined :: Vector{StarpuExprTypedVar} = StarpuExprTypedVar[])
+    # Walks `expr` and returns (defined typed variables, names used without a known definition).
+    undefined_variables = Symbol[]
+    defined_variable_names = map((x -> x.name), already_defined)
+    defined_variable_types = map((x -> x.typ), already_defined)
+    # Visitor handed to apply; it never modifies the nodes it receives.
+    function func_to_apply(x :: StarpuExpr)
+
+        if isa(x, StarpuExprFunction)
+            error("No function declaration allowed in this section")
+        end
+
+        if isa(x, StarpuExprVar) || isa(x, StarpuExprTypedVar)
+            # A name that is not known yet is recorded once as undefined.
+            if !(x.name in defined_variable_names) && !(x.name in undefined_variables)
+                push!(undefined_variables, x.name)
+            end
+
+            return x
+        end
+
+        if isa(x, StarpuExprAffect) || isa(x, StarpuExprFor)
+            # Both node kinds may introduce a variable: work out its name and type.
+            if isa(x, StarpuExprAffect)
+
+                var = x.var
+                # An affectation to an untyped target declares nothing.
+                if !isa(var, StarpuExprTypedVar)
+                    return x
+                end
+
+                name = var.name
+                typ = var.typ
+                # A `for` loop declares its iterator as an Int64.
+            else
+                name = x.iter
+                typ = Int64
+            end
+
+            if name in defined_variable_names
+                error("Multiple definition of variable $name")
+            end
+            # The name may have been recorded as undefined earlier: withdraw it.
+            filter!((sym -> sym != name), undefined_variables)
+            push!(defined_variable_names, name)
+            push!(defined_variable_types, typ)
+
+            return x
+        end
+
+        return x
+    end
+    # The result of apply is discarded: only the collected names matter.
+    apply(func_to_apply, expr)
+    defined_variable = map(StarpuExprTypedVar, defined_variable_names, defined_variable_types)
+
+    return defined_variable, undefined_variables
+end
+
+
+
+function find_variable(name :: Symbol, vars :: Vector{StarpuExprTypedVar})
+    # Linear search: first entry of `vars` called `name`, or `nothing`.
+    for k in (1 : length(vars))
+        if vars[k].name != name
+            continue
+        end
+        return vars[k]
+    end
+    return nothing
+end
+
+
+
+function add_device_to_interval_call(expr :: StarpuExpr)
+    # Point every jlstarpu_interval_size call at jlstarpu_interval_size__device,
+    # keeping the argument list untouched.
+    function to_device_variant(node :: StarpuExpr)
+
+        if !isa(node, StarpuExprCall) || node.func != :jlstarpu_interval_size
+            return node
+        end
+
+        return StarpuExprCall(:jlstarpu_interval_size__device, node.args)
+    end
+    return apply(to_device_variant, expr)
+end
+
+
+
+function transform_to_cuda_kernel(func :: StarpuExprFunction)
+    # Returns (prekernel, kernel): the function launching the kernel and the kernel itself.
+    cpu_func = transform_to_cpu_kernel(func)
+
+    init, indep, finish = extract_init_indep_finish(cpu_func.body)
+
+    if indep === nothing
+        error("No independant for loop has been found") # TODO can fail because extraction is not correct yet
+    end
+
+    prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep)
+
+    # Instructions found before the loop run ahead of the launch set-up; the loop body ends the kernel.
+    prekernel_instr = vcat(init, prekernel_instr)
+    kernel_instr = vcat(kernel_instr, indep.body)
+
+    indep_for_def, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args)
+    prekernel_def, prekernel_undef = analyse_variable_declarations(StarpuExprBlock(prekernel_instr), cpu_func.args)
+    # Variables the kernel uses but does not define are passed as extra arguments when the prekernel defines them.
+    for undef_var in indep_for_undef
+
+        found_var = find_variable(undef_var, prekernel_def)
+
+        if found_var === nothing # TODO : error then ?
+            continue
+        end
+
+        push!(kernel_args, found_var)
+    end
+    # Launch, then synchronise, then run the instructions that followed the loop.
+    call_args = map((x -> StarpuExprVar(x.name)), kernel_args)
+    cuda_call = StarpuExprCudaCall(func.func, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args)
+    push!(prekernel_instr, cuda_call)
+    push!(prekernel_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream()))
+    prekernel_instr = vcat(prekernel_instr, finish)
+
+    prekernel_name = Symbol("CUDA_", func.func)
+    prekernel = StarpuExprFunction(Void, prekernel_name, cpu_func.args, StarpuExprBlock(prekernel_instr))
+    prekernel = flatten_blocks(prekernel)
+
+    kernel = StarpuExprFunction(Void, func.func, kernel_args, StarpuExprBlock(kernel_instr))
+    kernel = add_device_to_interval_call(kernel)
+    kernel = flatten_blocks(kernel)
+
+    return prekernel, kernel
+end

+ 49 - 0
julia/src/Compiler/Cuda/indep_for.jl

@@ -0,0 +1,49 @@
+
+
+struct StarpuIndepFor
+    # Nest of independent `for` loops stored as parallel lists: iters[k] iterates over sets[k].
+    iters :: Vector{Symbol}
+    sets :: Vector{StarpuExprInterval}
+    # Expression left once the independent loops have been peeled off.
+    body :: StarpuExpr
+end
+
+
+function assert_no_indep_for(expr :: StarpuExpr)
+    # Fail as soon as an independent loop is met anywhere inside `expr`;
+    # otherwise every node is handed back unchanged.
+    function reject_indep_for(node :: StarpuExpr)
+
+        is_indep = isa(node, StarpuExprFor) && node.is_independant
+        is_indep && error("Invalid usage of intricated @indep for loops")
+
+        return node
+    end
+
+    return apply(reject_indep_for, expr)
+end
+
+
+function StarpuIndepFor(expr :: StarpuExprFor)
+    # Collects the chain of directly nested independent loops starting at `expr`.
+    if !expr.is_independant
+        error("For expression must be prefixed by @indep")
+    end
+    # Typed accumulators matching the struct fields (no reliance on implicit conversion).
+    iters = Symbol[]
+    sets = StarpuExprInterval[]
+    for_loop = expr
+
+    while isa(for_loop, StarpuExprFor) && for_loop.is_independant
+
+        push!(iters, for_loop.iter)
+        push!(sets, for_loop.set)
+        for_loop = for_loop.body
+        # Look through blocks that hold a single instruction.
+        while (isa(for_loop, StarpuExprBlock) && length(for_loop.exprs) == 1)
+            for_loop = for_loop.exprs[1]
+        end
+    end
+    # Whatever remains must not contain another independent loop.
+    return StarpuIndepFor(iters, sets, assert_no_indep_for(for_loop))
+end

+ 121 - 0
julia/src/Compiler/Cuda/indep_for_kernel_ids.jl

@@ -0,0 +1,121 @@
+
+
+function translate_index_code(dims :: Vector{StarpuExprVar})
+    # Builds the instructions deriving one kernel_ids__index_<k> per dimension from THREAD_ID.
+    ndims = length(dims)
+
+    if ndims == 0
+        error("No dimension specified")
+    end
+    # `prod` accumulates the product of the dimensions handled so far, starting from the last one.
+    prod = StarpuExprValue(1)
+    output = StarpuExpr[]
+    reversed_dim = reverse(dims)
+    thread_index_patern = @parse € :: Int64 = (€ / €) % €
+    thread_id = @parse THREAD_ID
+
+    for i in (1 : ndims)
+        index_lvalue = StarpuExprVar(Symbol(:kernel_ids__index_, ndims - i + 1))
+        expr = replace_pattern(thread_index_patern, index_lvalue, thread_id, prod, reversed_dim[i])
+        push!(output, expr)
+        # index = (THREAD_ID / prod) % dim, then prod is multiplied by dim for the next index.
+        prod = StarpuExprCall(:(*), [prod, reversed_dim[i]])
+    end
+    # After the loop `prod` is the product of all dimensions, used as the bound on THREAD_ID.
+    thread_id_pattern = @parse begin
+
+        € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x
+
+        if (€ >= €)
+            return
+        end
+    end
+    # Pushed last so that, once the list is reversed, it comes first.
+    bound_verif = replace_pattern(thread_id_pattern, thread_id, thread_id, prod)
+    push!(output, bound_verif)
+    # Reversal also puts the indexes in increasing order of their number.
+    return reverse(output)
+end
+
+
+
+
+
+
+
+function kernel_index_declarations(ind_for :: StarpuIndepFor)
+    # For each set of `ind_for`, prepares the pre-kernel declarations, the kernel arguments and the iterator declaration.
+    pre_kernel_instr = StarpuExpr[]
+    kernel_args = StarpuExprTypedVar[]
+    kernel_instr = StarpuExpr[]
+
+    decl_pattern = @parse € :: Int64 = €
+    interv_size_decl_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €)
+    iter_pattern = @parse € :: Int64 = € + € * €
+
+    dims = StarpuExprVar[]
+    ker_instr_to_add_later_on = StarpuExpr[]
+
+    for k in (1 : length(ind_for.sets))
+        # Every generated name carries the position `k` of the set.
+        set = ind_for.sets[k]
+
+        start_var = starpu_parse(Symbol(:kernel_ids__start_, k))
+        start_decl = replace_pattern(decl_pattern, start_var, set.start)
+
+        step_var = starpu_parse(Symbol(:kernel_ids__step_, k))
+        step_decl = replace_pattern(decl_pattern, step_var, set.step)
+
+        dim_var = starpu_parse(Symbol(:kernel_ids__dim_, k))
+        dim_decl = replace_pattern(interv_size_decl_pattern, dim_var, start_var, step_var, set.stop)
+
+        push!(dims, dim_var)
+        # start, step and dim are declared before the kernel and passed to it as Int64 arguments.
+        push!(pre_kernel_instr, start_decl, step_decl, dim_decl)
+        push!(kernel_args, StarpuExprTypedVar(start_var.name, Int64))
+        push!(kernel_args, StarpuExprTypedVar(step_var.name, Int64))
+        push!(kernel_args, StarpuExprTypedVar(dim_var.name, Int64))
+        # iterator = start + index * step
+        iter_var = starpu_parse(ind_for.iters[k])
+        index_var = starpu_parse(Symbol(:kernel_ids__index_, k))
+        iter_decl = replace_pattern(iter_pattern, iter_var, start_var, index_var, step_var)
+
+        push!(ker_instr_to_add_later_on, iter_decl)
+    end
+
+    # `kernel_instr` is returned empty: nothing is pushed to it in this function.
+    return dims, ker_instr_to_add_later_on, pre_kernel_instr , kernel_args, kernel_instr
+end
+
+
+
+function analyse_sets(ind_for :: StarpuIndepFor)
+    # Splits an independent loop nest into pre-kernel instructions,
+    # kernel arguments and the instructions opening the kernel body.
+    dims, iter_decls, host_instrs, kernel_args, device_instrs = kernel_index_declarations(ind_for)
+
+    # nthreads = 1 * dim_1 * dim_2 * ...
+    total = @parse 1
+    for dim in dims
+        total = StarpuExprCall(:(*), [total, dim])
+    end
+
+    nthreads_var = @parse nthreads
+    nthreads_pattern = @parse € :: Int64 = €
+    push!(host_instrs, replace_pattern(nthreads_pattern, nthreads_var, total))
+
+    # nblocks = (nthreads + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK
+    nblocks_var = @parse nblocks
+    nblocks_pattern = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK
+    push!(host_instrs, replace_pattern(nblocks_pattern, nblocks_var, nthreads_var))
+
+    # Thread index decomposition comes first, the iterator values after it.
+    for instr in translate_index_code(dims)
+        push!(device_instrs, instr)
+    end
+    for instr in iter_decls
+        push!(device_instrs, instr)
+    end
+
+    return host_instrs, kernel_args, device_instrs
+end

+ 60 - 0
julia/src/Compiler/Expressions/affect.jl

@@ -0,0 +1,60 @@
+
+#======================================================
+                AFFECTATION
+======================================================#
+
+
+struct StarpuExprAffect <: StarpuExpr
+    var :: StarpuExpr     # left-hand side of `var = expr`
+    expr :: StarpuExpr    # right-hand side of `var = expr`
+end
+
+function starpu_parse_affect(x :: Expr)
+    # Translates `lhs = rhs`; both sides go through starpu_parse, the
+    # left-hand side first.
+    (x.head == :(=)) || error("Invalid \"affectation\" expression")
+
+    lhs = starpu_parse(x.args[1])
+    rhs = starpu_parse(x.args[2])
+
+    affectation = StarpuExprAffect(lhs, rhs)
+    return affectation
+end
+
+
+function equals(x :: StarpuExprAffect, y :: StarpuExpr)
+    # Two affectations are equal when both of their sides are equal.
+    if typeof(y) == StarpuExprAffect
+        return equals(x.var, y.var) && equals(x.expr, y.expr)
+    end
+
+    return false
+end
+
+
+function print(io :: IO, x :: StarpuExprAffect ; indent = 0)
+    # Emits `var = expr`. When the left-hand side carries a type, the
+    # right-hand side is wrapped in an explicit cast to that type, which
+    # avoids warnings (or errors for cuda) during compilation time.
+    print(io, x.var, indent = indent)
+    print(io, " = ")
+
+    if isa(x.var, StarpuExprTypedVar)
+        print(io, "(", starpu_type_traduction(x.var.typ), ") (")
+        print(io, x.expr, indent = indent)
+        print(io, ")")
+    else
+        print(io, x.expr, indent = indent)
+    end
+
+    return
+
+end
+
+function apply(func :: Function, expr :: StarpuExprAffect)
+    # Children first (left-hand side, then right-hand side), then the
+    # rebuilt node itself.
+    lhs = apply(func, expr.var)
+    rhs = apply(func, expr.expr)
+    return func(StarpuExprAffect(lhs, rhs))
+end

+ 68 - 0
julia/src/Compiler/Expressions/block.jl

@@ -0,0 +1,68 @@
+
+#======================================================
+                BLOCK
+(series of instruction, not C variable scoping block)
+======================================================#
+
+
+struct StarpuExprBlock <: StarpuExpr
+    exprs :: Vector{StarpuExpr}    # instructions of the block, in order
+end
+
+
+# Tells whether an element of a parsed block must be dropped before translation.
+is_unwanted(x) = false                     # fallback: literals and other nodes are kept
+is_unwanted(x :: LineNumberNode) = true    # line marker nodes carry no instruction
+
+function is_unwanted(x :: Symbol)
+    return false
+end
+
+function is_unwanted(x :: Expr)
+    # `Expr(:line, ...)` nodes only record a source location.
+    return (x.head == :line)
+end
+
+
+function starpu_parse_block(x :: Expr)
+    # Translates a `:block` expression into a StarpuExprBlock.
+    if (x.head != :block)
+        error("Invalid \"block\" expression")
+    end
+    # Elements flagged by is_unwanted are dropped; the others are translated in order.
+    exprs = map(starpu_parse, filter(!is_unwanted, x.args))
+
+    #=for y in x.args
+
+        if (is_unwanted(y))
+            continue
+        end
+
+        push!(exprs, starpu_parse(y))
+    end
+    =#
+    #if (length(exprs) == 1)
+    #    return exprs[1]  #TODO : let 1 instruction blocks be a thing ?
+    #end
+
+    return StarpuExprBlock(exprs)
+end
+
+
+function print(io :: IO, x :: StarpuExprBlock ; indent = 0)
+    # Each instruction is followed by ";"; no newline after the last one.
+    nb_instrs = length(x.exprs)
+    for (i, instr) in enumerate(x.exprs)
+        print(io, instr, indent = indent)
+        print(io, ";")
+        (i != nb_instrs) && print_newline(io, indent)
+    end
+end
+
+
+
+
+function apply(func :: Function, expr :: StarpuExprBlock)
+    rewritten = map((x -> apply(func, x)), expr.exprs)    # children first
+    return func(StarpuExprBlock(rewritten))                # then the block itself
+end

+ 75 - 0
julia/src/Compiler/Expressions/call.jl

@@ -0,0 +1,75 @@
+
+#======================================================
+                FUNCTION CALL
+======================================================#
+
+
+struct StarpuExprCall <: StarpuExpr
+    func :: Symbol                # name of the called function or operator
+    args :: Vector{StarpuExpr}    # call arguments, in order
+end
+
+
+function starpu_parse_call(x :: Expr)
+    # Translates `f(args...)`; the callee must parse to a plain variable.
+    if (x.head != :call)
+        error("Invalid \"call\" expression")
+    end
+
+    callee = starpu_parse(x.args[1])
+
+    if (!isa(callee, StarpuExprVar))
+        error("Invalid \"call\" expression : function must be a variable")
+    end
+
+    call_args = map(starpu_parse, x.args[2:end])
+
+    return StarpuExprCall(callee.name, call_args)
+end
+
+
+starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(%))    # calls to these are printed in infix form when they have >= 2 arguments
+
+
+function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0)
+    # Prints the call as `func(arg1, arg2, ...)`.
+    print(io, x.func, "(")
+
+    for (position, arg) in enumerate(x.args)
+        if position > 1
+            print(io, ", ")
+        end
+        print(io, arg, indent = indent)
+    end
+
+    print(io, ")")
+end
+
+
+function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0)
+    # Prints `(a) op (b) op ...`: every operand gets its own parentheses
+    # and the operator is written between consecutive operands.
+    for (position, operand) in enumerate(x.args)
+        (position > 1) && print(io, " ", x.func, " ")
+        print(io, "(")
+        print(io, operand, indent = indent)
+        print(io, ")")
+    end
+end
+
+function print(io :: IO, x :: StarpuExprCall ; indent = 0)
+    # Infix notation requires a listed operator and at least two operands;
+    # every other call is printed in prefix form.
+    use_infix = (length(x.args) >= 2 && x.func in starpu_infix_operators)
+    printer = use_infix ? print_infix : print_prefix
+
+    printer(io, x, indent = indent)
+end
+
+
+
+
+function apply(func :: Function, expr :: StarpuExprCall)
+    new_args = map((x -> apply(func, x)), expr.args)    # arguments first
+    return func(StarpuExprCall(expr.func, new_args))     # then the call itself
+end

+ 60 - 0
julia/src/Compiler/Expressions/cuda_call.jl

@@ -0,0 +1,60 @@
+
+
+#======================================================
+                CUDA KERNEL CALL
+======================================================#
+
+
+
+struct StarpuExprCudaCall <: StarpuExpr
+    # Kernel launch, printed as `ker_name <<< nblocks, threads_per_block, ... >>> (args)`.
+    ker_name :: Symbol
+
+    nblocks :: StarpuExpr
+    threads_per_block :: StarpuExpr
+
+    args :: Vector{StarpuExpr}
+
+end
+
+
+function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0)
+    # Prints the launch over several lines: kernel name, `<<< ... >>>` configuration, then the arguments.
+    print_newline(io, indent)
+    print(io, expr.ker_name)
+    print_newline(io, indent + starpu_indent_size)
+    print(io, "<<< ")
+    print(io, expr.nblocks, indent = indent + 2 * starpu_indent_size)
+    print(io, ", ")
+    print(io, expr.threads_per_block, indent = indent + 2 * starpu_indent_size)
+    print(io, ", 0, starpu_cuda_get_local_stream()")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, ">>> (")
+
+    for i in (1 : length(expr.args))
+        # Arguments are comma separated, with a line break before the 5th, 9th, ... one.
+        if (i != 1)
+            print(io, ", ")
+            if (i % 4 == 1)
+                print_newline(io, indent + 2 * starpu_indent_size + 1)
+            end
+        end
+
+        print(io, expr.args[i], indent = indent + 2 * starpu_indent_size)
+
+    end
+    # The statement terminator is printed here, together with the closing parenthesis.
+    print(io, ");")
+    print_newline(io, indent)
+
+end
+
+
+function apply(func :: Function, expr :: StarpuExprCudaCall)
+    # Recurse into every child with apply (not a bare call to func), then hand
+    # the rebuilt node to func, as the other apply methods do.
+    nblocks = apply(func, expr.nblocks)
+    threads_per_block = apply(func, expr.threads_per_block)
+    args = map((x -> apply(func, x)), expr.args)
+    return func(StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args))
+end

+ 44 - 0
julia/src/Compiler/Expressions/field.jl

@@ -0,0 +1,44 @@
+
+
+#======================================================
+                STRUCTURE FIELDS
+======================================================#
+
+
+
+struct StarpuExprField <: StarpuExpr
+    # Field access, printed as `(left).field` or `(left)->field`.
+    left :: StarpuExpr
+    field :: Symbol
+    # Selects "->" instead of "." when printing.
+    is_an_arrow :: Bool
+end
+
+
+function starpu_parse_field(x :: Expr)
+    # Accepts `left.field` only: a dot expression with exactly two arguments
+    # whose second argument is a quoted symbol.
+    well_formed = (x.head == :(.) && length(x.args) == 2)
+    well_formed || error("Invalid parsing of dot expression")
+
+    left = starpu_parse(x.args[1])
+    field = x.args[2]
+
+    if !(isa(field, QuoteNode) && isa(field.value, Symbol))
+        error("Invalid parsing of dot expression")
+    end
+    return StarpuExprField(left, field.value, false)
+end
+
+
+function print(io :: IO, x :: StarpuExprField ; indent = 0)
+    print(io, "(")    # the left operand is always wrapped in parentheses
+    print(io, x.left, indent = indent)
+    print(io, ")", x.is_an_arrow ? "->" : '.', x.field)    # "->" or "." followed by the field name
+end
+
+
+
+function apply(func :: Function, expr :: StarpuExprField)
+    return func(StarpuExprField(apply(func, expr.left), expr.field, expr.is_an_arrow))    # recurse into `left` before rebuilding
+end

+ 89 - 0
julia/src/Compiler/Expressions/for.jl

@@ -0,0 +1,89 @@
+
+#======================================================
+                FOR LOOPS
+======================================================#
+
+
+struct StarpuExprFor <: StarpuExpr
+    # `for iter = set ... end`
+    iter :: Symbol
+    set:: StarpuExprInterval
+    body :: StarpuExpr
+    # is_independant is set by the caller of starpu_parse_for; set_declarations are printed before the loop.
+    is_independant :: Bool
+    set_declarations :: Vector{StarpuExpr}
+
+end
+
+
+
+function starpu_parse_for(x :: Expr; is_independant = false)
+    # Translates `for iter = interval ... end`. The iterator must parse to a
+    # plain variable and the iterated set to an interval.
+    if (x.head != :for)
+        error("Invalid \"for\" expression")
+    end
+
+    header = x.args[1]
+
+    if (header.head != :(=))
+        error("Invalid \"for\" iterator affectation")
+    end
+
+    iter_var = starpu_parse(header.args[1])
+    isa(iter_var, StarpuExprVar) || error("Invalid \"for\" iterator")
+
+    interval = starpu_parse(header.args[2])
+
+    if (!isa(interval, StarpuExprInterval))
+        error("Set of values in \"for\" loop must be an interval")
+    end
+
+    loop_body = starpu_parse(x.args[2])
+
+    # The loop starts without any set declaration.
+    no_declarations = StarpuExpr[]
+    return StarpuExprFor(iter_var.name, interval, loop_body, is_independant, no_declarations)
+end
+
+
+
+
+
+function print(io :: IO, x :: StarpuExprFor ; indent = 0)
+    # Prints the set declarations, then a C `for` loop driven by iter_<id> and dim_<id>.
+    print_newline(io, indent)
+    print(io, StarpuExprBlock(x.set_declarations), indent = indent)
+
+    id = x.set.id
+    # string(...) accepts any printable id, like the Symbol(:start_, id) naming of the declarations.
+    start = string("start_", id)
+    step = string("step_", id)
+    dim = string("dim_", id)
+    iter = string("iter_", id)
+    # The loop counts iterations; the user-visible index advances by `step` alongside.
+
+    print_newline(io, indent, 2)
+    print(io, "for ($iter = 0, $(x.iter) = $start ; ")
+    print(io, "$iter < $dim ; ")
+    print(io, "$iter += 1, $(x.iter) += $step)")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, indent + starpu_indent_size)
+    print(io, x.body, indent = indent + starpu_indent_size)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+
+end
+
+
+
+function apply(func :: Function, expr :: StarpuExprFor)
+    # Declarations, set and body are rewritten in that order, then the loop.
+    new_decls = map( (x -> apply(func, x)), expr.set_declarations)
+    new_set = apply(func, expr.set)
+    new_body = apply(func, expr.body)
+
+    return func(StarpuExprFor(expr.iter, new_set, new_body, expr.is_independant, new_decls))
+end

+ 85 - 0
julia/src/Compiler/Expressions/function.jl

@@ -0,0 +1,85 @@
+
+
+#======================================================
+                FUNCTION DECLARATION
+======================================================#
+
+
+# AST node for a function definition with an explicit return type.
+struct StarpuExprFunction <: StarpuExpr
+    ret_type :: Type # return type, translated with starpu_type_traduction when printed
+    func :: Symbol # function name
+    args :: Vector{StarpuExprTypedVar} # typed parameters, in declaration order
+    body :: StarpuExpr # function body
+end
+
+
+# Convert a Julia `function` expression into a StarpuExprFunction.
+# The prototype must carry a return type annotation and every argument must be
+# a typed variable; otherwise an error is raised.
+function starpu_parse_function(x :: Expr)
+
+    x.head == :function || error("Invalid \"function\" expression")
+
+    signature = starpu_parse(x.args[1])
+    isa(signature, StarpuExprTypedExpr) || error("Invalid \"function\" prototype : a return type must me explicited")
+
+    call = signature.expr
+    isa(call, StarpuExprCall) || error("Invalid \"function\" prototype")
+
+    params = StarpuExprTypedVar[]
+
+    for candidate in call.args
+        isa(candidate, StarpuExprTypedVar) || error("Invalid \"function\" argument list")
+        push!(params, candidate)
+    end
+
+    parsed_body = starpu_parse(x.args[2])
+
+    return StarpuExprFunction(signature.typ, call.func, params, parsed_body)
+end
+
+
+
+# Emit the C definition of a function: return type, name, parameter list
+# (with a line break before the 5th, 9th, ... parameter) and a braced body.
+function print(io :: IO, x :: StarpuExprFunction ; indent = 0)
+
+    inner = indent + starpu_indent_size
+    # Column at which continuation lines of the parameter list start.
+    wrap_column = inner + length(String(x.func)) + 13
+
+    print(io, starpu_type_traduction(x.ret_type), " ")
+    print(io, x.func, '(')
+
+    for (position, param) in enumerate(x.args)
+
+        if position > 1
+            print(io, ", ")
+            position % 4 == 1 && print_newline(io, wrap_column)
+        end
+
+        print(io, param, indent = inner)
+    end
+
+    print(io, ")")
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, inner)
+    print(io, x.body, indent = inner)
+    print_newline(io, indent)
+    print(io, "}\n\n")
+    print_newline(io, indent)
+end
+
+
+
+# Rebuild a StarpuExprFunction bottom-up: `func` is applied to each argument,
+# then to the body, and finally to the rebuilt function node.
+function apply(func :: Function, expr :: StarpuExprFunction)
+
+    new_args = map(expr.args) do arg
+        apply(func, arg)
+    end
+    new_body = apply(func, expr.body)
+
+    rebuilt = StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body)
+
+    return func(rebuilt)
+end

+ 94 - 0
julia/src/Compiler/Expressions/if.jl

@@ -0,0 +1,94 @@
+
+
+#======================================================
+                IF STATEMENT
+======================================================#
+
+
+
+# AST node for an "if" statement without an else branch.
+struct StarpuExprIf <: StarpuExpr
+    cond :: StarpuExpr # condition
+    then_statement :: StarpuExpr # statement printed inside the braces after the condition
+end
+
+
+# AST node for an "if" statement with an else branch.
+struct StarpuExprIfElse <: StarpuExpr
+    cond :: StarpuExpr # condition
+    then_statement :: StarpuExpr # statement printed inside the first pair of braces
+    else_statement :: StarpuExpr # statement printed inside the braces following "else"
+end
+
+
+# Convert a Julia `if` expression into a StarpuExprIf (two arguments) or a
+# StarpuExprIfElse (a third argument is present).
+function starpu_parse_if(x :: Expr)
+
+    x.head == :if || error("Invalid \"if\" expression")
+
+    nargs = length(x.args)
+    nargs >= 2 || error("Invalid \"if\" statement")
+
+    cond = starpu_parse(x.args[1])
+    then_branch = starpu_parse(x.args[2])
+
+    if nargs > 2
+        else_branch = starpu_parse(x.args[3])
+        return StarpuExprIfElse(cond, then_branch, else_branch)
+    end
+
+    return StarpuExprIf(cond, then_branch)
+end
+
+
+# Emit the C form of an if / if-else node. The trailing newline is only
+# written when an else branch exists.
+function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0)
+
+    inner = indent + starpu_indent_size
+
+    # Writes `statement` wrapped in braces, one indentation level deeper.
+    function braced(statement)
+        print_newline(io, indent)
+        print(io, "{")
+        print_newline(io, inner)
+        print(io, statement, indent = inner)
+        print_newline(io, indent)
+        print(io, "}")
+    end
+
+    print_newline(io, indent)
+    print(io, "if (")
+    print(io, x.cond, indent = inner)
+    print(io, ")")
+    braced(x.then_statement)
+
+    if isa(x, StarpuExprIfElse)
+        print(io, " else")
+        braced(x.else_statement)
+        print_newline(io, indent)
+    end
+
+    return
+end
+
+
+
+# Rebuild a StarpuExprIf bottom-up: condition first, then the then-branch,
+# and finally `func` on the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprIf)
+
+    new_cond = apply(func, expr.cond)
+    new_then = apply(func, expr.then_statement)
+
+    rebuilt = StarpuExprIf(new_cond, new_then)
+
+    return func(rebuilt)
+end
+
+
+
+# Rebuild a StarpuExprIfElse bottom-up: condition, then-branch, else-branch,
+# and finally `func` on the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprIfElse)
+
+    new_cond = apply(func, expr.cond)
+    new_then = apply(func, expr.then_statement)
+    new_else = apply(func, expr.else_statement)
+
+    rebuilt = StarpuExprIfElse(new_cond, new_then, new_else)
+
+    return func(rebuilt)
+end

+ 48 - 0
julia/src/Compiler/Expressions/interval.jl

@@ -0,0 +1,48 @@
+
+#======================================================
+                INTERVALS
+======================================================#
+
+
+# AST node for an interval `start:step:stop`.
+struct StarpuExprInterval <: StarpuExpr
+    start :: StarpuExpr
+    step :: StarpuExpr
+    stop :: StarpuExpr
+
+    id :: String # identifier of this interval; defaults to a fresh rand_string()
+
+    # The only constructor: `id` is a keyword so that a rebuilt interval can keep its identifier.
+    function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string())
+        return new(start, step, stop, id)
+    end
+
+end
+
+
+# Convert a Julia `:` expression into a StarpuExprInterval.
+# With two arguments (`a:b`) the step is the parsed symbol `1`;
+# with three (`a:s:b`) the middle argument is the step.
+function starpu_parse_interval(x :: Expr)
+
+    x.head == :(:) || error("Invalid \"interval\" expression")
+
+    lower = starpu_parse(x.args[1])
+    # Either the stop (two-argument form) or the step (three-argument form).
+    second = starpu_parse(x.args[2])
+
+    if length(x.args) == 2
+        unit_step = starpu_parse(Symbol(1))
+        return StarpuExprInterval(lower, unit_step, second)
+    end
+
+    upper = starpu_parse(x.args[3])
+
+    return StarpuExprInterval(lower, second, upper)
+end
+
+
+
+# Rebuild a StarpuExprInterval bottom-up (start, step, stop), keeping the
+# original id, then apply `func` to the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprInterval)
+
+    new_start = apply(func, expr.start)
+    new_step = apply(func, expr.step)
+    new_stop = apply(func, expr.stop)
+
+    rebuilt = StarpuExprInterval(new_start, new_step, new_stop, id = expr.id)
+
+    return func(rebuilt)
+end

+ 70 - 0
julia/src/Compiler/Expressions/ref.jl

@@ -0,0 +1,70 @@
+
+#======================================================
+                ARRAYS AND REFERENCES
+======================================================#
+
+
+# AST node for an indexing expression `ref[indexes...]`.
+struct StarpuExprRef <: StarpuExpr
+    ref :: StarpuExpr # expression being indexed
+    indexes :: Vector{StarpuExpr} # one entry per index, each printed in its own pair of brackets
+end
+
+
+# Convert a Julia `ref` expression (`a[i, j, ...]`) into a StarpuExprRef:
+# the first argument is the indexed expression, the others are the indexes.
+function starpu_parse_ref(x :: Expr)
+
+    x.head == :ref || error("Invalid \"reference\" expression")
+
+    target = starpu_parse(x.args[1])
+    index_exprs = map(x.args[2:end]) do raw_index
+        starpu_parse(raw_index)
+    end
+
+    return StarpuExprRef(target, index_exprs)
+end
+
+
+
+# Structural equality for references: `y` must be a StarpuExprRef with an
+# equal target and pairwise equal indexes.
+function equals(x :: StarpuExprRef, y :: StarpuExpr)
+
+    typeof(y) == StarpuExprRef || return false
+
+    equals(x.ref, y.ref) || return false
+    length(x.indexes) == length(y.indexes) || return false
+
+    pairwise = map(equals, x.indexes, y.indexes)
+
+    return all(pairwise)
+end
+
+
+
+
+# Emit the indexed expression followed by one `[index]` per index.
+function print(io :: IO, x :: StarpuExprRef ; indent = 0)
+
+    print(io, x.ref, indent = indent)
+
+    for index in x.indexes
+        print(io, "[")
+        print(io, index, indent = indent)
+        print(io, "]")
+    end
+
+end
+
+
+
+# Rebuild a StarpuExprRef bottom-up: the target first, then each index,
+# and finally `func` on the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprRef)
+
+    new_ref = apply(func, expr.ref)
+    new_indexes = map(expr.indexes) do index
+        apply(func, index)
+    end
+
+    rebuilt = StarpuExprRef(new_ref, new_indexes)
+
+    return func(rebuilt)
+end

+ 33 - 0
julia/src/Compiler/Expressions/return.jl

@@ -0,0 +1,33 @@
+
+#======================================================
+                RETURN EXPRESSION
+======================================================#
+
+
+# AST node for a "return" statement.
+struct StarpuExprReturn <: StarpuExpr
+    value :: StarpuExpr # returned expression
+end
+
+# Convert a Julia `return` expression into a StarpuExprReturn.
+function starpu_parse_return(x :: Expr)
+
+    x.head == :return || error("Invalid \"return\" expression")
+
+    return StarpuExprReturn(starpu_parse(x.args[1]))
+end
+
+
+# Emit `return ` followed by the returned expression.
+function print(io :: IO, x :: StarpuExprReturn ; indent = 0)
+    returned = x.value
+    print(io, "return ")
+    print(io, returned, indent = indent)
+end
+
+
+
+# Rebuild a StarpuExprReturn around its transformed value, then apply `func`
+# to the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprReturn)
+
+    new_value = apply(func, expr.value)
+    rebuilt = StarpuExprReturn(new_value)
+
+    return func(rebuilt)
+end

+ 63 - 0
julia/src/Compiler/Expressions/simple_expressions.jl

@@ -0,0 +1,63 @@
+
+
+# Common supertype of every node of the expression tree built by starpu_parse.
+abstract type StarpuExpr end
+
+
+# Fallback for nodes without a dedicated `apply` method: `func` is applied
+# to the node itself and nothing is traversed.
+apply(func :: Function, expr :: StarpuExpr) = func(expr)
+
+
+
+
+# AST node for a bare identifier.
+struct StarpuExprVar <: StarpuExpr
+    name :: Symbol # identifier, printed verbatim
+end
+
+# Emit the variable name; `indent` is accepted for interface uniformity and ignored.
+function print(io :: IO, x :: StarpuExprVar ; indent = 0)
+    print(io, x.name)
+end
+
+
+
+# AST node wrapping a literal value (anything starpu_parse receives that is
+# neither an Expr nor a Symbol).
+struct StarpuExprValue <: StarpuExpr
+    value :: Any # wrapped literal; `nothing` prints as an empty string
+end
+
+
+# Emit a literal value:
+#   - `nothing` prints nothing at all,
+#   - strings are wrapped in double quotes,
+#   - characters are wrapped in single quotes,
+#   - anything else goes through the regular `print`.
+# `indent` is accepted for interface uniformity and ignored.
+function print(io :: IO, x :: StarpuExprValue ; indent = 0)
+
+    value = x.value
+
+    # Identity test: `==` is user-overloadable and may not even return a Bool,
+    # whereas `===` is always a plain boolean.
+    if value === nothing
+        return
+    end
+
+    if isa(value, AbstractString)
+        # NOTE(review): the content is written verbatim, without escaping
+        # embedded quotes or newlines - confirm callers never need that.
+        print(io, '"', value, '"')
+        return
+    end
+
+    if isa(value, Char)
+        print(io, '\'', value, '\'')
+        return
+    end
+
+    print(io, value)
+end
+
+
+
+
+# Placeholder node returned by starpu_parse for expression heads that have
+# no registered parsing function.
+struct StarpuExprInvalid <: StarpuExpr
+end
+
+# Emit the literal marker INVALID; `indent` is ignored.
+function print(io :: IO, x :: StarpuExprInvalid ; indent = 0)
+    print(io, "INVALID")
+end
+
+
+
+# Fallback parser: any value without a more specific method becomes a literal node.
+starpu_parse(raw_value :: Any) = StarpuExprValue(raw_value)
+
+# A bare symbol is parsed as a variable node.
+starpu_parse(sym :: Symbol) = StarpuExprVar(sym)

+ 109 - 0
julia/src/Compiler/Expressions/typed.jl

@@ -0,0 +1,109 @@
+
+#======================================================
+                TYPED EXPRESSION
+======================================================#
+
+
+
+
+
+
+
+# Common supertype of the nodes carrying a type annotation (`expr :: typ`).
+abstract type StarpuExprTyped <: StarpuExpr end
+
+
+# Type-annotated variable; printed as the translated type followed by the name.
+struct StarpuExprTypedVar <: StarpuExprTyped
+    name :: Symbol
+    typ :: Type
+end
+
+# Type-annotated expression that is not a plain variable; only the inner
+# expression is printed.
+struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ?
+    expr :: StarpuExpr
+    typ :: Type
+end
+
+
+# Convert a Julia `expr :: T` expression into a typed node. `T` is evaluated
+# and must yield a Type; a typed variable is returned when `expr` is a plain
+# variable, a typed expression otherwise.
+function starpu_parse_typed(x :: Expr)
+
+    x.head == :(::) || error("Invalid type assigned expression")
+
+    annotated = starpu_parse(x.args[1])
+
+    # Any failure while evaluating the annotation (or a non-Type result)
+    # is reported with the same message.
+    typ = try
+        eval(x.args[2]) :: Type
+    catch
+        error("Invalid type in type assigned expression")
+    end
+
+    if isa(annotated, StarpuExprVar)
+        return StarpuExprTypedVar(annotated.name, typ)
+    end
+
+    return StarpuExprTypedExpr(annotated, typ)
+end
+
+
+
+
+
+# Julia scalar type => spelling of the corresponding C type.
+# starpu_type_traduction looks types up here once arrays and pointers have been handled.
+starpu_type_traduction_dict = Dict(
+    Void => "void",
+    Int32 => "int32_t",
+    UInt32 => "uint32_t",
+    Float32 => "float",
+    Int64 => "int64_t",
+    UInt64 => "uint64_t",
+    Float64 => "double"
+)
+
+
+
+# Translate a Julia type into the spelling of a C type: arrays are delegated
+# to starpu_type_traduction_array, pointers get a `*` appended to the
+# translation of their element type, everything else is looked up in
+# starpu_type_traduction_dict.
+function starpu_type_traduction(x)
+
+    if x <: Array
+        return starpu_type_traduction_array(x)
+    elseif x <: Ptr
+        return string(starpu_type_traduction(eltype(x)), "*")
+    end
+
+    return starpu_type_traduction_dict[x]
+
+end
+
+
+# Translate Array{T,N} into the translation of T followed by N `*`.
+function starpu_type_traduction_array(x :: Type{Array{T,N}}) where {T,N}
+
+    element = starpu_type_traduction(T)
+
+    return element * ("*" ^ N)
+end
+
+
+
+# Emit a typed node: a typed variable prints as `<C type> <name>`, any other
+# typed node prints only its inner expression.
+function print(io :: IO, x :: StarpuExprTyped ; indent = 0)
+
+    if !isa(x, StarpuExprTypedVar)
+        print(io, x.expr, indent = indent)
+        return
+    end
+
+    print(io, starpu_type_traduction(x.typ), " ")
+    print(io, x.name)
+end
+
+
+
+# Rebuild a StarpuExprTypedExpr around its transformed inner expression,
+# keeping the type, then apply `func` to the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprTypedExpr)
+
+    inner = apply(func, expr.expr)
+    rebuilt = StarpuExprTypedExpr(inner, expr.typ)
+
+    return func(rebuilt)
+end

+ 53 - 0
julia/src/Compiler/Expressions/while.jl

@@ -0,0 +1,53 @@
+
+#======================================================
+                While loop
+======================================================#
+
+
+# AST node for a "while" loop.
+struct StarpuExprWhile <: StarpuExpr
+    cond :: StarpuExpr # loop condition
+    body :: StarpuExpr # loop body
+end
+
+# Convert a Julia `while` expression into a StarpuExprWhile.
+function starpu_parse_while(x :: Expr)
+
+    x.head == :while || error("Invalid \"while\" loop")
+    length(x.args) >= 2 || error("Invalid \"while\" loop")
+
+    condition = starpu_parse(x.args[1])
+    loop_body = starpu_parse(x.args[2])
+
+    return StarpuExprWhile(condition, loop_body)
+end
+
+
+# Emit the C form of a while loop: `while (cond)` followed by a braced body.
+function print(io :: IO, x :: StarpuExprWhile ; indent = 0)
+
+    inner = indent + starpu_indent_size
+
+    print_newline(io, indent)
+    print(io, "while (")
+    print(io, x.cond, indent = inner)
+    print(io, ")")
+
+    print_newline(io, indent)
+    print(io, "{")
+    print_newline(io, inner)
+    print(io, x.body, indent = inner)
+    print_newline(io, indent)
+    print(io, "}")
+    print_newline(io, indent)
+end
+
+
+
+# Rebuild a StarpuExprWhile bottom-up (condition, then body) and apply `func`
+# to the rebuilt node.
+function apply(func :: Function, expr :: StarpuExprWhile)
+
+    new_cond = apply(func, expr.cond)
+    new_body = apply(func, expr.body)
+
+    rebuilt = StarpuExprWhile(new_cond, new_body)
+
+    return func(rebuilt)
+end

+ 62 - 0
julia/src/Compiler/Generate_files/c_files.jl

@@ -0,0 +1,62 @@
+
+
+
+# Name of the file the CPU kernels are appended to. The sentinel value
+# "PRINT TO STDOUT" means that no file has been selected.
+global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
+
+# Prologue written at the top of every generated CPU kernel file.
+# jlstarpu_interval_size returns the number of values of start:step:stop with
+# the same conventions as Julia ranges: (stop - start) / step + 1 when the
+# step goes towards stop, 0 otherwise (including a zero step). The previous
+# formula, (stop - start + 1) / step, undercounted whenever |step| > 1
+# (1:3:10 gave 3 instead of 4) and divided by zero for a zero step.
+# NOTE(review): the code calling this helper is not visible in this file -
+# presumably it computes the loop bound `dim_<id>`; confirm.
+const cpu_kernel_file_start = "#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    if (step > 0 && stop >= start){
+            return (stop - start) / step + 1;
+    }
+    if (step < 0 && stop <= start){
+            return (start - stop) / (-step) + 1;
+    }
+    return 0;
+}
+
+"
+
+
+
+# Select `file_name` as the destination of the CPU kernels generated from now
+# on and (re)create it with the standard prologue.
+# The previous content of the file, if any, is discarded. Returns nothing.
+function starpu_new_cpu_kernel_file(file_name :: String)
+
+    global generated_cpu_kernel_file_name = file_name
+
+    # The do-block form closes the file even when writing fails.
+    open(file_name, "w") do kernel_file
+        print(kernel_file, cpu_kernel_file_start)
+    end
+
+    return nothing
+end
+
+
+
+# Translate the expression `x` into a CPU kernel at macro-expansion time and
+# return code that prints it, either to the current CPU kernel file (opened in
+# append mode) or to STDOUT when no file has been selected.
+# The file name is read when the macro is expanded, not when the returned code runs.
+macro cpu_kernel(x)
+
+    starpu_expr = transform_to_cpu_kernel(starpu_parse(x))
+
+    return quote
+
+        to_stdout = ($(esc(generated_cpu_kernel_file_name)) == "PRINT TO STDOUT")
+
+        if to_stdout
+			println("\nNo specified CPU kernel file to write into : writting to STDOUT instead\n")
+            kernel_file = STDOUT
+        else
+            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "a+")
+        end
+
+        print(kernel_file, $starpu_expr)
+
+        if (!to_stdout)
+            close(kernel_file)
+        end
+    end
+end

+ 125 - 0
julia/src/Compiler/Generate_files/cuda_files.jl

@@ -0,0 +1,125 @@
+
+
+
+# Name of the file the CUDA kernels are appended to. The sentinel value
+# "PRINT TO STDOUT" means that no file has been selected.
+global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
+
+# Prologue written at the top of every generated CUDA kernel file; the helpers
+# exist in a host and in a __device__ flavour.
+# Both interval-size helpers return the number of values of start:step:stop
+# with the same conventions as Julia ranges: (stop - start) / step + 1 when
+# the step goes towards stop, 0 otherwise (including a zero step). The
+# previous formula, (stop - start + 1) / step, undercounted whenever
+# |step| > 1 (1:3:10 gave 3 instead of 4) and divided by zero for a zero step.
+const cuda_kernel_file_start = "#include <stdio.h>
+#include <stdint.h>
+#include <starpu.h>
+
+#define THREADS_PER_BLOCK 64
+
+static inline long long jlstarpu_max(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+static inline long long jlstarpu_interval_size(long long start, long long step, long long stop)
+{
+    if (step > 0 && stop >= start){
+            return (stop - start) / step + 1;
+    }
+    if (step < 0 && stop <= start){
+            return (start - stop) / (-step) + 1;
+    }
+    return 0;
+}
+
+
+__device__ static inline long long jlstarpu_max__device(long long a, long long b)
+{
+	return (a > b) ? a : b;
+}
+
+__device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop)
+{
+	if (step > 0 && stop >= start){
+		return (stop - start) / step + 1;
+	}
+	if (step < 0 && stop <= start){
+		return (start - stop) / (-step) + 1;
+	}
+	return 0;
+}
+
+
+"
+
+
+# Select `file_name` as the destination of the CUDA kernels generated from now
+# on and (re)create it with the standard prologue.
+# The previous content of the file, if any, is discarded. Returns nothing.
+function starpu_new_cuda_kernel_file(file_name :: String)
+
+    global generated_cuda_kernel_file_name = file_name
+
+    # The do-block form closes the file even when writing fails.
+    open(file_name, "w") do kernel_file
+        print(kernel_file, cuda_kernel_file_start)
+    end
+
+    return nothing
+end
+
+
+
+# Translate the expression `x` into a CUDA kernel and its "prekernel" at
+# macro-expansion time and return code that prints both, either to the current
+# CUDA kernel file (opened in append mode) or to STDOUT when no file has been
+# selected. The kernel is prefixed with `__global__`, the prekernel with `extern "C"`.
+# The file name is read when the macro is expanded, not when the returned code runs.
+macro cuda_kernel(x)
+
+    prekernel, kernel = transform_to_cuda_kernel(starpu_parse(x))
+
+    return quote
+
+        to_stdout = ($(esc(generated_cuda_kernel_file_name)) == "PRINT TO STDOUT")
+
+        if to_stdout
+			println("\nNo specified CUDA kernel file to write into : writting to STDOUT instead\n")
+            kernel_file = STDOUT
+        else
+            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "a+")
+        end
+
+        print(kernel_file, "__global__ ", $kernel)
+        print(kernel_file, "\nextern \"C\" ", $prekernel)
+
+        if (!to_stdout)
+            close(kernel_file)
+        end
+    end
+end
+
+
+
+
+# Generate both the CPU and the CUDA versions of the kernel described by `x`.
+# The expression is parsed once; the returned code first prints the CPU kernel
+# to the CPU kernel file, then the CUDA kernel and prekernel to the CUDA kernel
+# file. Each destination independently falls back to STDOUT when no file has
+# been selected. File names are read when the macro is expanded.
+macro cpu_cuda_kernel(x)
+
+	parsed = starpu_parse(x)
+	cpu_expr = transform_to_cpu_kernel(parsed)
+	prekernel, kernel = transform_to_cuda_kernel(parsed)
+
+	return quote
+
+		to_stdout = ($(esc(generated_cpu_kernel_file_name)) == "PRINT TO STDOUT")
+
+        if to_stdout
+            kernel_file = STDOUT
+			println("\nNo specified CPU kernel file to write into : writting to STDOUT instead\n")
+        else
+            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "a+")
+        end
+
+        print(kernel_file, $cpu_expr)
+
+        if (!to_stdout)
+            close(kernel_file)
+        end
+
+
+		to_stdout = ($(esc(generated_cuda_kernel_file_name)) == "PRINT TO STDOUT")
+
+        if to_stdout
+            kernel_file = STDOUT
+			println("\nNo specified CUDA kernel file to write into : writting to STDOUT instead\n")
+        else
+            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "a+")
+        end
+
+        print(kernel_file, "__global__ ", $kernel)
+        print(kernel_file, "\nextern \"C\" ", $prekernel)
+
+        if (!to_stdout)
+            close(kernel_file)
+        end
+	end
+end

+ 39 - 0
julia/src/Compiler/Generate_files/so_files.jl

@@ -0,0 +1,39 @@
+
+
+
+# Compile the current CPU kernel file into the shared library `output_file`
+# with gcc, using the flags reported by pkg-config for starpu-1.3, then reset
+# the current CPU kernel file to the "PRINT TO STDOUT" sentinel.
+# A warning is emitted when the compiler command reports a failure; the
+# function returns nothing in every case.
+function compile_cpu_kernels(output_file :: String)
+
+    # Declared up front so that the read below unambiguously refers to the global.
+    global generated_cpu_kernel_file_name
+
+    # chomp only strips a trailing newline, whereas slicing off the last
+    # character would also eat a meaningful one.
+    starpu_cflags = chomp(readstring(`pkg-config --cflags starpu-1.3`))
+    starpu_libs = chomp(readstring(`pkg-config --libs starpu-1.3`))
+    options = "-O3 -shared -fPIC"
+
+    status = system("gcc $generated_cpu_kernel_file_name $options $starpu_cflags $starpu_libs -o $output_file")
+
+    if status != 0
+        warn("gcc returned status $status while compiling $generated_cpu_kernel_file_name")
+    end
+
+    generated_cpu_kernel_file_name = "PRINT TO STDOUT"
+
+    return nothing
+end
+
+
+# Compile the current CUDA kernel file into the shared library `output_file`
+# with nvcc, using the flags reported by pkg-config for starpu-1.3, then reset
+# the current CUDA kernel file to the "PRINT TO STDOUT" sentinel.
+# A warning is emitted when the compiler command reports a failure; the
+# function returns nothing in every case.
+function compile_cuda_kernels(output_file :: String)
+
+    # Declared up front so that the read below unambiguously refers to the global.
+    global generated_cuda_kernel_file_name
+
+    # chomp only strips a trailing newline, whereas slicing off the last
+    # character would also eat a meaningful one.
+    starpu_cflags = chomp(readstring(`pkg-config --cflags starpu-1.3`))
+    starpu_libs = chomp(readstring(`pkg-config --libs starpu-1.3`))
+    options = " -O3 --shared --compiler-options \'-fPIC\' "
+
+    status = system("nvcc $generated_cuda_kernel_file_name $options $starpu_cflags $starpu_libs -o $output_file")
+
+    if status != 0
+        warn("nvcc returned status $status while compiling $generated_cuda_kernel_file_name")
+    end
+
+    generated_cuda_kernel_file_name = "PRINT TO STDOUT"
+
+    return nothing
+end
+
+
+
+# Link `input_files` into the single shared library `output_file` with gcc.
+# Returns the value returned by `system` for the gcc command.
+function combine_kernel_files(output_file :: String, input_files :: Vector{String})
+
+    # join copes with an empty vector, where splatting into `*` raised a MethodError.
+    # Each name keeps its trailing space, so the command line is unchanged.
+    input_str = join(map((x -> x * " "), input_files))
+
+    return system("gcc -shared -fPIC $input_str -o $output_file")
+
+end

+ 91 - 0
julia/src/Compiler/expression_manipulation.jl

@@ -0,0 +1,91 @@
+
+# Return a copy of `expr` in which every node comparing `==` to
+# `expr_to_replace` has been replaced by `new_expr`.
+function substitute(expr :: StarpuExpr, expr_to_replace :: StarpuExpr, new_expr :: StarpuExpr)
+
+    swap = function (node :: StarpuExpr)
+        return (node == expr_to_replace) ? new_expr : node
+    end
+
+    return apply(swap, expr)
+end
+
+
+
+# Fill the `€` placeholders of the pattern `expr` with the expressions given in
+# `replace_€`. Placeholders are consumed one replacement at a time, in the
+# order in which `apply` visits them. Three kinds of placeholders are handled:
+# a node equal to the parsed symbol `€`, a typed variable named `€` and a
+# function named `€`.
+# NOTE(review): nothing checks that enough replacements were supplied -
+# presumably an out-of-bounds access is raised otherwise; confirm.
+function replace_pattern(expr :: StarpuExpr, replace_€ :: StarpuExpr...)
+
+    replace_index = 0 # number of replacements consumed so far
+
+    function func_to_apply(x :: StarpuExpr)
+
+        # Plain placeholder: substituted by the next replacement as is.
+        if x == @parse €
+            replace_index += 1
+            return replace_€[replace_index]
+        end
+
+        # Typed placeholder: the replacement inherits the placeholder's type.
+        if isa(x, StarpuExprTypedVar) && x.name == :€
+
+            replace_index += 1
+
+            if isa(replace_€[replace_index], StarpuExprVar)
+                return StarpuExprTypedVar(replace_€[replace_index].name, x.typ)
+            end
+
+            return StarpuExprTypedExpr(replace_€[replace_index], x.typ)
+        end
+
+        # Function-name placeholder: only a variable can provide the new name.
+        if isa(x, StarpuExprFunction) && x.func == :€
+
+            replace_index += 1
+
+            if !(isa(replace_€[replace_index], StarpuExprVar))
+                error("Can only replace a function name by a variable")
+            end
+
+            return StarpuExprFunction(x.ret_type, replace_€[replace_index].name, x.args, x.body)
+        end
+
+        return x
+    end
+
+    return apply(func_to_apply, expr)
+end
+
+
+
+import Base.any
+# Tell whether `cond` holds for at least one node visited by `apply` on `expr`.
+# The traversal is aborted on the first match by raising an error carrying a
+# sentinel message, which is caught right below; any other exception is
+# thrown again.
+function any(cond :: Function, expr :: StarpuExpr)
+
+    sentinel = "Catch me, condition is true somewhere !"
+
+    probe = function (node :: StarpuExpr)
+        # Crude way of leaving the traversal early, but good enough here.
+        cond(node) && error(sentinel)
+        return node
+    end
+
+    try
+        apply(probe, expr)
+    catch err
+        matched = isa(err, ErrorException) && err.msg == sentinel
+        matched || throw(err)
+        return true
+    end
+
+    return false
+end
+
+
+import Base.all
+# `cond` holds for every visited node exactly when its negation holds for none.
+function all(cond :: Function, expr :: StarpuExpr)
+    violated_somewhere = any(!cond, expr)
+    return !violated_somewhere
+end

+ 39 - 0
julia/src/Compiler/include.jl

@@ -0,0 +1,39 @@
+
+
+
+include("utils.jl")
+
+include("Expressions/simple_expressions.jl")
+include("Expressions/affect.jl")
+include("Expressions/block.jl")
+include("Expressions/call.jl")
+include("Expressions/cuda_call.jl")
+include("Expressions/field.jl")
+include("Expressions/interval.jl")
+include("Expressions/for.jl")
+include("Expressions/typed.jl")
+include("Expressions/function.jl")
+include("Expressions/if.jl")
+include("Expressions/ref.jl")
+include("Expressions/return.jl")
+include("Expressions/while.jl")
+
+include("parsing.jl")
+
+include("expression_manipulation.jl")
+
+include("C/substitute_args.jl")
+include("C/substitute_func_calls.jl")
+include("C/substitute_indexing.jl")
+include("C/add_for_loop_declarations.jl")
+include("C/flatten_blocks.jl")
+include("C/create_cpu_kernel.jl")
+
+include("Cuda/indep_for.jl")
+include("Cuda/indep_for_kernel_ids.jl")
+include("Cuda/create_cuda_kernel.jl")
+
+
+include("Generate_files/c_files.jl")
+include("Generate_files/cuda_files.jl")
+include("Generate_files/so_files.jl")

+ 50 - 0
julia/src/Compiler/parsing.jl

@@ -0,0 +1,50 @@
+
+
+#======================================================
+                GLOBAL PARSING
+======================================================#
+
+
+
+starpu_parse_key_word_parsing_function = Dict{Symbol, Function}()
+
+
+# Entry point for parsing a Julia Expr.
+# A macro call is only accepted when it is `@indep` with exactly one operand,
+# which is parsed as an independent for loop. Any other expression is
+# dispatched on its head through starpu_parse_key_word_parsing_function;
+# heads without a registered parser yield a StarpuExprInvalid.
+function starpu_parse(x :: Expr)
+
+    if (x.head == :macrocall)
+
+        x.args[1] == Symbol("@indep") || error("Only @indep macro, used before a for loop, is allowed ($(x.args[1]) was found)")
+
+        length(x.args) == 2 || error("Invalid usage of @indep macro")
+
+        return starpu_parse_for(x.args[2], is_independant = true)
+    end
+
+    if !haskey(starpu_parse_key_word_parsing_function, x.head)
+        return StarpuExprInvalid() #TODO error ?
+    end
+
+    handler = starpu_parse_key_word_parsing_function[x.head]
+
+    return handler(x)
+
+end
+
+# Register the parsers whose name follows the `starpu_parse_<head>` pattern...
+for kw in (:if, :call, :for, :block, :return, :function, :while, :ref)
+    starpu_parse_key_word_parsing_function[kw] = eval(Symbol(:starpu_parse_, kw))
+end
+
+# ... and those attached to an operator head, which cannot follow that pattern.
+starpu_parse_key_word_parsing_function[:(:)] = starpu_parse_interval
+starpu_parse_key_word_parsing_function[:(::)] = starpu_parse_typed
+starpu_parse_key_word_parsing_function[:(=)] = starpu_parse_affect
+starpu_parse_key_word_parsing_function[:(.)] = starpu_parse_field
+
+
+
+# `@parse code` expands to `starpu_parse` called on the quoted (unevaluated) `code`.
+macro parse(x)
+    y = Expr(:quote, x)
+    :(starpu_parse($y))
+end

+ 50 - 0
julia/src/Compiler/utils.jl

@@ -0,0 +1,50 @@
+
+import Base.print
+
+
+# Write `n_lines` line feeds followed by `indent` spaces to `io`.
+# Non-positive counts write nothing.
+function print_newline(io :: IO, indent = 0, n_lines = 1)
+
+    line_feeds = "\n" ^ max(n_lines, 0)
+    padding = " " ^ max(indent, 0)
+
+    print(io, line_feeds, padding)
+end
+
+starpu_indent_size = 4
+
+
+
+
+# Draw a random character among the 62 ASCII digits and letters:
+# 0-9 map to digits, 10-35 to lowercase letters, 36-61 to uppercase letters.
+function rand_char()
+
+    r = rand(UInt) % 62
+
+    r < 10 && return '0' + r
+    r < 36 && return 'a' + (r - 10)
+
+    return 'A' + (r - 36)
+end
+
+# Build a string of `size` characters, each drawn with rand_char().
+function rand_string(size = 8)
+
+    pieces = [string(rand_char()) for k in (1 : size)]
+
+    return join(pieces)
+end
+
+
+
+
+# Call the C function `system` from "libc" on `cmd` and return its Cint result.
+function system(cmd :: String)
+    ccall((:system, "libc"), Cint, (Cstring,), cmd)
+end

+ 24 - 0
julia/src/Wrapper/C/jlstarpu.h

@@ -0,0 +1,24 @@
+/*
+ * jlstarpu.h
+ *
+ *  Created on: 27 juin 2018
+ *      Author: ajuven
+ */
+
+#ifndef JLSTARPU_H_
+#define JLSTARPU_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <starpu.h>
+#include <pthread.h>
+
+
+#include "jlstarpu_utils.h"
+#include "jlstarpu_task.h"
+
+
+
+
+
+#endif /* JLSTARPU_H_ */

+ 162 - 0
julia/src/Wrapper/C/jlstarpu_data_handles.c

@@ -0,0 +1,162 @@
+
+#include "jlstarpu.h"
+
+
+
+
+#if 0
+void print_vector_interface(struct starpu_vector_interface * i)
+{
+	printf("Vector interface at %p\n", i);
+	printf("\tdev_handle : %p\n", i->dev_handle);
+	printf("\telement_size : %u\n", i->elemsize);
+	printf("\tnx : %u\n", i->nx);
+	printf("\toffset : %u\n", i->offset);
+	printf("\tptr : %p\n", i->ptr);
+	printf("\tslide_base : %u\n", i->slice_base);
+}
+#endif
+
+
+/* Identifiers of the StarPU filter functions selectable through this wrapper;
+ * translated by jlstarpu_translate_data_filter_func(). */
+enum jlstarpu_data_filter_func
+{
+    JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
+    JLSTARPU_MATRIX_FILTER_BLOCK
+};
+
+
+
+/* Wrapper-side description of a data filter, converted into a
+ * struct starpu_data_filter by jlstarpu_translate_data_filter(). */
+struct jlstarpu_data_filter
+{
+	enum jlstarpu_data_filter_func func; /* which filter function to use */
+	unsigned int nchildren; /* copied into starpu_data_filter::nchildren */
+
+};
+
+
+/* Map a wrapper filter identifier to the matching StarPU filter function.
+ * Returns NULL for an identifier that is not known. */
+void * jlstarpu_translate_data_filter_func(enum jlstarpu_data_filter_func func)
+{
+	if (func == JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK){
+		return starpu_matrix_filter_vertical_block;
+	}
+
+	if (func == JLSTARPU_MATRIX_FILTER_BLOCK){
+		return starpu_matrix_filter_block;
+	}
+
+	return NULL;
+}
+
+
+/* Fill `output` from the wrapper description `input`: every byte of `output`
+ * is zeroed first, then the filter function and the number of children are set. */
+void jlstarpu_translate_data_filter
+(
+		const struct jlstarpu_data_filter * const input,
+		struct starpu_data_filter * output
+)
+{
+	memset(output, 0, sizeof(*output));
+
+	output->nchildren = input->nchildren;
+	output->filter_func = jlstarpu_translate_data_filter_func(input->func);
+}
+
+
+
+
+
+
+
+
+
+/* Partition `handle` with the StarPU filter described by `jl_filter`. */
+void jlstarpu_data_partition
+(
+		starpu_data_handle_t handle,
+		const struct jlstarpu_data_filter * const jl_filter
+)
+{
+	struct starpu_data_filter translated;
+
+	jlstarpu_translate_data_filter(jl_filter, &translated);
+	starpu_data_partition(handle, &translated);
+}
+
+
+/* Fixed-arity front end to the variadic starpu_data_map_filters(): one filter. */
+void jlstarpu_data_map_filters_1_arg
+(
+		starpu_data_handle_t handle,
+		const struct jlstarpu_data_filter * const jl_filter
+)
+{
+	struct starpu_data_filter translated;
+
+	jlstarpu_translate_data_filter(jl_filter, &translated);
+	starpu_data_map_filters(handle, 1, &translated);
+}
+
+
+/* Fixed-arity front end to the variadic starpu_data_map_filters(): two filters,
+ * passed in the order they were given. */
+void jlstarpu_data_map_filters_2_arg
+(
+		starpu_data_handle_t handle,
+		const struct jlstarpu_data_filter * const jl_filter_1,
+		const struct jlstarpu_data_filter * const jl_filter_2
+)
+{
+	struct starpu_data_filter first;
+	struct starpu_data_filter second;
+
+	jlstarpu_translate_data_filter(jl_filter_1, &first);
+	jlstarpu_translate_data_filter(jl_filter_2, &second);
+
+	starpu_data_map_filters(handle, 2, &first, &second);
+}
+
+
+
+
+/* Define the accessor jlstarpu_<interface>_get_<field>(), which returns the
+ * member `field` of a struct starpu_<interface>_interface cast to `ret_type`.
+ * The last line of the macro ends with a continuation, so the line following
+ * the definition must stay empty. */
+#define JLSTARPU_GET(interface, field, ret_type)\
+	\
+	ret_type jlstarpu_##interface##_get_##field(const struct starpu_##interface##_interface * const x)\
+	{\
+		return (ret_type) x->field;\
+	}\
+
+
+
+
+
+/* Accessors on struct starpu_vector_interface. */
+JLSTARPU_GET(vector, ptr, void *)
+JLSTARPU_GET(vector, nx, uint32_t)
+JLSTARPU_GET(vector, elemsize, size_t)
+
+
+
+/* Accessors on struct starpu_matrix_interface. */
+JLSTARPU_GET(matrix, ptr, void *)
+JLSTARPU_GET(matrix, ld, uint32_t)
+JLSTARPU_GET(matrix, nx, uint32_t)
+JLSTARPU_GET(matrix, ny, uint32_t)
+JLSTARPU_GET(matrix, elemsize, size_t)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 16 - 0
julia/src/Wrapper/C/jlstarpu_simple_functions.c

@@ -0,0 +1,16 @@
+#include "jlstarpu.h"
+
+
+
+
+/* Call starpu_init() with a NULL configuration and return its result. */
+int jlstarpu_init(void)
+{
+	return starpu_init(NULL);
+}
+
+
+
+/* Zero the first `size` bytes of the buffer starting at `ptr`. */
+void jlstarpu_set_to_zero(void * ptr, unsigned int size)
+{
+	const size_t byte_count = size;
+
+	(void) memset(ptr, 0, byte_count);
+}

+ 62 - 0
julia/src/Wrapper/C/jlstarpu_task.h

@@ -0,0 +1,62 @@
+/*
+ * jlstarpu_task.h
+ *
+ *  Created on: 27 juin 2018
+ *      Author: ajuven
+ */
+
+#ifndef JLSTARPU_TASK_H_
+#define JLSTARPU_TASK_H_
+
+
+#include "jlstarpu.h"
+
+
+
+
+
+
+/* Wrapper-side description of a codelet; its fields are copied into a
+ * struct starpu_codelet by jlstarpu_codelet_update(). */
+struct jlstarpu_codelet
+{
+	uint32_t where; /* copied into starpu_codelet::where */
+
+	starpu_cpu_func_t cpu_func; /* stored in cpu_funcs[0] */
+	char * cpu_func_name; /* stored in cpu_funcs_name[0] */
+
+	starpu_cuda_func_t cuda_func; /* stored in cuda_funcs[0] */
+
+	int nbuffer; /* number of entries of `modes` */
+	enum starpu_data_access_mode * modes; /* access mode of each buffer */
+
+	struct starpu_perfmodel * model; /* copied into starpu_codelet::model */
+
+};
+
+
+
+/* Wrapper-side description of a task; its fields are copied into a
+ * struct starpu_task by jlstarpu_task_update(). */
+struct jlstarpu_task
+{
+	struct starpu_codelet * cl; /* codelet to run; its nbuffers gives the length of `handles` */
+	starpu_data_handle_t * handles; /* data handles copied into the task */
+	unsigned int synchronous; /* copied into starpu_task::synchronous */
+
+	void * cl_arg; /* copied into starpu_task::cl_arg */
+	size_t cl_arg_size; /* copied into starpu_task::cl_arg_size */
+};
+
+
+#if 0
+
+struct cl_args_decorator
+{
+	struct jlstarpu_function_launcher * launcher;
+	void * cl_args;
+};
+
+#endif
+
+
+
+
+
+#endif /* JLSTARPU_TASK_H_ */

+ 192 - 0
julia/src/Wrapper/C/jlstarpu_task_submit.c

@@ -0,0 +1,192 @@
+/*
+ * jlstarpu_task_submit.c
+ *
+ *  Created on: 27 juin 2018
+ *      Author: ajuven
+ */
+
+
+#include "jlstarpu.h"
+
+
+/* Allocate a codelet on the heap and initialise it with starpu_codelet_init().
+ * TYPE_MALLOC terminates the process if the allocation fails, so the returned
+ * pointer is never NULL.
+ * The parameter list is spelled (void) so that the definition is a real
+ * prototype; an empty list leaves the parameters unspecified in C. */
+struct starpu_codelet * jlstarpu_new_codelet(void)
+{
+	struct starpu_codelet * output;
+	TYPE_MALLOC(output, 1);
+
+	starpu_codelet_init(output);
+
+	return output;
+}
+
+
+#if 0
+struct starpu_codelet * jlstarpu_translate_codelet(struct jlstarpu_codelet * const input)
+{
+	struct starpu_codelet * output;
+	TYPE_MALLOC(output, 1);
+
+	starpu_codelet_init(output);
+
+
+
+	output->where = input->where;
+
+	output->cpu_funcs[0] = input->cpu_func;
+	output->cpu_funcs_name[0] = input->cpu_func_name;
+
+	output->cuda_funcs[0] = input->cuda_func;
+
+	output->nbuffers = input->nbuffer;
+	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
+
+	output->model = input->model;
+
+	return output;
+}
+#endif
+
+/* Copy the wrapper-side codelet description `input` into the StarPU codelet
+ * `output`: target mask, first CPU function and its name, first CUDA function,
+ * number of buffers, access modes and performance model.
+ *
+ * The access modes are only copied when there is something valid to copy: a
+ * negative count would be converted into a huge size, and memcpy() must not
+ * be given a NULL source.
+ * NOTE(review): no upper bound is enforced on nbuffer - presumably it must not
+ * exceed the capacity of output->modes (STARPU_NMAXBUFS); confirm against the
+ * StarPU headers. */
+void jlstarpu_codelet_update(const struct jlstarpu_codelet * const input, struct starpu_codelet * const output)
+{
+	output->where = input->where;
+
+	output->cpu_funcs[0] = input->cpu_func;
+	output->cpu_funcs_name[0] = input->cpu_func_name;
+
+	output->cuda_funcs[0] = input->cuda_func;
+
+	output->nbuffers = input->nbuffer;
+
+	if (input->nbuffer > 0 && input->modes != NULL){
+		memcpy(&(output->modes), input->modes, (size_t) input->nbuffer * sizeof(enum starpu_data_access_mode));
+	}
+
+	output->model = input->model;
+
+}
+#if 0
+void jlstarpu_free_codelet(struct starpu_codelet * cl)
+{
+	free(cl);
+}
+#endif
+
+
+
+#if 0
+struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const input)
+{
+	struct starpu_task * output = starpu_task_create();
+
+	if (output == NULL){
+		return NULL;
+	}
+
+	output->cl = input->cl;
+	memcpy(&(output->handles), input->handles, input->cl->nbuffers * sizeof(starpu_data_handle_t));
+	output->synchronous = input->synchronous;
+
+
+	return output;
+}
+#endif
+
+
+
+/* Copy the wrapper-side task description `input` into the StarPU task
+ * `output`: codelet, data handles, synchronous flag and codelet argument.
+ *
+ * The handles are only copied when a codelet is attached (its nbuffers gives
+ * their number), that number is positive and a handle array was provided;
+ * this avoids dereferencing a NULL codelet and calling memcpy() with a NULL
+ * source or a negative count converted into a huge size.
+ * NOTE(review): no upper bound is enforced on nbuffers - presumably it must
+ * not exceed the capacity of output->handles; confirm against the StarPU headers. */
+void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starpu_task * const output)
+{
+	output->cl = input->cl;
+
+	if (input->cl != NULL && input->handles != NULL && input->cl->nbuffers > 0){
+		memcpy(&(output->handles), input->handles, (size_t) input->cl->nbuffers * sizeof(starpu_data_handle_t));
+	}
+
+	output->synchronous = input->synchronous;
+	output->cl_arg = input->cl_arg;
+	output->cl_arg_size = input->cl_arg_size;
+}
+
+
+
+/*
+
+void print_perfmodel(struct starpu_perfmodel * p)
+{
+	printf("Perfmodel at address %p:\n");
+	printf("\ttype : %u\n", p->type);
+	printf("\tcost_function : %p\n", p->cost_function);
+	printf("\tarch_cost_function : %p\n", p->arch_cost_function);
+	printf("\tsize_base : %p\n", p->size_base);
+	printf("\tfootprint : %p\n", p->footprint);
+	printf("\tsymbol : %s\n", p->symbol);
+	printf("\tis_loaded : %u\n", p->is_loaded);
+	printf("\tbenchmarking : %u\n", p->benchmarking);
+	printf("\tis_init : %u\n", p->is_init);
+	printf("\tparameters : %p\n", p->parameters);
+	printf("\tparameters_names : %p\n", p->parameters_names);
+	printf("\tnparameters : %u\n", p->nparameters);
+	printf("\tcombinations : %p\n", p->combinations);
+	printf("\tncombinations : %u\n", p->ncombinations);
+	printf("\tstate : %p\n", p->state);
+
+}
+
+
+*/
+
+#if 0
+/*
+ * TODO : free memory
+ */
+int jlstarpu_task_submit(const struct jlstarpu_task * const jl_task)
+{
+	DEBUG_PRINT("Inside C wrapper");
+
+	struct starpu_task * task;
+	int ret_code;
+
+
+	DEBUG_PRINT("Translating task...");
+	task = jlstarpu_translate_task(jl_task);
+
+	if (task == NULL){
+		fprintf(stderr, "Error while creating the task.\n");
+		return EXIT_FAILURE;
+	}
+
+	DEBUG_PRINT("Task translated");
+	DEBUG_PRINT("Submitting task to StarPU...");
+	ret_code = starpu_task_submit(task);
+	DEBUG_PRINT("starpu_task_submit has returned");
+
+
+	if (ret_code != 0){
+		fprintf(stderr, "Error while submitting task.\n");
+		return ret_code;
+	}
+
+
+	DEBUG_PRINT("Done");
+	DEBUG_PRINT("END OF STARPU FUNCTION");
+
+
+	return ret_code;
+}
+
+#endif
+
+
+
+
+
+
+
+/* Define jlstarpu_<type>_update_<field>(), which copies the single member
+ * `field` from a struct jlstarpu_<type> into a struct starpu_<type>. */
+#define JLSTARPU_UPDATE_FUNC(type, field)\
+	\
+	void jlstarpu_##type##_update_##field(const struct jlstarpu_##type * const input, struct starpu_##type * const output)\
+	{\
+		output->field = input->field;\
+	}
+
+
+
+
+
+
+
+
+
+

+ 52 - 0
julia/src/Wrapper/C/jlstarpu_utils.h

@@ -0,0 +1,52 @@
+/*
+ * jlstarpu_utils.h
+ *
+ *  Created on: 27 juin 2018
+ *      Author: ajuven
+ */
+
+#ifndef JLSTARPU_UTILS_H_
+#define JLSTARPU_UTILS_H_
+
+#include "jlstarpu.h"
+
+
+/*
+ * Allocates an array of `nb_elements` objects of the type `ptr` points to
+ * and stores its address in `ptr`.
+ *
+ *  - A count of zero sets `ptr` to NULL without calling malloc().
+ *  - A count whose byte size would not fit in a size_t is handled like an
+ *    allocation failure instead of silently under-allocating.
+ *  - On failure a message is printed on stderr and the process exits with
+ *    status 1, so callers never see NULL for a non-zero request.
+ *
+ * `ptr` must be a modifiable pointer lvalue. Both arguments are evaluated
+ * several times and therefore must be free of side effects.
+ */
+#define TYPE_MALLOC(ptr, nb_elements) \
+		do {\
+			if ((nb_elements) == 0){ \
+				(ptr) = NULL; \
+			} else { \
+				(ptr) = NULL; \
+				if ((size_t) (nb_elements) <= ((size_t) -1) / sizeof(*(ptr))){ \
+					(ptr) = malloc((size_t) (nb_elements) * sizeof(*(ptr))); \
+				} \
+				if ((ptr) == NULL){ \
+					fprintf(stderr, "\033[31mCRITICAL : MALLOC HAS RETURNED NULL\n\033[0m");\
+					fflush(stderr);\
+					exit(1);\
+				} \
+			} \
+		} while(0)
+
+
+
+/*
+ * DEBUG_PRINT(fmt, ...) : printf-style trace written on stderr, prefixed with
+ * the name of the calling function and terminated by a newline.
+ *
+ * Traces are only emitted when DEBUG is defined (uncomment the line below);
+ * otherwise DEBUG_PRINT does nothing and its arguments are not evaluated.
+ */
+//#define DEBUG
+#ifdef DEBUG
+
+#define DEBUG_PRINT(...)\
+		do {\
+			fprintf(stderr, "\x1B[34m%s : \x1B[0m", __func__);\
+			fprintf(stderr, __VA_ARGS__);\
+			fprintf(stderr, "\n");\
+			fflush(stderr);\
+		} while (0)
+
+#else
+
+/* Still a complete statement, so `if (c) DEBUG_PRINT(...); else ...` stays well formed. */
+#define DEBUG_PRINT(...) do { } while (0)
+
+#endif
+
+
+
+#endif /* JLSTARPU_UTILS_H_ */

+ 304 - 0
julia/src/Wrapper/Julia/linked_list.jl

@@ -0,0 +1,304 @@
+
+
+
+    # Node of a doubly linked list, holding one value of type T.
+    export Link
+    mutable struct Link{T}
+
+        # Value carried by this node.
+        data :: T
+
+        # Neighbouring nodes; an empty Nullable means there is no neighbour on that side.
+        previous :: Union{Nullable{Link{T}}, Link{T}}
+        next :: Union{Nullable{Link{T}}, Link{T}}
+
+        # List this node was created for; remove_link! resets it to `nothing`.
+        list
+
+        # Creates a node holding `x`, recorded as belonging to `l`, with no neighbours yet.
+        function Link{T}(x :: T, l) where {T}
+            output = new()
+            output.data = x
+            output.previous = Nullable{Link{T}}()
+            output.next = Nullable{Link{T}}()
+            output.list = l
+            return output
+        end
+    end
+
+
+    # Doubly linked list of values of type T, with access to both ends.
+    export LinkedList
+    mutable struct LinkedList{T}
+
+        # Number of links currently in the list.
+        nelement :: Int64
+
+        # End links; both are empty Nullables while the list is empty.
+        first :: Union{Nullable{Link{T}}, Link{T}}
+        last :: Union{Nullable{Link{T}}, Link{T}}
+
+        # Creates an empty list.
+        function LinkedList{T}() where {T}
+            output = new()
+            output.nelement = 0
+            output.first = Nullable{Link{T}}()
+            output.last = Nullable{Link{T}}()
+
+            return output
+        end
+
+    end
+
+    export add_to_head!
+    """
+        add_to_head!(l, el)
+
+    Wrap `el` in a new `Link`, insert it in front of `l` and return that link.
+    """
+    function add_to_head!(l :: LinkedList{T}, el :: T) where {T}
+
+        former_head = l.first
+        head = Link{T}(el, l)
+
+        head.next = former_head
+        l.first = head
+
+        if (isnull(former_head))
+            # The list was empty: the new link is the last one as well.
+            l.last = head
+        else
+            former_head.previous = head
+        end
+
+        l.nelement += 1
+
+        return head
+    end
+
+
+    export add_to_tail!
+    """
+        add_to_tail!(l, el)
+
+    Wrap `el` in a new `Link`, append it at the end of `l` and return that link.
+    """
+    function add_to_tail!(l :: LinkedList{T}, el :: T) where {T}
+
+        former_tail = l.last
+        tail = Link{T}(el, l)
+
+        tail.previous = former_tail
+        l.last = tail
+
+        if (isnull(former_tail))
+            # The list was empty: the new link is the first one as well.
+            l.first = tail
+        else
+            former_tail.next = tail
+        end
+
+        l.nelement += 1
+
+        return tail
+    end
+
+
+    """
+        LinkedList(v)
+
+    Build a `LinkedList{T}` holding the elements of the array or tuple `v`, in iteration order.
+    """
+    function LinkedList(v :: Union{Array{T,N}, NTuple{N,T}}) where {N,T}
+
+        list = LinkedList{T}()
+        foreach((item -> add_to_tail!(list, item)), v)
+
+        return list
+    end
+
+
+    export remove_link!
+    """
+        remove_link!(lnk)
+
+    Detach `lnk` from the list it belongs to and return its data.
+    A link that is no longer attached to a list is left untouched.
+    """
+    function remove_link!(lnk :: Link{T}) where {T}
+
+        owner = lnk.list
+
+        if (owner != nothing)
+
+            before = lnk.previous
+            after = lnk.next
+
+            # Reconnect the successor (or the list tail) to what preceded lnk.
+            if (isnull(after))
+                owner.last = before
+            else
+                after.previous = before
+            end
+
+            # Reconnect the predecessor (or the list head) to what followed lnk.
+            if (isnull(before))
+                owner.first = after
+            else
+                before.next = after
+            end
+
+            owner.nelement -= 1
+            lnk.list = nothing
+        end
+
+        return lnk.data
+    end
+
+
+    export is_linked
+    # True while `lnk` still belongs to a list, i.e. until remove_link! detaches it.
+    is_linked(lnk :: Link) = (lnk.list != nothing)
+
+
+
+
+
+    export @foreach_asc
+    """
+        @foreach_asc list lnk_iterator expression
+
+    Run `expression` once per link of `list`, from the first link to the last,
+    with the variable `lnk_iterator` bound to the current link.
+
+    The following link is read before `expression` runs, so `expression` may
+    remove the current link from the list.
+    """
+    macro foreach_asc(list, lnk_iterator, expression)
+
+        quote
+            $(esc(lnk_iterator)) = $(esc(list)).first
+
+            while (!isnull($(esc(lnk_iterator))))
+                __next_lnk_iterator = $(esc(lnk_iterator)).next
+                $(esc(expression))
+                $(esc(lnk_iterator)) = __next_lnk_iterator
+            end
+        end
+    end
+
+
+    export @foreach_desc
+    """
+        @foreach_desc list lnk_iterator expression
+
+    Run `expression` once per link of `list`, from the last link to the first,
+    with the variable `lnk_iterator` bound to the current link.
+
+    The preceding link is read before `expression` runs, so `expression` may
+    remove the current link from the list.
+    """
+    macro foreach_desc(list, lnk_iterator, expression)
+
+        quote
+            $(esc(lnk_iterator)) = $(esc(list)).last
+
+            while (!isnull($(esc(lnk_iterator))))
+                __next_lnk_iterator = $(esc(lnk_iterator)).previous
+                $(esc(expression))
+                $(esc(lnk_iterator)) = __next_lnk_iterator
+            end
+        end
+    end
+
+
+
+
+    # Prints a link as `Link{T}{data: d ; previous: p ; next: n}`, where p and n are
+    # the data of the neighbouring links, or NONE when there is no neighbour.
+    function Base.show(io :: IO, lnk :: Link{T}) where {T}
+
+        neighbour_text = (other -> isnull(other) ? "NONE" : other.data)
+
+        print(io, "Link{$T}{data: ")
+        print(io, lnk.data)
+
+        print(io, " ; previous: ")
+        print(io, neighbour_text(lnk.previous))
+
+        print(io, " ; next: ")
+        print(io, neighbour_text(lnk.next))
+
+        print(io, "}")
+
+    end
+
+
+
+    # Prints a list as `LinkedList{T}{a, b, c}`.
+    function Base.show(io :: IO, l :: LinkedList{T}) where {T}
+
+        print(io, "LinkedList{$T}{")
+
+        cursor = l.first
+
+        while (!isnull(cursor))
+
+            # Every link but the first one is preceded by a separator.
+            isnull(cursor.previous) || print(io, ", ")
+            print(io, cursor.data)
+
+            cursor = cursor.next
+        end
+
+        print(io, "}")
+
+    end
+
+
+
+    import Base.start
+    # Iteration state is the last visited link; `nothing` means "before the first link".
+    start(l :: LinkedList) = nothing
+
+
+    import Base.done
+    # Iteration is over when the link that would come next does not exist.
+    function done(l :: LinkedList, state)
+        upcoming = (state == nothing) ? l.first : state.next
+        return isnull(upcoming)
+    end
+
+
+    import Base.next
+    # Advances to the link following `state` and returns (its data, the link as new state).
+    function next(l :: LinkedList, state)
+        current = (state == nothing) ? l.first : state.next
+        return (current.data, current)
+    end
+
+
+    import Base.endof
+    # Index designated by `end` in `l[end]`: the number of elements.
+    endof(l :: LinkedList) = l.nelement
+
+    export index_to_link
+    """
+        index_to_link(l, ind)
+
+    Return the link at 1-based position `ind` of `l`, walking from the head of the list.
+    Raises an error when `ind` is outside `1:length(l)`.
+    """
+    function index_to_link(l :: LinkedList, ind)
+
+        (ind > l.nelement || ind <= 0) && error("Invalid index")
+
+        cursor = l.first
+
+        for hop in (1:(ind - 1))
+            cursor = cursor.next
+        end
+
+        return cursor
+    end
+
+
+    import Base.getindex
+    # `l[ind]` : data stored at 1-based position `ind`.
+    getindex(l :: LinkedList, ind) = index_to_link(l,ind).data
+
+    import Base.setindex!
+    """
+        setindex!(l, value, ind)
+
+    Replace the data stored at 1-based position `ind` of `l` with `value`.
+
+    The arguments follow the order Base uses when lowering `l[ind] = value`
+    (collection, value, index). Raises an error when `ind` is out of range.
+    """
+    function setindex!(l :: LinkedList{T}, value :: T, ind) where T
+        lnk = index_to_link(l,ind)
+        lnk.data = value
+    end
+
+
+
+
+
+    import Base.eltype
+    # Element type of a list. Defined on the type itself (the form generic Base
+    # code queries) as well as on instances.
+    function eltype(:: Type{LinkedList{T}}) where T
+        return T
+    end
+
+    function eltype(l :: LinkedList{T}) where T
+        return T
+    end
+
+
+    import Base.isempty
+    # True when the list holds no element.
+    isempty(l :: LinkedList) = (l.nelement == 0)
+
+
+    import Base.empty!
+    """
+        empty!(l)
+
+    Remove every link from `l` and return `l`.
+    """
+    function empty!(l :: LinkedList)
+        @foreach_asc l lnk remove_link!(lnk)
+        return l
+    end
+
+
+    import Base.length
+    # Number of elements, read from the counter the list maintains.
+    length(l :: LinkedList) = l.nelement

+ 146 - 0
julia/src/Wrapper/Julia/starpu_codelet.jl

@@ -0,0 +1,146 @@
+
+
+# Access modes a codelet declares for its buffers. The values are bit flags
+# (STARPU_RW is STARPU_R | STARPU_W).
+# NOTE(review): presumably mirrors StarPU's C enum starpu_data_access_mode — confirm the values stay in sync.
+export StarpuDataAccessMode
+export STARPU_NONE,STARPU_R,STARPU_W,STARPU_RW, STARPU_SCRATCH
+export STARPU_REDUX,STARPU_COMMUTE, STARPU_SSEND, STARPU_LOCALITY
+export STARPU_ACCESS_MODE_MAX
+
+@enum(StarpuDataAccessMode,
+
+    STARPU_NONE = 0,
+    STARPU_R = (1 << 0),
+    STARPU_W = (1 << 1),
+    STARPU_RW = ((1 << 0) | (1 << 1)),
+    STARPU_SCRATCH = (1 << 2),
+    STARPU_REDUX = (1 << 3),
+    STARPU_COMMUTE = (1 << 4),
+    STARPU_SSEND = (1 << 5),
+    STARPU_LOCALITY = (1 << 6),
+    STARPU_ACCESS_MODE_MAX = (1 << 7)
+
+)
+
+
+# Julia-side description of a codelet, paired with the C structure obtained
+# from jlstarpu_new_codelet.
+export StarpuCodelet
+struct StarpuCodelet
+
+    # Bit mask built from STARPU_CPU / STARPU_CUDA.
+    where_to_execute :: UInt32
+
+    # Names of the kernel symbols; an empty string means "no implementation".
+    cpu_func :: String
+    gpu_func :: String
+
+    # One access mode per buffer.
+    modes :: Vector{StarpuDataAccessMode}
+
+    perfmodel :: StarpuPerfmodel
+
+    # Pointer returned by jlstarpu_new_codelet.
+    c_codelet :: Ptr{Void}
+
+
+    # Keyword constructor. When `where_to_execute` is not given it is derived
+    # from which of `cpu_func` / `gpu_func` are non-empty.
+    function StarpuCodelet(;
+        cpu_func :: String = "",
+        gpu_func :: String = "",
+        modes :: Vector{StarpuDataAccessMode} = StarpuDataAccessMode[],
+        perfmodel :: StarpuPerfmodel = StarpuPerfmodel(),
+        where_to_execute :: Union{Void, UInt32} = nothing
+    )
+
+        if (length(modes) > STARPU_NMAXBUFS)
+            error("Codelet has too much buffers ($(length(modes)) but only $STARPU_NMAXBUFS are allowed)")
+        end
+
+        # The pointer is recorded so that jlstarpu_free_allocated_structures can free it later.
+        real_c_codelet_ptr = @starpucall jlstarpu_new_codelet Ptr{Void} ()
+        push!(jlstarpu_allocated_structures, real_c_codelet_ptr)
+
+        if (where_to_execute == nothing)
+            # Bool * Int keeps the flag when the name is non-empty and gives 0 otherwise.
+            real_where = ((cpu_func != "") * STARPU_CPU) | ((gpu_func != "") * STARPU_CUDA)
+        else
+            real_where = where_to_execute
+        end
+
+        output = new(real_where, cpu_func, gpu_func, modes, perfmodel, real_c_codelet_ptr)
+
+        # Fill the C structure from the Julia fields.
+        starpu_c_codelet_update(output)
+
+        return output
+    end
+end
+
+
+
+# Pushes the Julia-side description of `cl` to its C counterpart: builds a
+# StarpuCodeletTranslator from `cl` and passes it, together with
+# `cl.c_codelet`, to the C function jlstarpu_codelet_update.
+function starpu_c_codelet_update(cl :: StarpuCodelet)
+
+    translating_cl = StarpuCodeletTranslator(cl)
+
+    @starpucall(jlstarpu_codelet_update,
+                Void, (Ptr{Void}, Ptr{Void}),
+                Ref{StarpuCodeletTranslator}(translating_cl),
+                cl.c_codelet
+            )
+end
+
+
+
+"""
+    load_starpu_function_pointer(func_name)
+
+Look up the symbol `func_name` in the library referenced by
+`starpu_tasks_library_handle` and return its address.
+
+An empty name yields `C_NULL`; a name that cannot be resolved raises an error.
+"""
+function load_starpu_function_pointer(func_name :: String)
+
+    if (isempty(func_name))
+        return C_NULL
+    end
+
+    # dlsym_e returns C_NULL for a missing symbol, whereas dlsym throws its own
+    # error before the check below could run.
+    func_pointer = Libdl.dlsym_e(starpu_tasks_library_handle, func_name)
+
+    if (func_pointer == C_NULL)
+        error("Couldn't find function symbol $func_name into extern library file")
+    end
+
+    return func_pointer
+end
+
+
+
+# Flat, pointer-only view of a StarpuCodelet handed to the C function
+# jlstarpu_codelet_update.
+# NOTE(review): the field order presumably has to match the C-side struct — confirm before reordering.
+mutable struct StarpuCodeletTranslator
+
+    where_to_execute :: UInt32
+
+    cpu_func :: Ptr{Void}
+    cpu_func_name :: Cstring
+
+    gpu_func :: Ptr{Void}
+
+    nbuffers :: Cint
+    modes :: Ptr{Void}
+
+    perfmodel :: Ptr{Void}
+
+
+
+    # Resolves the kernel names of `cl` to function pointers and fills every field.
+    # Errors when `where_to_execute` is zero or when neither kernel resolves.
+    function StarpuCodeletTranslator(cl :: StarpuCodelet)
+
+        output = new()
+
+        if (iszero(cl.where_to_execute))
+            error("StarpuCodelet field \"where_to_execute\" is empty")
+        end
+
+        output.where_to_execute = cl.where_to_execute
+
+        cpu_func_ptr = load_starpu_function_pointer(cl.cpu_func)
+        gpu_func_ptr = load_starpu_function_pointer(cl.gpu_func)
+
+        if (cpu_func_ptr == C_NULL && gpu_func_ptr == C_NULL)
+            error("No function specified inside codelet")
+        end
+
+        output.cpu_func = cpu_func_ptr
+        output.cpu_func_name = Cstring_from_String(cl.cpu_func)
+
+        output.gpu_func = gpu_func_ptr
+
+        # Raw pointers into data owned by `cl`.
+        output.nbuffers = Cint(length(cl.modes))
+        output.modes = pointer(cl.modes)
+
+        output.perfmodel = cl.perfmodel.c_perfmodel
+
+        return output
+    end
+
+end

+ 260 - 0
julia/src/Wrapper/Julia/starpu_data_handle.jl

@@ -0,0 +1,260 @@
+
+
+STARPU_MAIN_RAM = 0 #TODO: ENUM
+
+
+const StarpuDataHandlePointer = Ptr{Void}
+
+
+
+StarpuDataHandle = StarpuDestructible{StarpuDataHandlePointer}
+
+
+
+# Wraps a raw StarPU handle pointer in a StarpuDestructible carrying the given destructors.
+function StarpuNewDataHandle(ptr :: StarpuDataHandlePointer, destr :: Function...) :: StarpuDataHandle
+    return StarpuDestructible(ptr, destr...)
+end
+
+#=
+struct StarpuDataHandle
+
+    pointer :: StarpuDataHandlePointer
+    #destructors :: Vector{Function}
+
+    function StarpuDataHandle(ptr :: StarpuDataHandlePointer, destr = Function[])
+        return new(ptr)
+        #output = new(ptr)#, destr)
+        #push!(starpu_data_handle_list, output)
+        #return output
+    end
+
+end
+=#
+#starpu_data_handle_list = StarpuDataHandle[]
+
+
+
+# Calls the C function starpu_data_unregister on a raw handle pointer.
+function starpu_data_unregister_pointer(ptr :: StarpuDataHandlePointer)
+    @starpucall(starpu_data_unregister, Void, (Ptr{Void},), ptr)
+end
+
+
+export starpu_data_unregister
+"""
+    starpu_data_unregister(handles...)
+
+Unregister each handle now, removing the matching pending destructor from it.
+"""
+function starpu_data_unregister(handles :: StarpuDataHandle...)
+    foreach((h -> starpu_execute_destructor!(h, starpu_data_unregister_pointer)), handles)
+end
+
+
+
+export starpu_data_register
+
+# Registers the vector `v` through starpu_vector_data_register, with
+# STARPU_MAIN_RAM as home node, length(v) elements and sizeof(T) bytes per element.
+# Returns a data handle whose destructor unregisters it.
+function starpu_data_register(v :: Vector{T}) where T
+
+    # Receives the handle written by the C call.
+    output = Ref{Ptr{Void}}(0)
+    data_pointer = pointer(v)
+
+    @starpucall(starpu_vector_data_register,
+                Void,
+                (Ptr{Void}, Cint, Ptr{Void}, UInt32, Csize_t),
+                output, STARPU_MAIN_RAM, data_pointer,
+                length(v), sizeof(T)
+            )
+
+    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer])
+end
+
+
+# Registers the matrix `m` through starpu_matrix_data_register and returns a
+# data handle whose destructor unregisters it.
+function starpu_data_register(m :: Matrix{T}) where T
+
+    # Receives the handle written by the C call.
+    output = Ref{Ptr{Void}}(0)
+    data_pointer = pointer(m)
+    (height, width) = size(m)
+
+    # NOTE(review): the three UInt32 arguments are presumably (ld, nx, ny) — confirm against the StarPU prototype.
+    @starpucall(starpu_matrix_data_register,
+                Void,
+                (Ptr{Void}, Cint, Ptr{Void},
+                    UInt32, UInt32, UInt32, Csize_t),
+                output, STARPU_MAIN_RAM, data_pointer,
+                height, height, width, sizeof(T)
+            )
+
+    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer])
+end
+
+
+# Registers the 3-dimensional array `block` through starpu_block_data_register
+# and returns a data handle whose destructor unregisters it.
+function starpu_data_register(block :: Array{T,3}) where T
+
+    # Receives the handle written by the C call.
+    output = Ref{Ptr{Void}}(0)
+    data_pointer = pointer(block)
+    (height, width, depth) = size(block)
+
+    # NOTE(review): the five UInt32 arguments are presumably (ldy, ldz, nx, ny, nz) — confirm against the StarPU prototype.
+    @starpucall(starpu_block_data_register,
+                Void,
+                (Ptr{Void}, Cint, Ptr{Void},
+                    UInt32, UInt32, UInt32, UInt32,
+                    UInt32, Csize_t),
+                output, STARPU_MAIN_RAM, data_pointer,
+                height, height * width,
+                height, width, depth,
+                sizeof(T)
+            )
+
+    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)
+end
+
+
+
+# Registers the single value referenced by `ref` through
+# starpu_variable_data_register (sizeof(T) bytes) and returns a data handle
+# whose destructor unregisters it.
+function starpu_data_register(ref :: Ref{T}) where T
+
+    # Receives the handle written by the C call.
+    output = Ref{Ptr{Void}}(0)
+
+    @starpucall(starpu_variable_data_register,
+                Void,
+                (Ptr{Void}, Cint, Ptr{Void}, Csize_t),
+                output, STARPU_MAIN_RAM, ref, sizeof(T)
+            )
+
+    return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)
+end
+
+
+
+"""
+    starpu_data_register(x1, x2, next_args...)
+
+Register several objects at once and return their handles in an array, in argument order.
+"""
+function starpu_data_register(x1, x2, next_args...)
+
+    return [
+        starpu_data_register(x1),
+        starpu_data_register(x2),
+        map(starpu_data_register, next_args)...
+    ]
+end
+
+
+
+
+
+#function starpu_data_get_sub_data(root_data :: StarpuDataHandle, ids...)
+#
+#    nb_ids = lengths(ids)
+#    type_args = Tuple((Cint for i in (1 : nb_ids)))
+
+#    ccall((:starpu_data_get_sub_data, "libjlstarpu_c_wrapper"),
+#            Ptr{Void}, (Ptr{Void}, Cuint, type_args...),
+#            root_data, nb_ids, ids...
+#        )
+#end
+
+export starpu_data_get_sub_data
+
+# Returns the handle of child number `id` of `root_data`, through the C function
+# starpu_data_get_sub_data with a depth of 1. `id - 1` is what is passed to C.
+# The returned handle carries no destructor.
+function starpu_data_get_sub_data(root_data :: StarpuDataHandle, id)
+
+    output = @starpucall(starpu_data_get_sub_data,
+                        Ptr{Void}, (Ptr{Void}, Cuint, Cuint),
+                        root_data.object, 1, id - 1
+                    )
+
+    return StarpuNewDataHandle(output)
+end
+
+
+# Two-level variant: returns the handle of child (`idx`, `idy`) of `root_data`,
+# through starpu_data_get_sub_data with a depth of 2. `idx - 1` and `idy - 1`
+# are what is passed to C. The returned handle carries no destructor.
+function starpu_data_get_sub_data(root_data :: StarpuDataHandle, idx, idy)
+
+    output = @starpucall(starpu_data_get_sub_data,
+                        Ptr{Void}, (Ptr{Void}, Cuint, Cuint, Cuint),
+                        root_data.object, 2, idx - 1, idy - 1
+                    )
+
+    return StarpuNewDataHandle(output)
+end
+
+import Base.getindex
+
+
+
+# `handle[i]` and `handle[i, j]` are shorthands for starpu_data_get_sub_data.
+function Base.getindex(handle :: StarpuDataHandle, indexes...)
+     starpu_data_get_sub_data(handle, indexes...)
+ end
+
+
+
+
+# Identifiers of the partitioning functions a StarpuDataFilter can select.
+# NOTE(review): the numeric values are presumably decoded by the C wrapper — confirm they match.
+export StarpuDataFilterFunc
+export STARPU_MATRIX_FILTER_VERTICAL_BLOCK, STARPU_MATRIX_FILTER_BLOCK
+
+@enum(StarpuDataFilterFunc,
+
+    STARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
+    STARPU_MATRIX_FILTER_BLOCK = 1
+)
+
+# Description of a partitioning: which filter function to apply and how many
+# children to create. Instances are passed by reference to the C wrapper.
+export StarpuDataFilter
+mutable struct StarpuDataFilter
+
+    filter_func :: StarpuDataFilterFunc
+    nchildren :: Cuint
+
+    # `nchildren` is converted to Cuint.
+    function StarpuDataFilter(filter_func, nchildren)
+        output = new()
+        output.filter_func = filter_func
+        output.nchildren = Cuint(nchildren)
+        return output
+    end
+
+end
+
+
+# Calls the C function starpu_data_unpartition on a raw handle pointer, with
+# STARPU_MAIN_RAM as second argument.
+function starpu_data_unpartition_pointer(ptr :: StarpuDataHandlePointer)
+    @starpucall(starpu_data_unpartition, Void, (Ptr{Void}, Cuint), ptr, STARPU_MAIN_RAM)
+end
+
+export starpu_data_partition
+# Partitions `handle` according to `filter` through the C wrapper
+# jlstarpu_data_partition. An unpartition destructor is put at the head of the
+# handle's destructors beforehand.
+function starpu_data_partition(handle :: StarpuDataHandle, filter :: StarpuDataFilter)
+
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+
+    @starpucall(jlstarpu_data_partition,
+            Void, (Ptr{Void}, Ptr{Void}),
+            handle.object, Ref{StarpuDataFilter}(filter)
+        )
+end
+
+
+export starpu_data_unpartition
+"""
+    starpu_data_unpartition(handles...)
+
+Unpartition each handle now, removing the matching pending destructor from it.
+"""
+function starpu_data_unpartition(handles :: StarpuDataHandle...)
+
+    foreach((h -> starpu_execute_destructor!(h, starpu_data_unpartition_pointer)), handles)
+
+    return nothing
+end
+
+
+
+export starpu_data_map_filters
+
+# Applies one filter to `handle` through the C wrapper
+# jlstarpu_data_map_filters_1_arg. An unpartition destructor is put at the head
+# of the handle's destructors beforehand.
+function starpu_data_map_filters(handle :: StarpuDataHandle, filter :: StarpuDataFilter)
+
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+
+    @starpucall(jlstarpu_data_map_filters_1_arg,
+            Void, (Ptr{Void}, Ptr{Void}),
+            handle.object, Ref{StarpuDataFilter}(filter)
+    )
+end
+
+
+# Applies two filters to `handle` through the C wrapper
+# jlstarpu_data_map_filters_2_arg. An unpartition destructor is put at the head
+# of the handle's destructors beforehand.
+function starpu_data_map_filters(handle :: StarpuDataHandle, filter_1 :: StarpuDataFilter, filter_2 :: StarpuDataFilter)
+
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+
+    @starpucall(jlstarpu_data_map_filters_2_arg,
+            Void, (Ptr{Void}, Ptr{Void}, Ptr{Void}),
+            handle.object,
+            Ref{StarpuDataFilter}(filter_1),
+            Ref{StarpuDataFilter}(filter_2)
+    )
+
+end

+ 53 - 0
julia/src/Wrapper/Julia/starpu_define.jl

@@ -0,0 +1,53 @@
+
+
+
+
+STARPU_MAXIMPLEMENTATIONS = 1 # TODO : good value
+STARPU_NMAXBUFS = 8 # TODO : good value
+
+
+STARPU_CPU = 1 << 1
+STARPU_CUDA = 1 << 3
+
+
+# Expands to the tuple (symbol, "libjlstarpu_c_wrapper"), the form ccall-like
+# constructs expect to designate a function of the wrapper library.
+macro starpufunc(symbol)
+    :($symbol, "libjlstarpu_c_wrapper")
+end
+
+# Expands to a ccall of `func` in "libjlstarpu_c_wrapper". The return type, the
+# argument-type tuple and every argument are escaped, i.e. evaluated in the caller's scope.
+macro starpucall(func, ret_type, arg_types, args...)
+    return Expr(:call, :ccall, (func, "libjlstarpu_c_wrapper"), esc(ret_type), esc(arg_types), map(esc, args)...)
+end
+
+
+export @debugprint
+# Prints its arguments with println, wrapped in the ANSI escape sequences
+# "\x1b[32m" and "\x1b[0m", then flushes STDOUT.
+macro debugprint(x...)
+
+    expr = Expr(:call, :println, "\x1b[32m", map(esc, x)..., "\x1b[0m")
+
+    quote
+        $expr
+        flush(STDOUT)
+    end
+end
+
+
+
+# Returns a Cstring pointing at the bytes of `str`, without copying them.
+# NOTE(review): the result is presumably only valid while `str` itself stays referenced — confirm at call sites.
+function Cstring_from_String(str :: String)
+    return Cstring(pointer(str))
+end
+
+
+
+# Calls memset (resolved through the wrapper library) with value 0 and length
+# sizeof(x) on the memory designated by Ref{T}(x).
+# NOTE(review): presumably only meaningful for mutable `x`, as in StarpuPerfmodel_c() — confirm.
+function jlstarpu_set_to_zero(x :: T) :: Ptr{Void} where {T}
+    @starpucall(memset,
+          Ptr{Void}, (Ptr{Void}, Cint, Csize_t),
+          Ref{T}(x), 0, sizeof(x)
+        )
+end
+
+
+
+
+# Expands to a Vector that aliases the memory of the object `t`: eltype(t) is
+# the element type, length(t) the element count, and pointer_from_objref(t)
+# the base address. No data is copied.
+#
+# The argument is escaped, so it is resolved in the caller's scope, and it is
+# evaluated once.
+# NOTE(review): the resulting vector is presumably only valid while the object stays alive — confirm at call sites.
+macro mutableview(t)
+    quote
+        let viewed = $(esc(t))
+            unsafe_wrap( Vector{eltype(viewed)}, Ptr{eltype(viewed)}(pointer_from_objref(viewed)), length(viewed))
+        end
+    end
+end

+ 147 - 0
julia/src/Wrapper/Julia/starpu_destructible.jl

@@ -0,0 +1,147 @@
+
+
+
+
+# An object paired with the list of functions to call on it when it is destructed.
+mutable struct StarpuDestructible{T}
+
+    object :: T
+    destructors :: LinkedList{Function}
+
+end
+
+# Stack of open blocks; each entry lists the destructibles created while that
+# block was the innermost one.
+starpu_block_list = Vector{LinkedList{StarpuDestructible}}()
+
+
+
+"""
+    StarpuDestructible(obj, destructors...)
+
+Wrap `obj` together with its destructors (kept in the given order) and record
+the result at the head of the innermost open block. Raises an error when no block is open.
+"""
+function StarpuDestructible(obj :: T, destructors :: Function...) where T
+
+    if (isempty(starpu_block_list))
+        error("Creation of a StarpuDestructible object while not beeing in a @starpu_block")
+    end
+
+    destr_list = LinkedList{Function}()
+    foreach((d -> add_to_tail!(destr_list, d)), destructors)
+
+    wrapped = StarpuDestructible{T}(obj, destr_list)
+    add_to_head!(starpu_block_list[end], wrapped)
+
+    return wrapped
+end
+
+
+
+
+# Opens a new block: pushes an empty list of destructibles on the block stack.
+function starpu_enter_new_block()
+    fresh_block = LinkedList{StarpuDestructible}()
+    push!(starpu_block_list, fresh_block)
+end
+
+
+"""
+    starpu_destruct!(x)
+
+Call every destructor of `x` on its object, in list order, then forget them all.
+"""
+function starpu_destruct!(x :: StarpuDestructible)
+
+    foreach((destr -> destr(x.object)), x.destructors)
+    empty!(x.destructors)
+
+    return nothing
+end
+
+
+# Closes the innermost block: pops it from the block stack and destructs every
+# object recorded in it, in list order.
+function starpu_exit_block()
+    foreach(starpu_destruct!, pop!(starpu_block_list))
+end
+
+
+"""
+    starpu_add_destructor!(x, destrs...)
+
+Put each function of `destrs`, one after the other, at the head of the destructors of `x`.
+"""
+function starpu_add_destructor!(x :: StarpuDestructible, destrs :: Function...)
+
+    foreach((d -> add_to_head!(x.destructors, d)), destrs)
+
+    return nothing
+end
+
+
+# Removes the first destructor of `x` equal to `destr`, if there is one.
+function starpu_remove_destructor!(x :: StarpuDestructible, destr :: Function)
+
+    @foreach_asc x.destructors lnk begin
+
+        if (lnk.data == destr)
+            remove_link!(lnk)
+            # Only the first match is removed.
+            break
+        end
+    end
+
+    return nothing
+end
+
+"""
+    starpu_execute_destructor!(x, destr)
+
+Drop `destr` from the pending destructors of `x` (when present), call it on the
+object of `x` right away and return its result.
+"""
+function starpu_execute_destructor!(x :: StarpuDestructible, destr :: Function)
+    starpu_remove_destructor!(x, destr)
+    outcome = destr(x.object)
+    return outcome
+end
+
+
+export @starpu_block
+# Runs `expr` inside a fresh block: every StarpuDestructible created meanwhile
+# is destructed when the block is exited.
+# NOTE(review): if `expr` throws, starpu_exit_block() is not reached and the block stays on the stack.
+macro starpu_block(expr)
+    quote
+        starpu_enter_new_block()
+        $(esc(expr))
+        starpu_exit_block()
+    end
+end
+
+
+
+if false
+
+@starpu_block let
+    println("Begining of block")
+    x = StarpuDestructible(1, println)
+    println("End of block")
+end
+
+
+
+@starpu_block let
+    println("Begining of block")
+    x = StarpuDestructible(2, (x -> @show x), println)
+    println("End of block")
+end
+
+
+@starpu_block let
+    println("Begining of block")
+    x = StarpuDestructible(3, (x -> @show x), println)
+    starpu_add_destructor!(x, (x -> @show x+1))
+    println("End of block")
+end
+
+@starpu_block let
+    println("Begining of block")
+    x = StarpuDestructible(4, (x -> @show x), println)
+    starpu_add_destructor!(x, (x -> @show x+1))
+    starpu_remove_destructor!(x, println)
+    println("End of block")
+end
+
+@starpu_block let
+    println("Begining of block")
+    x = StarpuDestructible(4, (x -> @show x), println)
+    starpu_add_destructor!(x, (x -> @show x+1))
+    starpu_execute_destructor!(x, println)
+    println("End of block")
+end
+
+end

+ 21 - 0
julia/src/Wrapper/Julia/starpu_include.jl

@@ -0,0 +1,21 @@
+
+__precompile__()
+
+module StarPU
+
+
+    include("starpu_define.jl")
+    include("static_structures.jl")
+    include("starpu_simple_functions.jl")
+    include("starpu_perfmodel.jl")
+    include("starpu_codelet.jl")
+
+    include("linked_list.jl")
+    include("starpu_destructible.jl")
+    include("starpu_data_handle.jl")
+
+    include("starpu_task.jl")
+    include("starpu_task_submit.jl")
+    include("starpu_init_shutdown.jl")
+
+end

+ 25 - 0
julia/src/Wrapper/Julia/starpu_init_shutdown.jl

@@ -0,0 +1,25 @@
+
+export starpu_init
+"""
+    starpu_init(; extern_task_path = "")
+
+Open the library in which kernels are looked up (`extern_task_path`, or the
+wrapper library when the path is empty), call `jlstarpu_init`, open the
+outermost block and return the code given by `jlstarpu_init`.
+"""
+function starpu_init(; extern_task_path = "")
+
+    library_path = isempty(extern_task_path) ? "libjlstarpu_c_wrapper.so" : extern_task_path
+    global starpu_tasks_library_handle = Libdl.dlopen(library_path)
+
+    init_code = @starpucall jlstarpu_init Cint ()
+
+    starpu_enter_new_block()
+
+    return init_code
+end
+
+
+export starpu_shutdown
+# Closes the innermost open block (the one starpu_init opened, when blocks are
+# balanced), calls the C function starpu_shutdown, then frees the structures
+# recorded in jlstarpu_allocated_structures.
+function starpu_shutdown()
+    starpu_exit_block()
+    @starpucall starpu_shutdown Void ()
+    jlstarpu_free_allocated_structures()
+    return nothing
+end

+ 90 - 0
julia/src/Wrapper/Julia/starpu_perfmodel.jl

@@ -0,0 +1,90 @@
+
+# Kinds of performance model.
+# NOTE(review): presumably mirrors StarPU's C enum starpu_perfmodel_type — confirm the values stay in sync.
+export StarpuPerfmodelType
+export STARPU_PERFMODEL_INVALID, STARPU_PER_ARCH, STARPU_COMMON
+export STARPU_HISTORY_BASED, STARPU_REGRESSION_BASED
+export STARPU_NL_REGRESSION_BASED, STARPU_MULTIPLE_REGRESSION_BASED
+
+@enum(StarpuPerfmodelType,
+    STARPU_PERFMODEL_INVALID = 0,
+	STARPU_PER_ARCH = 1,
+	STARPU_COMMON = 2,
+	STARPU_HISTORY_BASED = 3,
+	STARPU_REGRESSION_BASED = 4,
+	STARPU_NL_REGRESSION_BASED = 5,
+	STARPU_MULTIPLE_REGRESSION_BASED = 6
+)
+
+
+# Julia image of the C performance-model structure.
+# NOTE(review): field order and types presumably have to match struct starpu_perfmodel — confirm before editing.
+mutable struct StarpuPerfmodel_c
+
+    perf_type :: StarpuPerfmodelType
+
+    cost_function :: Ptr{Void}
+    arch_cost_function :: Ptr{Void}
+
+    size_base :: Ptr{Void}
+    footprint :: Ptr{Void}
+
+    symbol :: Cstring
+
+    is_loaded :: Cuint
+    benchmarking :: Cuint
+    is_init :: Cuint
+
+    parameters :: Ptr{Void}
+    parameters_names :: Ptr{Void}
+    nparameters :: Cuint
+    combinations :: Ptr{Void}
+    ncombinations :: Cuint
+
+    state :: Ptr{Void}
+
+
+    # Creates an instance and zeroes its memory with jlstarpu_set_to_zero.
+    function StarpuPerfmodel_c()
+
+        output = new()
+        jlstarpu_set_to_zero(output)
+
+        return output
+    end
+
+end
+
+
+
+# Julia-side performance model: its type, its symbol, and a pointer to the
+# matching C structure (NULL for models of type STARPU_PERFMODEL_INVALID).
+export StarpuPerfmodel
+struct StarpuPerfmodel
+
+    perf_type :: StarpuPerfmodelType
+    symbol :: String
+
+    c_perfmodel :: Ptr{StarpuPerfmodel_c}
+end
+
+
+
+
+"""
+    StarpuPerfmodel(; perf_type = STARPU_PERFMODEL_INVALID, symbol = "")
+
+Create a performance model. With `STARPU_PERFMODEL_INVALID` no C structure is
+allocated and the model holds a NULL pointer; any other type requires a
+non-empty `symbol` and gets a malloc'ed C structure.
+"""
+function StarpuPerfmodel(; perf_type = STARPU_PERFMODEL_INVALID, symbol = "")
+
+    if (perf_type != STARPU_PERFMODEL_INVALID)
+
+        isempty(symbol) && error("Field \"symbol\" can't be empty when creating a StarpuPerfmodel")
+
+        model_c = StarpuPerfmodel_c()
+        model_c.perf_type = perf_type
+        model_c.symbol = Cstring_from_String(symbol)
+
+        return StarpuPerfmodel(perf_type, symbol, jlstarpu_allocate_and_store(model_c))
+    end
+
+    return StarpuPerfmodel(perf_type, symbol, Ptr{StarpuPerfmodel_c}(C_NULL))
+end
+
+
+"""
+    show_c_perfmodel(x)
+
+Print the C structure attached to `x`. Models without one (NULL pointer, as
+created for `STARPU_PERFMODEL_INVALID`) are reported instead of dereferenced.
+"""
+function show_c_perfmodel(x :: StarpuPerfmodel)
+
+    if (x.c_perfmodel == C_NULL)
+        println("StarpuPerfmodel has no C structure (NULL pointer)")
+        return nothing
+    end
+
+    x_c = unsafe_load(x.c_perfmodel)
+    println(x_c)
+end

+ 25 - 0
julia/src/Wrapper/Julia/starpu_simple_functions.jl

@@ -0,0 +1,25 @@
+
+
+# Defines and exports a zero-argument Julia function named `func_name` that
+# ccalls the function of the same name in "libjlstarpu_c_wrapper" and asserts
+# the result has type `ret_type`.
+macro starpu_noparam_function(func_name, ret_type)
+
+    func = Symbol(func_name)
+
+    quote
+        export $func
+        global $func() = ccall(($func_name, "libjlstarpu_c_wrapper"),
+                                $ret_type, ()) :: $ret_type
+    end
+end
+
+
+global starpu_tasks_library_handle = C_NULL
+
+
+
+@starpu_noparam_function "starpu_is_initialized" Cint
+
+
+
+@starpu_noparam_function "starpu_cublas_init" Void
+@starpu_noparam_function "starpu_cublas_set_stream" Void
+@starpu_noparam_function "starpu_cublas_shutdown" Void

+ 198 - 0
julia/src/Wrapper/Julia/starpu_task.jl

@@ -0,0 +1,198 @@
+
+# Julia-side task, paired with the C task created by starpu_task_create.
+export StarpuTask
+mutable struct StarpuTask
+
+    cl :: StarpuCodelet
+    handles :: Vector{StarpuDataHandle}
+    # Raw pointers extracted from `handles`; rebuilt by StarpuTaskTranslator.
+    handle_pointers :: Vector{StarpuDataHandlePointer}
+    synchronous :: Bool
+    cl_arg :: Union{Ref, Void}
+
+    # Pointer returned by starpu_task_create.
+    c_task :: Ptr{Void}
+
+    #=function StarpuTask()
+
+        output = new()
+        output.handles = StarpuDataHandle[]
+        output.handle_pointers = StarpuDataHandlePointer[]
+        output.synchronous = false
+        output.cl_arg = nothing
+
+
+        c_task = @starpucall starpu_task_create Ptr{Void} ()
+
+        if (c_task == C_NULL)
+            error("Couldn't create new task: starpu_task_create() returned NULL")
+    	end
+
+        output.c_task = c_task
+
+        return output
+    end=#
+
+
+    """
+        StarpuTask(; cl :: StarpuCodelet, handles :: Vector{StarpuDataHandle}, cl_arg :: Ref)
+
+        Creates a new task which will run the specified codelet on handle buffers and cl_args data
+    """
+    function StarpuTask(; cl :: Union{Void, StarpuCodelet} = nothing, handles :: Vector{StarpuDataHandle} = StarpuDataHandle[], cl_arg :: Union{Ref, Void} = nothing)
+
+        # `cl` only defaults to nothing so that a missing codelet gets this explicit error.
+        if (cl == nothing)
+            error("\"cl\" field can't be empty when creating a StarpuTask")
+        end
+
+        output = new()
+
+        output.cl = cl
+        output.handles = handles
+        output.cl_arg = cl_arg
+
+        output.synchronous = false
+        output.handle_pointers = StarpuDataHandlePointer[]
+
+        c_task = @starpucall starpu_task_create Ptr{Void} ()
+
+        if (c_task == C_NULL)
+            error("Couldn't create new task: starpu_task_create() returned NULL")
+        end
+
+        output.c_task = c_task
+
+        # Fill the C task from the Julia fields.
+        starpu_c_task_update(output)
+
+        return output
+    end
+
+end
+
+
+# Pushes the Julia-side description of `task` to its C counterpart: builds a
+# StarpuTaskTranslator from `task` and passes it, together with `task.c_task`,
+# to the C function jlstarpu_task_update.
+function starpu_c_task_update(task :: StarpuTask)
+
+    task_translator = StarpuTaskTranslator(task)
+
+    @starpucall(jlstarpu_task_update,
+                Void, (Ptr{Void}, Ptr{Void}),
+                Ref{StarpuTaskTranslator}(task_translator),
+                task.c_task
+            )
+end
+
+
+# Flat, pointer-only view of a StarpuTask handed to the C function jlstarpu_task_update.
+# NOTE(review): the field order presumably has to match the C-side struct — confirm before reordering.
+mutable struct StarpuTaskTranslator
+
+    cl :: Ptr{Void}
+    handles :: Ptr{Void}
+    synchronous :: Cuint
+
+    cl_arg :: Ptr{Void}
+    cl_arg_size :: Csize_t
+
+    # Fills every field from `task`. Side effect: `task.handle_pointers` is rebuilt.
+    function StarpuTaskTranslator(task :: StarpuTask)
+
+        output = new()
+
+        output.cl = task.cl.c_codelet
+
+        # The pointer array is stored in the task itself; `handles` below points into it.
+        task.handle_pointers = map((x -> x.object), task.handles)
+        output.handles = pointer(task.handle_pointers)
+        output.synchronous = Cuint(task.synchronous)
+
+        if (task.cl_arg == nothing)
+            output.cl_arg = C_NULL
+            output.cl_arg_size = 0
+        else
+            output.cl_arg = pointer_from_objref(task.cl_arg) #TODO : Libc.malloc and cl_arg_free set to 1 ? but it should be done only when submitting
+            output.cl_arg_size = sizeof(eltype(task.cl_arg))
+        end
+
+        return output
+    end
+
+end
+
+
+export StarpuTag
+const StarpuTag = UInt64
+
+
+export starpu_tag_declare_deps
+# Declares that tag `id` depends on `dep` and on every tag of `other_deps`,
+# through the C function starpu_tag_declare_deps_array.
+function starpu_tag_declare_deps(id :: StarpuTag, dep :: StarpuTag, other_deps :: StarpuTag...)
+
+    # The dependencies are gathered in one array for the C call.
+    v = [dep, other_deps...]
+
+    @starpucall(starpu_tag_declare_deps_array,
+                Void, (StarpuTag, Cuint, Ptr{StarpuTag}),
+                id, length(v), pointer(v)
+        )
+end
+
+
+export starpu_iteration_push
+# Calls the C function starpu_iteration_push with `iteration` passed as a Culong.
+function starpu_iteration_push(iteration)
+
+    @starpucall(starpu_iteration_push,
+                Void, (Culong,), iteration
+        )
+end
+
+
+export starpu_iteration_pop
+# Calls the C function starpu_iteration_pop.
+function starpu_iteration_pop()
+    @starpucall starpu_iteration_pop Void ()
+end
+
+
+export starpu_tag_wait
+# Calls the C function starpu_tag_wait on tag `id` and returns its Cint result.
+function starpu_tag_wait(id :: StarpuTag)
+    @starpucall starpu_tag_wait Cint (StarpuTag,) id
+end
+
+
+"""
+    starpu_tag_wait(ids :: Vector{StarpuTag})
+
+Call the C function `starpu_tag_wait_array` on all the tags of `ids` and
+return its Cint result.
+"""
+function starpu_tag_wait(ids :: Vector{StarpuTag})
+
+    @starpucall(starpu_tag_wait_array,
+                Cint, (Cuint, Ptr{StarpuTag}),
+                length(ids), pointer(ids)
+        )
+end
+
+
+export starpu_task_destroy
+# Calls the C function starpu_task_destroy on the C task of `task`.
+function starpu_task_destroy(task :: StarpuTask)
+    @starpucall starpu_task_destroy Void (Ptr{Void},) task.c_task
+end
+
+
+export starpu_task_wait_for_n_submitted
+
+"""
+    Block until there are n submitted tasks left (to the current context or the global one if there is no current context) to
+    be executed. It does not destroy these tasks.
+"""
+function starpu_task_wait_for_n_submitted(n)
+    @starpucall starpu_task_wait_for_n_submitted Cint (Cuint,) n
+end
+
+
+export starpu_task_declare_deps
+
+"""
+    starpu_task_declare_deps(task :: StarpuTask, dep :: StarpuTask [, other_deps :: StarpuTask...])
+
+    Declare task dependencies between a task and the following provided ones. This function must be called
+    prior to the submission of the task, but it may be called after the submission or the execution of the tasks in the array,
+    provided the tasks are still valid (i.e. they were not automatically destroyed). Calling this function on a task that was
+    already submitted or with an entry of task_array that is no longer a valid task results in undefined behaviour.
+"""
+function starpu_task_declare_deps(task :: StarpuTask, dep :: StarpuTask, other_deps :: StarpuTask...)
+
+    # C task pointers of all the dependencies, in argument order.
+    task_array = [dep.c_task, map((t -> t.c_task), other_deps)...]
+
+    @starpucall(starpu_task_declare_deps_array,
+                Void, (Ptr{Void}, Cuint, Ptr{Void}),
+                task.c_task,
+                length(task_array),
+                pointer(task_array)
+            )
+end

+ 48 - 0
julia/src/Wrapper/Julia/starpu_task_submit.jl

@@ -0,0 +1,48 @@
+
+
+
+export starpu_task_submit
+"""
+    starpu_task_submit(task :: StarpuTask)
+
+Refresh the C task from the Julia fields of `task`, submit it through the C
+function `starpu_task_submit` and return its Cint result.
+
+Raises an error when the number of handles differs from the number of access
+modes of the codelet.
+"""
+function starpu_task_submit(task :: StarpuTask)
+
+    if (length(task.handles) != length(task.cl.modes))
+        error("Invalid number of handles for task : $(length(task.handles)) where given while codelet has $(length(task.cl.modes)) modes")
+    end
+
+    starpu_c_task_update(task)
+
+    @starpucall starpu_task_submit Cint (Ptr{Void},) task.c_task
+
+end
+
+
+export @starpu_async_cl
+# `@starpu_async_cl cl(h1, h2, ...)` creates a StarpuTask running codelet `cl`
+# on the handles `[h1, h2, ...]` and submits it. Anything that is not a call
+# expression is rejected at macro-expansion time.
+macro starpu_async_cl(expr)
+
+    if (!isa(expr, Expr) || expr.head != :call)
+        error("Invalid task submit syntax")
+    end
+
+    # The called "function" is the codelet, the call arguments are the handles.
+    cl = expr.args[1]
+    handles = Expr(:vect, expr.args[2:end]...)
+
+    quote
+        task = StarpuTask(cl = $(esc(cl)), handles = $(esc(handles)))
+        starpu_task_submit(task)
+    end
+end
+
+
+export starpu_task_wait_for_all
+# Calls the C function starpu_task_wait_for_all through @threadcall rather than
+# a plain ccall, and returns its Cint result.
+function starpu_task_wait_for_all()
+    @threadcall(@starpufunc(:starpu_task_wait_for_all),
+                          Cint, ())
+end
+
+
+export @starpu_sync_tasks
+# Runs `expr`, then calls starpu_task_wait_for_all().
+macro starpu_sync_tasks(expr)
+    quote
+        $(esc(expr))
+        starpu_task_wait_for_all()
+    end
+end

+ 38 - 0
julia/src/Wrapper/Julia/starpu_worker.jl

@@ -0,0 +1,38 @@
+
+
+
+# Kinds of StarPU workers. Values are passed directly to C functions (see
+# starpu_worker_get_count_by_type), so the numbering starts at 0 and follows
+# declaration order.
+# NOTE(review): the order presumably mirrors the C enum starpu_worker_archtype;
+# confirm against the StarPU headers before reordering or inserting members.
+@enum(StarpuWorkerArchtype,
+
+	STARPU_CPU_WORKER,
+	STARPU_CUDA_WORKER,
+	STARPU_OPENCL_WORKER,
+	STARPU_MIC_WORKER,
+	STARPU_SCC_WORKER,
+	STARPU_MPI_MS_WORKER,
+	STARPU_ANY_WORKER
+)
+
+
+# Forward `arch_type` to the C function starpu_worker_get_count_by_type, which
+# is declared here as returning a Cint.
+function starpu_worker_get_count_by_type(arch_type :: StarpuWorkerArchtype)
+    @starpucall(starpu_worker_get_count_by_type,
+            Cint, (StarpuWorkerArchtype,), arch_type
+        )
+end
+
+
+#= TODO : NOT C_NULL but stdout FILE *
+function starpu_worker_display_names(arch_type :: StarpuWorkerArchtype)
+	@starpucall(starpu_worker_display_names,
+            Void, (Ptr{Void}, StarpuWorkerArchtype),
+			C_NULL, arch_type
+        )
+end
+=#
+
+
+
+
+@starpu_noparam_function "starpu_worker_get_id" Cint
+
+@starpu_noparam_function "starpu_cpu_worker_get_count" Cuint
+@starpu_noparam_function "starpu_cuda_worker_get_count" Cuint

+ 28 - 0
julia/src/Wrapper/Julia/static_structures.jl

@@ -0,0 +1,28 @@
+
+
+
+const jlstarpu_allocated_structures = Vector{Ptr{Void}}([])
+
+
+
+# Copy `x_c` into a freshly malloc'ed buffer of sizeof(T) bytes and return the
+# typed pointer to it. The pointer is also recorded in
+# jlstarpu_allocated_structures so that jlstarpu_free_allocated_structures()
+# can release it later. Raises an error if malloc returns NULL.
+function jlstarpu_allocate_and_store(x_c :: T) where {T}
+
+    allocated_ptr = Ptr{T}(Libc.malloc(sizeof(T)))
+
+    if (allocated_ptr == C_NULL)
+        error("Base.Libc.malloc returned NULL")
+    end
+
+    # Write the value into the C buffer, then remember the buffer (type-erased).
+    unsafe_store!(allocated_ptr, x_c)
+    push!(jlstarpu_allocated_structures, Ptr{Void}(allocated_ptr))
+
+    return allocated_ptr
+end
+
+
+
+# Release every buffer recorded in jlstarpu_allocated_structures with Libc.free
+# and leave the registry empty. Always returns nothing.
+function jlstarpu_free_allocated_structures()
+    for stored_ptr in jlstarpu_allocated_structures
+        Libc.free(stored_ptr)
+    end
+
+    empty!(jlstarpu_allocated_structures)
+    return nothing
+end

+ 29 - 0
julia/tst/cpu_cuda_mult.jl

@@ -0,0 +1,29 @@
+
+include("../src/Compiler/include.jl")
+
+starpu_new_cpu_kernel_file("build/generated_cpu_mult.c")
+starpu_new_cuda_kernel_file("build/generated_cuda_mult.cu")
+
+# Matrix product kernel: for every (i, j), m3[i, j] = sum over k of m1[i, k] * m2[k, j].
+# The function is handed to @cpu_cuda_kernel rather than compiled as plain Julia.
+# NOTE(review): presumably the macro emits C and CUDA code into the kernel files
+# opened above, and @indep marks loops whose iterations are independent; confirm
+# against the Compiler sources.
+@cpu_cuda_kernel function matrix_mult(m1 :: Matrix{Float32}, m2 :: Matrix{Float32}, m3 :: Matrix{Float32}) :: Void
+
+    width_m2 :: Int64 = width(m2)
+    height_m1 :: Int64 = height(m1)
+    width_m1 :: Int64 = width(m1)
+
+    # j walks the columns of m2/m3, i walks the rows of m1/m3.
+    @indep for j in (1 : width_m2)
+        @indep for i in (1 : height_m1)
+
+            sum :: Float32 = 0.
+
+            # Dot product of row i of m1 with column j of m2.
+            for k in (1 : width_m1)
+                sum = sum + m1[i, k] * m2[k, j]
+            end
+
+            m3[i, j] = sum
+        end
+    end
+end
+
+compile_cpu_kernels("build/generated_cpu_mult.so")
+compile_cuda_kernels("build/generated_cuda_mult.so")
+combine_kernel_files("build/generated_tasks.so", ["build/generated_cpu_mult.so", "build/generated_cuda_mult.so"])

+ 54 - 0
julia/tst/cpu_mult.c

@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <starpu.h>
+
+/*
+ * The codelet is passed 3 matrices, the "descr" union-type field gives a
+ * description of the layout of those 3 matrices in the local memory (ie. RAM
+ * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have
+ * registered data with the "matrix" data interface, we use the matrix macros.
+ */
+
+/*
+ * CPU implementation of the product codelet: computes C = A * B, where
+ * descr[0] describes A, descr[1] describes B and descr[2] describes C.
+ * arg is unused.
+ */
+void cpu_mult(void *descr[], void *arg)
+{
+	(void)arg;
+	float *subA, *subB, *subC;
+	uint32_t nxC, nyC, nyA;
+	uint32_t ldA, ldB, ldC;
+
+	/* Pointer to the first element of the local copy of each matrix */
+	subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]);
+	subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]);
+	subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]);
+
+
+	/* As used below, nx is the extent of the contiguous dimension (stride 1)
+	 * and ny the extent of the other dimension, whose consecutive indices
+	 * are separated by ld elements (ld stands for leading dimension).
+	 * NB: in case some filters were used, the leading dimension is not
+	 * guaranteed to be the same in main memory (on the original matrix)
+	 * and on the accelerator! */
+	nxC = STARPU_MATRIX_GET_NX(descr[2]);
+	nyC = STARPU_MATRIX_GET_NY(descr[2]);
+	nyA = STARPU_MATRIX_GET_NY(descr[0]);
+
+	ldA = STARPU_MATRIX_GET_LD(descr[0]);
+	ldB = STARPU_MATRIX_GET_LD(descr[1]);
+	ldC = STARPU_MATRIX_GET_LD(descr[2]);
+
+	/* we assume a FORTRAN-ordering! */
+	unsigned i,j,k;
+	/* i indexes the ld-strided dimension of C, j the contiguous one,
+	 * and k runs over the dimension shared by A and B (nyA). */
+	for (i = 0; i < nyC; i++)
+	{
+		for (j = 0; j < nxC; j++)
+		{
+			float sum = 0.0;
+
+			for (k = 0; k < nyA; k++)
+			{
+				sum += subA[j+k*ldA]*subB[k+i*ldB];
+			}
+
+			subC[j + i*ldC] = sum;
+		}
+	}
+}

+ 68 - 0
julia/tst/gpu_mult.cu

@@ -0,0 +1,68 @@
+#include <starpu.h>
+#include <stdint.h>
+#include <stdio.h>
+
+
+
+
+/*
+ * Each thread computes at most one element of C = A * B.
+ * The global thread id is split into (i, j): i = id % nxC is the index along
+ * the contiguous dimension of C, j = id / nxC the index along the ld-strided
+ * dimension. Threads whose j falls outside [0, nyC) do nothing.
+ */
+__global__ void gpuMultKernel
+(
+		uint32_t nxC, uint32_t nyC, uint32_t nyA,
+		uint32_t ldA, uint32_t ldB, uint32_t ldC,
+		float * subA, float * subB, float * subC
+)
+{
+	uint32_t id, i, j, k;
+	float sum;
+
+	id = blockIdx.x * blockDim.x + threadIdx.x;
+	i = id % nxC;
+	j = id / nxC;
+
+	/* Surplus threads of the last block have no element to compute */
+	if (j >= nyC){
+		return;
+	}
+
+	sum = 0.;
+
+	/* k runs over the dimension shared by A and B */
+	for (k = 0 ; k < nyA ; k++){
+		sum += subA[i + k * ldA] * subB[k + j * ldB];
+	}
+
+	subC[i + j * ldC] = sum;
+
+}
+
+
+
+#define THREADS_PER_BLOCK 64
+
+/*
+ * CUDA implementation of the product codelet: launches gpuMultKernel on the
+ * worker's local stream to compute C = A * B, where descr[0] describes A,
+ * descr[1] describes B and descr[2] describes C, then waits for the stream.
+ * args is unused. CUDA errors are reported through STARPU_CUDA_REPORT_ERROR.
+ */
+extern "C" void gpu_mult(void * descr[], void * args)
+{
+	(void) args;
+
+	float * d_subA, * d_subB, * d_subC;
+	uint32_t nxC, nyC, nyA;
+	uint32_t ldA, ldB, ldC;
+	uint32_t nblocks;
+	cudaError_t status;
+
+	d_subA = (float *) STARPU_MATRIX_GET_PTR(descr[0]);
+	d_subB = (float *) STARPU_MATRIX_GET_PTR(descr[1]);
+	d_subC = (float *) STARPU_MATRIX_GET_PTR(descr[2]);
+
+	nxC = STARPU_MATRIX_GET_NX(descr[2]);
+	nyC = STARPU_MATRIX_GET_NY(descr[2]);
+	nyA = STARPU_MATRIX_GET_NY(descr[0]);
+
+	ldA = STARPU_MATRIX_GET_LD(descr[0]);
+	ldB = STARPU_MATRIX_GET_LD(descr[1]);
+	ldC = STARPU_MATRIX_GET_LD(descr[2]);
+
+	/* One thread per element of C, rounded up to whole blocks */
+	nblocks = (nxC * nyC + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK;
+
+	/* An empty C block needs no work, and a zero-sized grid is not a
+	 * valid launch configuration. */
+	if (nblocks == 0){
+		return;
+	}
+
+	gpuMultKernel
+		<<< nblocks, THREADS_PER_BLOCK, 0, starpu_cuda_get_local_stream()
+		>>> (nxC, nyC, nyA, ldA, ldB, ldC, d_subA, d_subB, d_subC);
+
+	/* Launch failures are only visible through cudaGetLastError() */
+	status = cudaGetLastError();
+	if (status != cudaSuccess){
+		STARPU_CUDA_REPORT_ERROR(status);
+	}
+
+	status = cudaStreamSynchronize(starpu_cuda_get_local_stream());
+	if (status != cudaSuccess){
+		STARPU_CUDA_REPORT_ERROR(status);
+	}
+
+}

+ 278 - 0
julia/tst/mult.c

@@ -0,0 +1,278 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012-2013                                Inria
+ * Copyright (C) 2009-2011,2013-2015                      Université de Bordeaux
+ * Copyright (C) 2010                                     Mehdi Juhoor
+ * Copyright (C) 2010-2013,2015,2017                      CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example shows a simple implementation of a blocked matrix
+ * multiplication. Note that this is NOT intended to be an efficient
+ * implementation of sgemm! In this example, we show:
+ *  - how to declare dense matrices (starpu_matrix_data_register)
+ *  - how to manipulate matrices within codelets (eg. descr[0].blas.ld)
+ *  - how to use filters to partition the matrices into blocks
+ *    (starpu_data_partition and starpu_data_map_filters)
+ *  - how to unpartition data (starpu_data_unpartition) and how to stop
+ *    monitoring data (starpu_data_unregister)
+ *  - how to manipulate subsets of data (starpu_data_get_sub_data)
+ *  - how to construct an autocalibrated performance model (starpu_perfmodel)
+ *  - how to submit asynchronous tasks
+ */
+
+#include <string.h>
+#include <math.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include <starpu.h>
+
+
+
+/*
+ * That program should compute C = A * B
+ *
+ *   A of size (z,y)
+ *   B of size (x,z)
+ *   C of size (x,y)
+
+              |---------------|
+            z |       B       |
+              |---------------|
+       z              x
+     |----|   |---------------|
+     |    |   |               |
+     |    |   |               |
+     | A  | y |       C       |
+     |    |   |               |
+     |    |   |               |
+     |----|   |---------------|
+
+ */
+
+
+
+
+
+void gpu_mult(void **, void *);
+void cpu_mult(void **, void *);
+
+
+
+
+
+/* Performance model attached to the codelet below: history-based, stored
+ * under the symbol "history_perf". */
+static struct starpu_perfmodel model =
+{
+		.type = STARPU_HISTORY_BASED,
+		.symbol = "history_perf"
+};
+
+/* Codelet of the block product: cpu_mult on CPU, gpu_mult on CUDA.
+ * It takes three buffers; the first two are read and the third is written
+ * (A, B and C respectively, in the order the handles are set at submission). */
+static struct starpu_codelet cl =
+{
+		.cpu_funcs = {cpu_mult},
+		.cpu_funcs_name = {"cpu_mult"},
+		.cuda_funcs = {gpu_mult},
+		.nbuffers = 3,
+		.modes = {STARPU_R, STARPU_R, STARPU_W},
+		.model = &model
+};
+
+
+
+
+
+
+/*
+ * Compute C = A * B with StarPU.
+ *
+ * A, B and C are registered as matrices living in main memory, B and C are
+ * cut into nslicesx pieces by the "vert" filter, A and C into nslicesy pieces
+ * by the "horiz" filter, and one task is submitted per (taskx, tasky) block
+ * of C. The function returns once all tasks have completed and the data has
+ * been unpartitioned and unregistered. A failed task submission aborts
+ * through STARPU_CHECK_RETURN_VALUE.
+ */
+void multiply_with_starpu(float *A, float *B, float *C,  unsigned xdim,  unsigned ydim,  unsigned zdim, unsigned nslicesx, unsigned nslicesy)
+{
+	starpu_data_handle_t A_handle, B_handle, C_handle;
+
+
+	starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A,
+			ydim, ydim, zdim, sizeof(float));
+	starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B,
+			zdim, zdim, xdim, sizeof(float));
+	starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C,
+			ydim, ydim, xdim, sizeof(float));
+
+
+	struct starpu_data_filter vert =
+	{
+			.filter_func = starpu_matrix_filter_vertical_block,
+			.nchildren = nslicesx
+	};
+
+	struct starpu_data_filter horiz =
+	{
+			.filter_func = starpu_matrix_filter_block,
+			.nchildren = nslicesy
+	};
+
+
+	starpu_data_partition(B_handle, &vert);
+	starpu_data_partition(A_handle, &horiz);
+	starpu_data_map_filters(C_handle, 2, &vert, &horiz);
+
+	unsigned taskx, tasky;
+
+	for (taskx = 0; taskx < nslicesx; taskx++){
+		for (tasky = 0; tasky < nslicesy; tasky++){
+
+			struct starpu_task *task = starpu_task_create();
+			int ret;
+
+			task->cl = &cl;
+			task->handles[0] = starpu_data_get_sub_data(A_handle, 1, tasky);
+			task->handles[1] = starpu_data_get_sub_data(B_handle, 1, taskx);
+			task->handles[2] = starpu_data_get_sub_data(C_handle, 2, taskx, tasky);
+
+			/* A silently dropped task would leave its block of C
+			 * untouched, so a submission failure must not go unnoticed. */
+			ret = starpu_task_submit(task);
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+		}
+	}
+
+	starpu_task_wait_for_all();
+
+
+	starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(B_handle, STARPU_MAIN_RAM);
+	starpu_data_unpartition(C_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(A_handle);
+	starpu_data_unregister(B_handle);
+	starpu_data_unregister(C_handle);
+
+}
+
+
+
+/* Fill the width x height matrix m (column stride: height) with values drawn
+ * from starpu_drand48(). Elements are visited row by row. */
+void init_rand(float * m, unsigned width, unsigned height)
+{
+	unsigned row, col;
+
+	for (row = 0 ; row < height ; row++){
+		for (col = 0 ; col < width ; col++){
+			float * cell = &m[row+col*height];
+
+			*cell = (float)(starpu_drand48());
+		}
+	}
+}
+
+
+/* Set the width * height floats starting at m to all-zero bytes. */
+void init_zero(float * m, unsigned width, unsigned height)
+{
+	const size_t nbytes = sizeof(float) * width * height;
+
+	memset(m, 0, nbytes);
+}
+
+
+
+/* Sort the first size elements of t in ascending order, in place (bubble
+ * sort). Arrays of fewer than two elements are left untouched. */
+void sort(unsigned int size, double t[])
+{
+	unsigned int j;
+
+	int is_sort = 0;
+
+	/* The loop bound below is size - 1 in unsigned arithmetic: with
+	 * size == 0 it would wrap around and the loop would walk far past
+	 * the end of t. */
+	if (size < 2){
+		return;
+	}
+
+	while(!is_sort){
+
+		is_sort = 1;
+
+		for (j = 0 ; j < size - 1 ; j++){
+
+			if (t[j] > t[j+1]){
+				double tmp = t[j];
+				t[j] = t[j+1];
+				t[j+1] = tmp;
+				is_sort = 0;
+			}
+		}
+	}
+
+
+}
+
+
+/*
+ * Run the StarPU product nb_test times on freshly initialised matrices
+ * (A and B random, C zeroed) and return the median of the measured times,
+ * i.e. element nb_test/2 of the sorted measurements. Each measurement is
+ * the difference of two starpu_timing_now() values divided by 1e6.
+ *
+ * Returns 0.0 when nb_test is 0 (nothing to measure). Exits the program
+ * with EXIT_FAILURE if the working buffers cannot be allocated.
+ */
+double median_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nsclicesx, unsigned nsclicesy)
+{
+	unsigned i;
+	double median;
+
+	if (nb_test == 0){
+		return 0.0;
+	}
+
+	/* Sizes are computed in size_t so that large dimensions do not wrap
+	 * around in unsigned arithmetic before the allocation. */
+	float * A = malloc((size_t)zdim * ydim * sizeof(*A));
+	float * B = malloc((size_t)xdim * zdim * sizeof(*B));
+	float * C = malloc((size_t)xdim * ydim * sizeof(*C));
+
+	/* Heap rather than a variable-length array: nb_test comes from the
+	 * command line and must not be able to overflow the stack. */
+	double * exec_times = malloc((size_t)nb_test * sizeof(*exec_times));
+
+	if (A == NULL || B == NULL || C == NULL || exec_times == NULL){
+		fprintf(stderr, "median_time : memory allocation failed\n");
+		free(A);
+		free(B);
+		free(C);
+		free(exec_times);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0 ; i < nb_test ; i++){
+
+		double start, stop, exec_t;
+
+		init_rand(A, zdim, ydim);
+		init_rand(B, xdim, zdim);
+		init_zero(C, xdim, ydim);
+
+		start = starpu_timing_now();
+		multiply_with_starpu(A, B, C, xdim, ydim, zdim, nsclicesx, nsclicesy);
+		stop = starpu_timing_now();
+
+		exec_t = (stop - start)/1.e6;
+		exec_times[i] = exec_t;
+	}
+
+	sort(nb_test, exec_times);
+
+	median = exec_times[nb_test/2];
+
+	free(A);
+	free(B);
+	free(C);
+	free(exec_times);
+
+	return median;
+}
+
+
+/* Print one "dim ; median time" line for every square dimension going from
+ * start_dim to stop_dim (inclusive) in increments of step_dim. */
+void display_times(unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nb_tests, unsigned nsclicesx, unsigned nsclicesy)
+{
+	unsigned dim = start_dim;
+
+	while (dim <= stop_dim){
+		double t = median_time(nb_tests, dim, dim, dim, nsclicesx, nsclicesy);
+
+		printf("%u ; %f\n", dim, t);
+		dim += step_dim;
+	}
+
+}
+
+
+/*
+ * Entry point: mult start_dim step_dim stop_dim nb_tests nsclicesx nsclicesy
+ *
+ * Every argument must be a strictly positive integer. Returns 1 on a usage
+ * or argument error, 77 when StarPU cannot be initialised, 0 otherwise.
+ */
+int main(int argc, char * argv[])
+{
+	unsigned params[6];
+	int p;
+
+	if (argc != 7){
+		printf("Usage : %s start_dim step_dim stop_dim nb_tests nsclicesx nsclicesy\n", argv[0]);
+		return 1;
+	}
+
+	/* Arguments are validated before StarPU is started so that a bad
+	 * command line never leaves an initialised runtime behind. A zero
+	 * step would loop forever, and a negative value would turn into a
+	 * huge unsigned one. */
+	for (p = 0 ; p < 6 ; p++){
+		int value = atoi(argv[p + 1]);
+
+		if (value <= 0){
+			fprintf(stderr, "Invalid argument '%s' : expected a strictly positive integer\n", argv[p + 1]);
+			return 1;
+		}
+
+		params[p] = (unsigned) value;
+	}
+
+	if (starpu_init(NULL) != EXIT_SUCCESS){
+		fprintf(stderr, "ERROR\n");
+		return 77;
+	}
+
+	/* params holds, in order: start_dim, step_dim, stop_dim, nb_tests,
+	 * nsclicesx, nsclicesy */
+	display_times(params[0], params[1], params[2], params[3], params[4], params[5]);
+
+	starpu_shutdown();
+
+	return 0;
+}
+

+ 107 - 0
julia/tst/mult_def.jl

@@ -0,0 +1,107 @@
+
+
+#   A of size (y,z)
+#   B of size (z,x)
+#   C of size (y,x)
+
+
+#              |---------------|
+#            z |       B       |
+#              |---------------|
+#       z              x
+#     |----|   |---------------|
+#     |    |   |               |
+#     |    |   |               |
+#     | A  | y |       C       |
+#     |    |   |               |
+#     |    |   |               |
+#     |----|   |---------------|
+#
+
+
+
+
+
+# Compute C = A * B with StarPU, one task per block of C.
+# B is partitioned with the `vert` filter (nslicesx children), A with the
+# `horiz` filter (nslicesy children) and C with both. Relies on a global
+# codelet named `cl` being defined by the including script. Returns nothing.
+function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
+
+    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx)
+    horiz = StarpuDataFilter(STARPU_MATRIX_FILTER_BLOCK, nslicesy)
+
+    # NOTE(review): @starpu_block presumably releases the handles registered
+    # inside the block when it ends; confirm against its definition.
+    @starpu_block let
+
+        hA,hB,hC = starpu_data_register(A, B, C)
+
+        starpu_data_partition(hB, vert)
+        starpu_data_partition(hA, horiz)
+        starpu_data_map_filters(hC, vert, horiz)
+
+        # Submit every block product asynchronously; @starpu_sync_tasks then
+        # waits for all of them after the loop.
+        @starpu_sync_tasks for taskx in (1 : nslicesx)
+            for tasky in (1 : nslicesy)
+                @starpu_async_cl cl(hA[tasky], hB[taskx], hC[taskx, tasky])
+            end
+        end
+    end
+
+    return nothing
+end
+
+
+
+
+# Return true when A and B have the same size and every pair of entries
+# differs by at most `eps` relative to the larger magnitude of the two.
+# The first mismatching pair is printed before returning false.
+function approximately_equals(
+    A :: Matrix{Cfloat},
+    B :: Matrix{Cfloat},
+    eps = 1e-2
+)
+    # Matrices of different shapes cannot be equal; without this check the
+    # loop would either index B out of bounds or compare only a sub-block.
+    if (size(A) != size(B))
+        return false
+    end
+
+    (height, width) = size(A)
+
+    for j in (1 : width)
+        for i in (1 : height)
+            if (abs(A[i,j] - B[i,j]) > eps * max(abs(B[i,j]), abs(A[i,j])))
+                println("A[$i,$j] : $(A[i,j]), B[$i,$j] : $(B[i,j])")
+                return false
+            end
+        end
+    end
+
+    return true
+end
+
+
+
+# Run the StarPU product `nb_tests` times on fresh random matrices, check each
+# result against Julia's own A * B, and return the median of the measured
+# times (the upper median for an even number of runs).
+# Raises an error as soon as one result does not match.
+function median_time(nb_tests, xdim, zdim, ydim, nslicesx, nslicesy)
+
+    exec_times = Float64[]
+
+    for i in (1 : nb_tests)
+
+        A = Array(rand(Cfloat, ydim, zdim))
+        B = Array(rand(Cfloat, zdim, xdim))
+        C = zeros(Float32, ydim, xdim)
+        D  = A * B
+
+        tic()
+        multiply_with_starpu(A, B, C, nslicesx, nslicesy)
+        t = toq()
+
+        if (!approximately_equals(D, C))
+            error("Invalid result")
+        end
+
+        push!(exec_times, t)
+    end
+
+    sort!(exec_times)
+
+    # Julia arrays are 1-based: the 0-based position nb_tests / 2 used by the
+    # C version of this benchmark is index div(nb_tests, 2) + 1 here. Without
+    # the + 1, a single run would index element 0.
+    return exec_times[div(nb_tests, 2) + 1]
+end
+
+
+
+# Print one "dim ; median time" line per square dimension in
+# start_dim : step_dim : stop_dim.
+function display_times(start_dim, step_dim, stop_dim, nb_tests, nslicesx, nslicesy)
+
+    for n in (start_dim : step_dim : stop_dim)
+        elapsed = median_time(nb_tests, n, n, n, nslicesx, nslicesy)
+        println("$n ; $elapsed")
+    end
+end

+ 30 - 0
julia/tst/mult_extern.jl

@@ -0,0 +1,30 @@
+
+# Benchmark driver using the hand-written kernels (cpu_mult / gpu_mult) loaded
+# from build/extern_tasks.so.
+if length(ARGS) != 6
+    println("Usage : julia prog.jl start_dim step_dim stop_dim nb_tests nslicesx nslicesy")
+    # A usage error must not look like a successful run to the calling shell.
+    exit(1)
+end
+
+include("../src/Wrapper/Julia/starpu_include.jl")
+using StarPU
+
+@debugprint "starpu_init"
+starpu_init(extern_task_path = "build/extern_tasks.so")
+
+perfmodel = StarpuPerfmodel(
+    perf_type = STARPU_HISTORY_BASED,
+    symbol = "history_perf"
+)
+
+# `cl` is read as a global by multiply_with_starpu in mult_def.jl.
+cl = StarpuCodelet(
+    cpu_func = "cpu_mult",
+    gpu_func = "gpu_mult",
+    modes = [STARPU_R, STARPU_R, STARPU_W],
+    perfmodel = perfmodel
+)
+
+include("mult_def.jl")
+
+# The six command-line arguments map, in order, onto display_times' parameters.
+display_times(map((x -> parse(Int64,x)), ARGS)...)
+
+@debugprint "starpu_shutdown"
+starpu_shutdown()

+ 34 - 0
julia/tst/mult_generated.jl

@@ -0,0 +1,34 @@
+
+# Benchmark driver using the generated kernels (matrix_mult / CUDA_matrix_mult)
+# loaded from build/generated_tasks.so.
+if length(ARGS) != 6
+    println("Usage : julia prog.jl start_dim step_dim stop_dim nb_tests nslicesx nslicesy")
+    # A usage error must not look like a successful run to the calling shell.
+    exit(1)
+end
+
+
+include("../src/Wrapper/Julia/starpu_include.jl")
+using StarPU
+
+
+
+
+@debugprint "starpu_init"
+starpu_init(extern_task_path = "build/generated_tasks.so")
+
+perfmodel = StarpuPerfmodel(
+    perf_type = STARPU_HISTORY_BASED,
+    symbol = "history_perf"
+)
+
+# `cl` is read as a global by multiply_with_starpu in mult_def.jl.
+cl = StarpuCodelet(
+    cpu_func = "matrix_mult",
+    gpu_func = "CUDA_matrix_mult",
+    modes = [STARPU_R, STARPU_R, STARPU_W],
+    perfmodel = perfmodel
+)
+
+include("mult_def.jl")
+
+# The six command-line arguments map, in order, onto display_times' parameters.
+display_times(map( (x -> parse(Int64,x)) , ARGS)...)
+
+@debugprint "starpu_shutdown"
+starpu_shutdown()

+ 9 - 4
mpi/src/mpi/starpu_mpi_tag.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2017                                CNRS
- * Copyright (C) 2011-2015,2017                           Université de Bordeaux
+ * Copyright (C) 2011-2015,2017-2018                           Université de Bordeaux
  * Copyright (C) 2014                                     Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -62,7 +62,7 @@ starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(starpu_mpi_tag_t d
 	struct handle_tag_entry *ret;
 
 	_starpu_spin_lock(&registered_tag_handles_lock);
-	HASH_FIND_INT(registered_tag_handles, &data_tag, ret);
+	HASH_FIND(hh, registered_tag_handles, &data_tag, sizeof(ret->data_tag), ret);
 	_starpu_spin_unlock(&registered_tag_handles_lock);
 
 	if (ret)
@@ -95,7 +95,12 @@ void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t
 	entry->data_tag = data_tag;
 
 	_starpu_spin_lock(&registered_tag_handles_lock);
-	HASH_ADD_INT(registered_tag_handles, data_tag, entry);
+#ifndef STARPU_NO_ASSERT
+	struct handle_tag_entry *old;
+	HASH_FIND(hh, registered_tag_handles, &data_tag, sizeof(entry->data_tag), old);
+	STARPU_ASSERT_MSG(!old, "tag %"PRIi64" being registered for data %p, but is already used by data %p!\n", data_tag, handle, old->handle);
+#endif
+	HASH_ADD(hh, registered_tag_handles, data_tag, sizeof(entry->data_tag), entry);
 	_starpu_spin_unlock(&registered_tag_handles_lock);
 }
 
@@ -110,7 +115,7 @@ int _starpu_mpi_tag_data_release(starpu_data_handle_t handle)
 		struct handle_tag_entry *tag_entry;
 
 		_starpu_spin_lock(&registered_tag_handles_lock);
-		HASH_FIND_INT(registered_tag_handles, &(((struct _starpu_mpi_data *)(handle->mpi_data))->node_tag.data_tag), tag_entry);
+		HASH_FIND(hh, registered_tag_handles, &(((struct _starpu_mpi_data *)(handle->mpi_data))->node_tag.data_tag), sizeof(tag_entry->data_tag), tag_entry);
 		STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %"PRIi64"d isn't in the hashmap !", handle, data_tag);
 
 		HASH_DEL(registered_tag_handles, tag_entry);

+ 2 - 1
src/core/combined_workers.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2013,2015,2017                           Inria
  * Copyright (C) 2013                                     Simon Archipoff
- * Copyright (C) 2010-2015                                Université de Bordeaux
+ * Copyright (C) 2010-2015, 2018                                Université de Bordeaux
  * Copyright (C) 2010-2011,2013-2017                      CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
@@ -88,6 +88,7 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 	 * safe because this method should only be called when the scheduler
 	 * is being initialized. */
 	new_workerid = basic_worker_count + combined_worker_id;
+	STARPU_ASSERT_MSG(new_workerid < STARPU_NMAXWORKERS, "Too many combined workers for parallel task execution. Please use configure option --enable-maxcpus to increase it beyond the current value %d", STARPU_MAXCPUS);
 	config->topology.ncombinedworkers++;
 
 //	fprintf(stderr, "COMBINED WORKERS ");

+ 1 - 0
src/core/dependencies/data_concurrency.c

@@ -280,6 +280,7 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 				continue;
 		}
 
+                STARPU_ASSERT(j->task->status == STARPU_TASK_BLOCKED || j->task->status == STARPU_TASK_BLOCKED_ON_TAG || j->task->status == STARPU_TASK_BLOCKED_ON_TASK || j->task->status == STARPU_TASK_BLOCKED_ON_DATA);
                 j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
 
 		if(handle->arbiter)

+ 3 - 0
src/core/jobs.c

@@ -300,6 +300,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 #endif
 
 	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
+	STARPU_ASSERT(task->status == STARPU_TASK_RUNNING);
 #ifdef STARPU_OPENMP
 	if (continuation)
 	{
@@ -514,6 +515,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 #endif
 			{
 				/* We reuse the same job structure */
+				task->status = STARPU_TASK_BLOCKED;
 				int ret = _starpu_submit_job(j);
 				STARPU_ASSERT(!ret);
 			}
@@ -593,6 +595,7 @@ static unsigned _starpu_not_all_task_deps_are_fulfilled(struct _starpu_job *j)
 
 	if (!j->submitted || (job_successors->ndeps != job_successors->ndeps_completed))
 	{
+		STARPU_ASSERT(j->task->status == STARPU_TASK_BLOCKED || j->task->status == STARPU_TASK_BLOCKED_ON_TAG);
                 j->task->status = STARPU_TASK_BLOCKED_ON_TASK;
 		ret = 1;
 	}

+ 2 - 1
src/core/perfmodel/perfmodel_bus.c

@@ -740,6 +740,7 @@ static void benchmark_all_gpu_devices(void)
 
 #ifdef STARPU_HAVE_HWLOC
 	hwloc_topology_init(&hwtopology);
+	_starpu_topology_filter(hwtopology);
 	hwloc_topology_load(hwtopology);
 #endif
 
@@ -2762,7 +2763,7 @@ static void write_bus_platform_file_content(int version)
 	}
 #endif
 
-#if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX && defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
+#if defined(HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX && defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
 	/* If we have enough hwloc information, write PCI bandwidths and routes */
 	if (!starpu_get_env_number_default("STARPU_PCI_FLAT", 0))
 	{

+ 4 - 0
src/core/sched_ctx.c

@@ -2258,6 +2258,10 @@ void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double ready_f
 
 void starpu_sched_ctx_move_task_to_ctx_locked(struct starpu_task *task, unsigned sched_ctx, unsigned with_repush)
 {
+	/* Restore state just like out of dependency layers */
+	STARPU_ASSERT(task->status == STARPU_TASK_READY);
+	task->status = STARPU_TASK_BLOCKED;
+
 	/* TODO: make something cleaner which differentiates between calls
 	   from push or pop (have mutex or not) and from another worker or not */
 	task->sched_ctx = sched_ctx;

+ 2 - 0
src/core/sched_policy.c

@@ -422,6 +422,7 @@ int _starpu_repush_task(struct _starpu_job *j)
 	_STARPU_LOG_IN();
 
 	unsigned can_push = _starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops, task);
+	STARPU_ASSERT(task->status == STARPU_TASK_BLOCKED || task->status == STARPU_TASK_BLOCKED_ON_TAG || task->status == STARPU_TASK_BLOCKED_ON_TASK || task->status == STARPU_TASK_BLOCKED_ON_DATA);
 	task->status = STARPU_TASK_READY;
 	STARPU_AYU_ADDTOTASKQUEUE(j->job_id, -1);
 	/* if the context does not have any workers save the tasks in a temp list */
@@ -457,6 +458,7 @@ int _starpu_repush_task(struct _starpu_job *j)
 	 * corresponding dependencies */
 	if (task->cl == NULL || task->where == STARPU_NOWHERE)
 	{
+		task->status = STARPU_TASK_RUNNING;
 		if (task->prologue_callback_pop_func)
 			task->prologue_callback_pop_func(task->prologue_callback_pop_arg);
 

+ 7 - 0
src/core/task.c

@@ -540,6 +540,11 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 	unsigned is_sync = task->synchronous;
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 
+	if (task->status == STARPU_TASK_STOPPED || task->status == STARPU_TASK_FINISHED)
+		task->status = STARPU_TASK_INVALID;
+	else
+		STARPU_ASSERT(task->status == STARPU_TASK_INVALID);
+
 	if (j->internal)
 	{
 		// Internal tasks are submitted to initial context
@@ -782,6 +787,7 @@ int _starpu_task_submit_nodeps(struct starpu_task *task)
 	if (task->cl)
 		/* This would be done by data dependencies checking */
 		_starpu_job_set_ordered_buffers(j);
+	STARPU_ASSERT(task->status == STARPU_TASK_BLOCKED);
 	task->status = STARPU_TASK_READY;
 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 
@@ -822,6 +828,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
 	_starpu_job_set_ordered_buffers(j);
 
+	STARPU_ASSERT(task->status == STARPU_TASK_INVALID);
 	task->status = STARPU_TASK_READY;
 	_starpu_profiling_set_task_push_start_time(task);
 

+ 9 - 0
src/core/topology.c

@@ -1191,6 +1191,15 @@ void _starpu_topology_filter(hwloc_topology_t topology)
 #else
 	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
 #endif
+#ifdef HAVE_HWLOC_TOPOLOGY_SET_COMPONENTS
+#  ifndef STARPU_USE_CUDA
+	hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "cuda");
+	hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "nvml");
+#  endif
+#  ifndef STARPU_USE_OPENCL
+	hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "opencl");
+#  endif
+#endif
 }
 #endif
 

+ 1 - 0
src/core/workers.c

@@ -2213,6 +2213,7 @@ int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum star
 
 void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond)
 {
+	STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS);
 	*sched_cond = &_starpu_config.workers[workerid].sched_cond;
 	*sched_mutex = &_starpu_config.workers[workerid].sched_mutex;
 }

+ 3 - 1
src/drivers/driver_common/driver_common.c

@@ -53,10 +53,12 @@ void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job
 		_starpu_sched_pre_exec_hook(task);
 
 	_starpu_set_worker_status(worker, STATUS_EXECUTING);
-	task->status = STARPU_TASK_RUNNING;
 
 	if (rank == 0)
 	{
+		STARPU_ASSERT(task->status == STARPU_TASK_READY);
+		task->status = STARPU_TASK_RUNNING;
+
 		STARPU_AYU_RUNTASK(j->job_id);
 		cl->per_worker_stats[workerid]++;