Просмотр исходного кода

julia: Implement automatic translation of starpu headers.

Pierre Huchant лет назад: 5
Родитель
Сommit
d46257dbf7

+ 0 - 8
julia/Makefile

@@ -1,8 +0,0 @@
-SRCS=src/jlstarpu_task_submit.c src/jlstarpu_simple_functions.c src/jlstarpu_data_handles.c
-CC = gcc
-CFLAGS += $(shell pkg-config --cflags starpu-1.3)
-LDFLAGS += $(shell pkg-config --libs starpu-1.3)
-
-lib/libjlstarpu_c_wrapper.so: ${SRCS}
-	test -d lib || mkdir lib
-	$(CC) -O3 -shared -fPIC $(CFLAGS) $^ -o $@ $(LDFLAGS)

+ 100 - 0
julia/Manifest.toml

@@ -1,4 +1,104 @@
 # This file is machine-generated - editing it directly is not advised
 
+[[Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
+[[CBinding]]
+deps = ["Libdl", "Random", "Test"]
+git-tree-sha1 = "6f457df38ae2ba239d5e43b80493bb907de826b2"
+repo-rev = "655e9862947d17423f2fb91ea1014e1cb73c1be1"
+repo-url = "https://github.com/analytech-solutions/CBinding.jl.git"
+uuid = "d43a6710-96b8-4a2d-833c-c424785e5374"
+version = "0.8.1"
+
+[[CEnum]]
+git-tree-sha1 = "62847acab40e6855a9b5905ccb99c2b5cf6b3ebb"
+uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
+version = "0.2.0"
+
+[[Clang]]
+deps = ["CEnum", "DataStructures", "LLVM_jll", "Libdl"]
+git-tree-sha1 = "45013227beea038ecc17e8c07cd7c7b05ed26067"
+repo-rev = "master"
+repo-url = "https://github.com/phuchant/Clang.jl.git"
+uuid = "40e3b903-d033-50b4-a0cc-940c62c95e31"
+version = "0.11.0"
+
+[[DataStructures]]
+deps = ["InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "6166ecfaf2b8bbf2b68d791bc1d54501f345d314"
+uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+version = "0.17.15"
+
+[[Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+
+[[Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[[InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[LLVM_jll]]
+deps = ["Libdl", "Pkg"]
+git-tree-sha1 = "c037c15f36c185c613e5b2589d5833720dab3f76"
+uuid = "86de99a1-58d6-5da7-8064-bd56ce2e322c"
+version = "8.0.1+0"
+
+[[LibGit2]]
+deps = ["Printf"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+
 [[Libdl]]
 uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[[Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[OrderedCollections]]
+git-tree-sha1 = "12ce190210d278e12644bcadf5b21cbdcf225cd3"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.2.0"
+
+[[Pkg]]
+deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+
+[[Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+
+[[REPL]]
+deps = ["InteractiveUtils", "Markdown", "Sockets"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[Random]]
+deps = ["Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+
+[[Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[[Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+
+[[Test]]
+deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[[UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[[Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

+ 2 - 0
julia/Project.toml

@@ -4,4 +4,6 @@ authors = ["barthou "]
 version = "0.1.0"
 
 [deps]
+CBinding = "d43a6710-96b8-4a2d-833c-c424785e5374"
+Clang = "40e3b903-d033-50b4-a0cc-940c62c95e31"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

+ 0 - 2
julia/REQUIRE

@@ -1,2 +0,0 @@
-julia 1.0
-Libdl

+ 2 - 0
julia/examples/Makefile.am

@@ -76,6 +76,8 @@ endif
 #      Examples      #
 ######################
 
+SHELL_TESTS	+=	check_deps/check_deps.sh
+
 STARPU_JULIA_EXAMPLES	+=	mult/mult
 mult_mult_SOURCES	=	mult/mult.c mult/cpu_mult.c
 SHELL_TESTS		+=	mult/mult_starpu.sh

+ 11 - 0
julia/examples/check_deps/check_deps.jl

@@ -0,0 +1,11 @@
+# Check that the Julia packages used by the StarPU bindings can be loaded.
+import Pkg
+
+try
+    # First attempt: load both packages from the currently active environment.
+    using CBinding
+    using Clang
+catch
+    # Loading failed: activate the project located two directories above
+    # this script, instantiate it, then load the packages again.
+    Pkg.activate((@__DIR__)*"/../..")
+    Pkg.instantiate()
+    using Clang
+    using CBinding
+end

+ 20 - 0
julia/examples/check_deps/check_deps.sh

@@ -0,0 +1,20 @@
+#!/bin/bash
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Run the dependency check through execute.sh, which lives one directory
+# above this script. The quotes keep a script path containing spaces or glob
+# characters from being split or expanded by the shell.
+"$(dirname "$0")/../execute.sh" check_deps/check_deps.jl
+
+

+ 2 - 1
julia/examples/execute.sh.in

@@ -16,7 +16,8 @@
 #
 
 set -x
-export JULIA_LOAD_PATH=@STARPU_SRC_DIR@/julia/src:$JULIA_LOAD_PATH
+export JULIA_LOAD_PATH=@STARPU_SRC_DIR@/julia:$JULIA_LOAD_PATH
+export STARPU_INCLUDE_DIR=@STARPU_BUILD_DIR@/include
 export STARPU_JULIA_LIB=@STARPU_BUILD_DIR@/julia/src/.libs/libstarpujulia-1.3.so
 export STARPU_JULIA_BUILD=@STARPU_BUILD_DIR@/julia
 srcdir=@STARPU_SRC_DIR@/julia/examples

+ 1 - 1
julia/examples/mandelbrot/mandelbrot.jl

@@ -64,7 +64,7 @@ end
 starpu_init()
 
 function mandelbrot_with_starpu(A ::Matrix{Int64}, cr ::Float64, ci ::Float64, dim ::Int64, nslicesx ::Int64)
-    horiz = StarpuDataFilter(STARPU_MATRIX_FILTER_BLOCK, nslicesx)
+    horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nslicesx)
     @starpu_block let
 	hA = starpu_data_register(A)
 	starpu_data_partition(hA,horiz)

+ 6 - 6
julia/examples/mult/mult.jl

@@ -74,19 +74,19 @@ starpu_init()
 function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy, stride)
     scale= 3
     tmin=0
-    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx)
-    horiz = StarpuDataFilter(STARPU_MATRIX_FILTER_BLOCK, nslicesy)
+    vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx)
+    horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nslicesy)
     @starpu_block let
         hA,hB,hC = starpu_data_register(A, B, C)
         starpu_data_partition(hB, vert)
         starpu_data_partition(hA, horiz)
         starpu_data_map_filters(hC, vert, horiz)
         tmin=0
-        perfmodel = StarpuPerfmodel(
-            perf_type = STARPU_HISTORY_BASED,
+        perfmodel = starpu_perfmodel(
+            perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
             symbol = "history_perf"
         )
-        cl = StarpuCodelet(
+        cl = starpu_codelet(
             cpu_func = CPU_CODELETS["matrix_mult"],
             # cuda_func = CUDA_CODELETS["matrix_mult"],
             #opencl_func="ocl_matrix_mult",
@@ -100,7 +100,7 @@ function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: M
                 for taskx in (1 : nslicesx)
                     for tasky in (1 : nslicesy)
                         handles = [hA[tasky], hB[taskx], hC[taskx, tasky]]
-                        task = StarpuTask(cl = cl, handles = handles, cl_arg=(Int32(stride),))
+                        task = starpu_task(cl = cl, handles = handles, cl_arg=(Int32(stride),))
                         starpu_task_submit(task)
                         #@starpu_async_cl matrix_mult(hA[tasky], hB[taskx], hC[taskx, tasky])
                     end

+ 12 - 5
julia/examples/task_insert_color/task_insert_color.jl

@@ -29,24 +29,31 @@ function task_insert_color_with_starpu(val ::Ref{Int32})
     @starpu_block let
 	hVal = starpu_data_register(val)
 
-        cl1 = StarpuCodelet(
+        perfmodel = starpu_perfmodel(
+            perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
+            symbol = "history_perf"
+        )
+
+        cl1 = starpu_codelet(
             cpu_func = CPU_CODELETS["task_insert_color"],
-            modes = [STARPU_RW]
+            modes = [STARPU_RW],
+            perfmodel = perfmodel
         )
 
-        cl2 = StarpuCodelet(
+        cl2 = starpu_codelet(
             cpu_func = CPU_CODELETS["task_insert_color"],
             modes = [STARPU_RW],
+            perfmodel = perfmodel,
             color = 0x0000FF
         )
 
 	@starpu_sync_tasks begin
 
             # In the trace file, the following task should be green (executed on CPU)
-            starpu_task_submit(StarpuTask(cl = cl1, handles = [hVal]))
+            starpu_task_submit(starpu_task(cl = cl1, handles = [hVal]))
 
             # In the trace file, the following task will be blue as specified by the field color of cl2
-            starpu_task_submit(StarpuTask(cl = cl2, handles = [hVal]))
+            starpu_task_submit(starpu_task(cl = cl2, handles = [hVal]))
 
             # In the trace file, the following tasks will be red as specified in @starpu_async_cl
             @starpu_async_cl task_insert_color(hVal) [STARPU_RW] () 0xFF0000

+ 4 - 4
julia/examples/vector_scal/vector_scal.jl

@@ -36,11 +36,11 @@ function vector_scal_with_starpu(v :: Vector{Float32}, m :: Int32, k :: Float32,
     @starpu_block let
         hV = starpu_data_register(v)
         tmin=0
-        perfmodel = StarpuPerfmodel(
-            perf_type = STARPU_HISTORY_BASED,
+        perfmodel = starpu_perfmodel(
+            perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
             symbol = "history_perf"
         )
-        cl = StarpuCodelet(
+        cl = starpu_codelet(
             cpu_func = CPU_CODELETS["vector_scal"],
             # cuda_func = CUDA_CODELETS["vector_scal"],
             #opencl_func="ocl_matrix_mult",
@@ -52,7 +52,7 @@ function vector_scal_with_starpu(v :: Vector{Float32}, m :: Int32, k :: Float32,
             t=time_ns()
             @starpu_sync_tasks begin
                 handles = [hV]
-                task = StarpuTask(cl = cl, handles = handles, cl_arg=(m, k, l))
+                task = starpu_task(cl = cl, handles = handles, cl_arg=(m, k, l))
                 starpu_task_submit(task)
             end
             # @starpu_sync_tasks for task in (1:1)

+ 1 - 3
julia/src/Makefile.am

@@ -33,6 +33,4 @@ libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined
   -version-info $(LIBSTARPUJULIA_INTERFACE_CURRENT):$(LIBSTARPUJULIA_INTERFACE_REVISION):$(LIBSTARPUJULIA_INTERFACE_AGE)
 
 libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 						\
-	jlstarpu_task_submit.c		\
-	jlstarpu_simple_functions.c 	\
-	jlstarpu_data_handles.c
+	dummy.c

Разница между файлами не показана из-за своего большого размера
+ 22 - 1280
julia/src/StarPU.jl


+ 0 - 12
julia/src/compiler/expressions.jl

@@ -13,18 +13,6 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
-global starpu_type_traduction_dict = Dict(
-    Int32 => "int32_t",
-    UInt32 => "uint32_t",
-    Float32 => "float",
-    Int64 => "int64_t",
-    UInt64 => "uint64_t",
-    Float64 => "double",
-    Nothing => "void"
-)
-export starpu_type_traduction_dict
-
-
 #======================================================
                 AFFECTATION
 ======================================================#

+ 0 - 19
julia/src/compiler/file_generation.jl

@@ -13,15 +13,6 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
-
-
-
-global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
-
-
-
-global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
-
 const cpu_kernel_file_start = "#include <stdio.h>
 #include <stdint.h>
 #include <starpu.h>
@@ -105,16 +96,6 @@ macro target(x)
     end
 end
 
-export CPU_CODELETS
-global CPU_CODELETS=Dict{String,String}()
-export CUDA_CODELETS
-global CUDA_CODELETS=Dict{String,String}()
-
-export CODELETS_SCALARS
-global CODELETS_SCALARS=Dict{String,Any}()
-export CODELETS_PARAMS_STRUCT
-global CODELETS_PARAMS_STRUCT=Dict{String,Any}()
-
 """
 	    Executes @cuda_kernel and @cpu_kernel
         """

+ 200 - 0
julia/src/data.jl

@@ -0,0 +1,200 @@
+# Raw data handle as seen by the C library: an untyped pointer.
+const StarpuDataHandlePointer = Ptr{Cvoid}
+# Julia-side handle: the raw pointer wrapped in a StarpuDestructible, which
+# carries the list of destructor functions to apply to it.
+StarpuDataHandle = StarpuDestructible{StarpuDataHandlePointer}
+
+# Filter kinds accepted by `starpu_data_filter(filter_func, nchildren)`.
+@enum(StarpuDataFilterFunc,
+      STARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
+      STARPU_MATRIX_FILTER_BLOCK = 1,
+      STARPU_VECTOR_FILTER_BLOCK = 2,
+)
+
+export starpu_data_filter
+"""
+    starpu_data_filter(filter_func::StarpuDataFilterFunc, nchildren::Integer)
+
+Build a `starpu_data_filter` structure for the filter kind `filter_func`.
+
+The structure is created with `starpu_data_filter(zero)`, its `nchildren`
+field is set to `UInt32(nchildren)` and its `filter_func` field is set to the
+address of the matching C function, looked up by name in
+`starpu_wrapper_library_handle`.
+
+Throws an `ArgumentError` when `filter_func` is not one of the three known
+filter kinds.
+"""
+function starpu_data_filter(filter_func ::StarpuDataFilterFunc, nchildren ::Integer)
+    output = starpu_data_filter(zero)
+    output.nchildren = UInt32(nchildren)
+
+    # Name of the C function implementing each filter kind. The last case is
+    # tested explicitly so that an unexpected value is reported instead of
+    # silently falling back to the vector filter.
+    symbol_name = if filter_func == STARPU_MATRIX_FILTER_VERTICAL_BLOCK
+        "starpu_matrix_filter_vertical_block"
+    elseif filter_func == STARPU_MATRIX_FILTER_BLOCK
+        "starpu_matrix_filter_block"
+    elseif filter_func == STARPU_VECTOR_FILTER_BLOCK
+        "starpu_vector_filter_block"
+    else
+        throw(ArgumentError("unsupported data filter function: $filter_func"))
+    end
+
+    output.filter_func = Libdl.dlsym(starpu_wrapper_library_handle, symbol_name)
+
+    return output
+end
+
+# Convenience method for vectors and matrices: forwards to the two-argument
+# `starpu_memory_pin` with the byte size of `data` and asserts that the
+# result is a `Cint`.
+function starpu_memory_pin(data :: Union{Vector{T}, Matrix{T}}) where T
+    nbytes = sizeof(data)
+    return starpu_memory_pin(data, nbytes)::Cint
+end
+
+# Convenience method for vectors and matrices: forwards to the two-argument
+# `starpu_memory_unpin` with the byte size of `data` and asserts that the
+# result is a `Cint`.
+function starpu_memory_unpin(data :: Union{Vector{T}, Matrix{T}}) where T
+    nbytes = sizeof(data)
+    return starpu_memory_unpin(data, nbytes)::Cint
+end
+
+# Wrap the raw handle pointer `ptr` in a StarpuDestructible carrying the given
+# destructor functions; the result is converted to `StarpuDataHandle`.
+StarpuNewDataHandle(ptr :: StarpuDataHandlePointer, destr :: Function...) :: StarpuDataHandle =
+    StarpuDestructible(ptr, destr...)
+
+
+
+# Destructor form of `starpu_data_unregister`: takes the raw handle pointer.
+starpu_data_unregister_pointer(ptr :: StarpuDataHandlePointer) = starpu_data_unregister(ptr)
+
+# Unregister each given handle now: the unregister destructor is taken out of
+# the handle's destructor list and executed right away.
+function starpu_data_unregister(handles :: StarpuDataHandle...)
+    foreach(handles) do h
+        starpu_execute_destructor!(h, starpu_data_unregister_pointer)
+    end
+end
+
+# Register the vector `v` through `starpu_vector_data_register` (home node
+# STARPU_MAIN_RAM, `length(v)` elements of `sizeof(T)` bytes) and return a
+# handle whose destructor unregisters it.
+function starpu_data_register(v :: Vector{T}) where T
+    handle_ref = Ref{Ptr{Cvoid}}(0)
+    buffer = pointer(v)
+
+    starpu_vector_data_register(handle_ref, STARPU_MAIN_RAM, buffer, length(v), sizeof(T))
+
+    return StarpuNewDataHandle(handle_ref[], starpu_data_unregister_pointer)
+end
+
+# Register the matrix `m` through `starpu_matrix_data_register` and return a
+# handle whose destructor unregisters it. The first dimension of `m` is passed
+# twice (presumably as leading dimension and as first extent -- confirm
+# against the StarPU API), followed by the second dimension and `sizeof(T)`.
+function starpu_data_register(m :: Matrix{T}) where T
+    handle_ref = Ref{Ptr{Cvoid}}(0)
+    buffer = pointer(m)
+    nrows, ncols = size(m)
+
+    starpu_matrix_data_register(handle_ref, STARPU_MAIN_RAM, buffer, nrows, nrows, ncols, sizeof(T))
+
+    return StarpuNewDataHandle(handle_ref[], starpu_data_unregister_pointer)
+end
+
+# Register the 3-dimensional array `block` through `starpu_block_data_register`
+# and return a handle whose destructor unregisters it. The two stride
+# arguments are `d1` and `d1 * d2`, followed by the three extents and
+# `sizeof(T)`.
+function starpu_data_register(block :: Array{T,3}) where T
+    handle_ref = Ref{Ptr{Cvoid}}(0)
+    buffer = pointer(block)
+    d1, d2, d3 = size(block)
+
+    starpu_block_data_register(handle_ref, STARPU_MAIN_RAM, buffer, d1, d1 * d2, d1, d2, d3, sizeof(T))
+
+    return StarpuNewDataHandle(handle_ref[], starpu_data_unregister_pointer)
+end
+
+# Register the single value held by `ref` through
+# `starpu_variable_data_register` (`sizeof(T)` bytes) and return a handle whose
+# destructor unregisters it.
+function starpu_data_register(ref :: Ref{T}) where T
+    handle_ref = Ref{Ptr{Cvoid}}(0)
+
+    starpu_variable_data_register(handle_ref, STARPU_MAIN_RAM, ref, sizeof(T))
+
+    return StarpuNewDataHandle(handle_ref[], starpu_data_unregister_pointer)
+end
+
+# Register two or more pieces of data at once, left to right, and return the
+# resulting handles as a vector in the same order.
+function starpu_data_register(x1, x2, next_args...)
+    return [
+        starpu_data_register(x1),
+        starpu_data_register(x2),
+        map(starpu_data_register, next_args)...,
+    ]
+end
+
+import Base.getindex
+# `handle[i, j, ...]`: fetch a sub-data handle through
+# `starpu_data_get_sub_data`. Each index is decremented by one before the
+# call. The returned handle carries no destructor.
+function Base.getindex(handle :: StarpuDataHandle, indexes...)
+    shifted = indexes .- 1
+    sub_ptr = starpu_data_get_sub_data(handle.object, length(indexes), shifted...)
+    return StarpuNewDataHandle(sub_ptr)
+end
+
+# Destructor form of `starpu_data_unpartition`: takes the raw handle pointer
+# and passes STARPU_MAIN_RAM as second argument.
+starpu_data_unpartition_pointer(ptr :: StarpuDataHandlePointer) =
+    starpu_data_unpartition(ptr, STARPU_MAIN_RAM)
+
+# Partition `handle` with `filter`. An unpartition destructor is added to the
+# handle first, then the raw handle pointer and the address of `filter` are
+# passed to the pointer-level `starpu_data_partition`, whose result is
+# returned.
+function starpu_data_partition(handle :: StarpuDataHandle, filter :: starpu_data_filter)
+
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+    # A pointer obtained with pointer_from_objref does not protect its object
+    # from garbage collection: keep `filter` rooted for the whole call.
+    return GC.@preserve filter begin
+        starpu_data_partition(handle.object, pointer_from_objref(filter))
+    end
+end
+
+# Unpartition each given handle now: the unpartition destructor is taken out
+# of the handle's destructor list and executed right away.
+function starpu_data_unpartition(handles :: StarpuDataHandle...)
+    foreach(handles) do h
+        starpu_execute_destructor!(h, starpu_data_unpartition_pointer)
+    end
+
+    return nothing
+end
+
+# Apply one filter to `handle` through the pointer-level
+# `starpu_data_map_filters` (filter count 1). An unpartition destructor is
+# added to the handle first. Returns the result of the underlying call.
+function starpu_data_map_filters(handle :: StarpuDataHandle, filter :: starpu_data_filter)
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+    # A pointer obtained with pointer_from_objref does not protect its object
+    # from garbage collection: keep `filter` rooted for the whole call.
+    return GC.@preserve filter begin
+        starpu_data_map_filters(handle.object, 1, pointer_from_objref(filter))
+    end
+end
+
+# Apply two filters to `handle` through the pointer-level
+# `starpu_data_map_filters` (filter count 2). An unpartition destructor is
+# added to the handle first. Returns the result of the underlying call.
+function starpu_data_map_filters(handle :: StarpuDataHandle, filter_1 :: starpu_data_filter, filter_2 :: starpu_data_filter)
+    starpu_add_destructor!(handle, starpu_data_unpartition_pointer)
+    # Pointers obtained with pointer_from_objref do not protect their objects
+    # from garbage collection: keep both filters rooted for the whole call.
+    return GC.@preserve filter_1 filter_2 begin
+        starpu_data_map_filters(handle.object, 2, pointer_from_objref(filter_1), pointer_from_objref(filter_2))
+    end
+end
+
+# `repl` rewrites an index expression by dropping the `_` placeholder from
+# additions and subtractions. Symbols and numbers are returned unchanged.
+repl(x::Symbol) = x
+repl(x::Number) = x
+
+# Expression case:
+#   _ + a  ->  a         a + _  ->  a
+#   _ - a  ->  -(a)      a - _  ->  a
+# Any other expression is rebuilt as a call of its first argument applied to
+# the rewritten second and third arguments.
+function repl(x :: Expr)
+    is_call = x.head == :call
+
+    if is_call && x.args[1] == :+
+        x.args[2] == :_ && return x.args[3]
+        x.args[3] == :_ && return x.args[2]
+        return Expr(:call, :+, repl(x.args[2]), repl(x.args[3]))
+    end
+
+    if is_call && x.args[1] == :-
+        x.args[2] == :_ && return Expr(:call, :-, x.args[3])
+        x.args[3] == :_ && return x.args[2]
+        return Expr(:call, :-, repl(x.args[2]), repl(x.args[3]))
+    end
+
+    return Expr(:call, x.args[1], repl(x.args[2]), repl(x.args[3]))
+end
+"""
+    @starpu_filter farray = A[rows, cols]
+
+Declares a subarray.
+
+Ex : `@starpu_filter ha = A[ _:_+1, : ]`
+
+The expansion registers `A` with `starpu_data_register`, stores the resulting
+handle in `farray` (in the caller's scope) and partitions it with
+`starpu_data_partition`. One of the two indices must be `:`; the other one
+must be a range whose lower bound is the placeholder `_`:
+
+- `A[:, _:ex]` selects `STARPU_MATRIX_FILTER_BLOCK`;
+- `A[_:ex, :]` selects `STARPU_MATRIX_FILTER_VERTICAL_BLOCK`.
+
+The upper bound of the range is rewritten by `repl` and passed as the second
+argument of `starpu_data_filter`. When the right-hand side is not an indexing
+expression, the data is only registered. An expression that is not an
+assignment expands to `nothing`.
+
+Throws an `ArgumentError` when neither index is `:` and an `AssertionError`
+when the lower bound of the range is not `_`.
+"""
+macro starpu_filter(expr)
+    if (expr.head==Symbol("="))
+        # Left-hand side of the assignment; both branches below need it.
+        farray = expr.args[1]
+        region = expr.args[2]
+        if (region isa Expr && region.head == Symbol("ref"))
+            # `index` is the position (in region.args) of the range index,
+            # i.e. the one that is not `:`.
+            if (region.args[2]==Symbol(":"))
+                index = 3
+                filter_func = :(STARPU_MATRIX_FILTER_BLOCK)
+            elseif (region.args[3] == Symbol(":"))
+                index = 2
+                filter_func = :(STARPU_MATRIX_FILTER_VERTICAL_BLOCK)
+            else
+                throw(ArgumentError("@starpu_filter: one of the two indices must be `:`"))
+            end
+            ex = repl(region.args[index].args[3])
+            if (region.args[index].args[2] != Symbol("_"))
+                throw(AssertionError("LHS must be _"))
+            end
+            ret = quote
+                # escape and not global for farray!
+                $(esc(farray)) = starpu_data_register($(esc(region.args[1])))
+                starpu_data_partition( $(esc(farray)),starpu_data_filter($filter_func,$(esc(ex))))
+            end
+            return ret
+        else
+            # No indexing on the right-hand side: only register the data.
+            # For an expression the first argument is registered; a plain
+            # symbol is registered as is.
+            data = region isa Expr ? region.args[1] : region
+            ret = quote
+                $(esc(farray))= starpu_data_register($(esc(data)))
+            end
+            return ret
+        end
+    end
+end

+ 111 - 0
julia/src/destructible.jl

@@ -0,0 +1,111 @@
+"""
+        Object pairing a value with a list of functions (its
+        "destructors") which must be applied to that value.
+    """
+mutable struct StarpuDestructible{T}
+
+    object :: T                            # the wrapped value
+    destructors :: LinkedList{Function}    # functions to apply to `object`
+
+end
+
+# Stack of open blocks: each entry is the list of StarpuDestructible objects
+# created while that block was the innermost one.
+starpu_block_list = Vector{LinkedList{StarpuDestructible}}()
+
+"""
+    @starpu_block expr
+
+Declares a block of code: `starpu_enter_new_block()` is called, `expr` is
+evaluated, `starpu_exit_block()` is called, and the value of `expr` is the
+value of the whole block. Every StarpuDestructible created inside `expr`
+therefore has its destructors executed when the block is exited.
+
+NOTE(review): if `expr` throws, `starpu_exit_block()` is not called.
+"""
+macro starpu_block(expr)
+    body = esc(expr)
+    return quote
+        starpu_enter_new_block()
+        local result = $body
+        starpu_exit_block()
+        result
+    end
+end
+
+
+# Create a StarpuDestructible around `obj` holding `destructors` in the given
+# order, and record it at the head of the innermost open block. Fails when no
+# block is open.
+function StarpuDestructible(obj :: T, destructors :: Function...) where T
+    isempty(starpu_block_list) &&
+        error("Creation of a StarpuDestructible object while not beeing in a @starpu_block")
+
+    destr_list = LinkedList{Function}()
+    foreach(d -> add_to_tail!(destr_list, d), destructors)
+
+    wrapped = StarpuDestructible{T}(obj, destr_list)
+    add_to_head!(starpu_block_list[end], wrapped)
+
+    return wrapped
+end
+
+# Open a new block: push an empty list of destructible objects on the stack.
+starpu_enter_new_block() = push!(starpu_block_list, LinkedList{StarpuDestructible}())
+
+# Apply every stored destructor of `x` to its object, walking the list with
+# `@foreach_asc`, then empty the destructor list.
+function starpu_destruct!(x :: StarpuDestructible)
+    target = x.object
+
+    @foreach_asc  x.destructors lnk begin
+        lnk.data(target)
+    end
+
+    empty!(x.destructors)
+    return nothing
+end
+
+
+# Close the innermost block: pop its list from the stack and run
+# `starpu_destruct!` on every object recorded in it.
+function starpu_exit_block()
+    closing = pop!(starpu_block_list)
+
+    @foreach_asc closing lnk begin
+        starpu_destruct!(lnk.data)
+    end
+end
+
+"""
+    Adds new destructors to `x`. Each one is inserted at the head of the
+        stored list, so it will be executed before the already stored ones
+        when calling starpu_destruct!
+"""
+function starpu_add_destructor!(x :: StarpuDestructible, destrs :: Function...)
+    foreach(destrs) do d
+        add_to_head!(x.destructors, d)
+    end
+
+    return nothing
+end
+
+"""
+    Removes a destructor without executing it.
+    The stored destructors are scanned with `@foreach_asc`; the scan stops at
+    the first link whose function equals `destr`, which is removed.
+    Nothing happens if `destr` is not stored.
+"""
+function starpu_remove_destructor!(x :: StarpuDestructible, destr :: Function)
+
+    @foreach_asc x.destructors lnk begin
+
+        if (lnk.data == destr)
+            remove_link!(lnk)
+            break  # at most one link is removed
+        end
+    end
+
+    return nothing
+end
+
+"""
+    Executes the "destr" function on the object of `x` and returns its result.
+    If "destr" was one of the stored destructors, it is removed first, so it
+    will not be executed again later.
+    This function can be used to allow the user to execute a specific action
+        manually (ex : explicit call to starpu_data_unpartition() without
+        unregistering)
+"""
+function starpu_execute_destructor!(x :: StarpuDestructible, destr :: Function)
+    starpu_remove_destructor!(x, destr)
+    result = destr(x.object)
+    return result
+end

+ 9 - 0
julia/src/dummy.c

@@ -0,0 +1,9 @@
+#include <starpu.h>
+
+/*
+ * Table holding the addresses of the StarPU functions listed below.
+ * NOTE(review): presumably its only purpose is to make this library
+ * reference these symbols, so that they can be looked up by name through the
+ * library at run time -- confirm.
+ */
+void  *dummy_function_list[] = {
+				starpu_matrix_filter_vertical_block,
+				starpu_matrix_filter_block,
+				starpu_vector_filter_block,
+				starpu_init,
+};
+

+ 34 - 0
julia/src/globals.jl

@@ -0,0 +1,34 @@
+
+# Handle of the StarPU wrapper library; C_NULL until a library is opened.
+global starpu_wrapper_library_handle = C_NULL
+
+# Handle of the library providing the codelet functions; C_NULL until a
+# library is opened.
+global starpu_tasks_library_handle = C_NULL
+
+# Target selected by default.
+global starpu_target=STARPU_CPU
+
+# Names of the generated kernel files.
+# NOTE(review): "PRINT TO STDOUT" looks like a sentinel meaning that no file
+# name was chosen -- confirm against the file generation code.
+global generated_cuda_kernel_file_name = "PRINT TO STDOUT"
+global generated_cpu_kernel_file_name = "PRINT TO STDOUT"
+
+# Codelet tables, keyed by codelet name (String => String).
+export CPU_CODELETS
+global CPU_CODELETS=Dict{String,String}()
+
+export CUDA_CODELETS
+global CUDA_CODELETS=Dict{String,String}()
+
+# Per-codelet tables keyed by name; the value types are left open (Any).
+export CODELETS_SCALARS
+global CODELETS_SCALARS=Dict{String,Any}()
+
+export CODELETS_PARAMS_STRUCT
+global CODELETS_PARAMS_STRUCT=Dict{String,Any}()
+
+# Maps a Julia type to the name of the matching C type.
+global starpu_type_traduction_dict = Dict(
+    Int32 => "int32_t",
+    UInt32 => "uint32_t",
+    Float32 => "float",
+    Int64 => "int64_t",
+    UInt64 => "uint64_t",
+    Float64 => "double",
+    Nothing => "void"
+)
+export starpu_type_traduction_dict
+
+# List of starpu_perfmodel objects, initially empty.
+global perfmodels = Vector{starpu_perfmodel}()

+ 46 - 0
julia/src/init.jl

@@ -0,0 +1,46 @@
+"""
+    starpu_init()
+
+Must be called before any other starpu function.
+
+Loads the library providing the codelet functions, opens the wrapper library
+`starpu_wrapper_library_name`, calls `starpu_init(C_NULL)` and opens a first
+block with `starpu_enter_new_block()`. Returns the value of
+`starpu_init(C_NULL)`.
+
+The codelet library is found in one of two ways:
+- if the environment variable `JULIA_TASK_LIB` is defined, that shared library
+  is opened and its `starpu_find_function` symbol is called to fill
+  `CPU_CODELETS`;
+- otherwise `STARPU_JULIA_BUILD` must be defined: `generated_tasks.so` is built
+  with the Makefile found under `src/dynamic_compiler` of that directory, then
+  opened.
+"""
+function starpu_init()
+    debug_print("starpu_init")
+
+    if haskey(ENV, "JULIA_TASK_LIB")
+        global starpu_tasks_library_handle= Libdl.dlopen(ENV["JULIA_TASK_LIB"])
+        debug_print("Loading external codelet library")
+        ff = Libdl.dlsym(starpu_tasks_library_handle,:starpu_find_function)
+        dump(ff)
+        # NOTE(review): the keys come from CUDA_CODELETS while the entries
+        # are written to CPU_CODELETS -- confirm this is intended.
+        for k in keys(CUDA_CODELETS)
+            name_c = Cstring_from_String(string(k))
+            kind_c = Cstring_from_String("cpu")
+            CPU_CODELETS[k] = unsafe_string(ccall(ff, Cstring, (Cstring,Cstring), name_c, kind_c))
+            println(k, ">>>>", CPU_CODELETS[k])
+        end
+    else
+        haskey(ENV, "STARPU_JULIA_BUILD") ||
+            error("Must define environment variable STARPU_JULIA_BUILD")
+        makefile = string(ENV["STARPU_JULIA_BUILD"], "/src/dynamic_compiler/Makefile")
+        debug_print("generating codelet library with ")
+        debug_print(makefile)
+        run(`make -f $makefile generated_tasks.so`)
+        global starpu_tasks_library_handle=Libdl.dlopen("generated_tasks.so")
+    end
+
+    global starpu_wrapper_library_handle= Libdl.dlopen(starpu_wrapper_library_name)
+    status = starpu_init(C_NULL)
+
+    # Open the outermost block.
+    starpu_enter_new_block()
+
+    return status
+end
+
+"""
+    starpu_shutdown()
+
+Must be called at the end of the program. Exits the current block, which
+runs the destructors of the objects recorded in it, then calls the C
+function `starpu_shutdown`.
+"""
+function starpu_shutdown()
+    debug_print("starpu_shutdown")
+
+    # Exit the innermost open block before shutting the runtime down.
+    starpu_exit_block()
+
+    @starpucall starpu_shutdown Cvoid ()
+
+    return nothing
+end

+ 0 - 35
julia/src/jlstarpu.h

@@ -1,35 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-/*
- * jlstarpu.h
- *
- *  Created on: 27 juin 2018
- *      Author: ajuven
- */
-
-#ifndef JLSTARPU_H_
-#define JLSTARPU_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <starpu.h>
-#include <pthread.h>
-
-#include "jlstarpu_utils.h"
-#include "jlstarpu_task.h"
-
-#endif /* JLSTARPU_H_ */

+ 0 - 134
julia/src/jlstarpu_data_handles.c

@@ -1,134 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include "jlstarpu.h"
-
-enum jlstarpu_data_filter_func
-{
-	JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0,
-	JLSTARPU_MATRIX_FILTER_BLOCK,
-	JLSTARPU_VECTOR_FILTER_BLOCK,
-};
-
-struct jlstarpu_data_filter
-{
-	enum jlstarpu_data_filter_func func;
-	unsigned int nchildren;
-
-};
-
-
-void * jlstarpu_translate_data_filter_func(enum jlstarpu_data_filter_func func)
-{
-	switch (func){
-	case JLSTARPU_MATRIX_FILTER_VERTICAL_BLOCK:
-		return starpu_matrix_filter_vertical_block;
-	case JLSTARPU_MATRIX_FILTER_BLOCK:
-		return starpu_matrix_filter_block;
-	case JLSTARPU_VECTOR_FILTER_BLOCK:
-		return starpu_vector_filter_block;
-	default:
-		return NULL;
-	}
-
-}
-
-void jlstarpu_translate_data_filter(const struct jlstarpu_data_filter * const input,struct starpu_data_filter * output)
-{
-	memset(output, 0, sizeof(struct starpu_data_filter));
-	output->filter_func = jlstarpu_translate_data_filter_func(input->func);
-	output->nchildren = input->nchildren;
-}
-
-void jlstarpu_data_partition(starpu_data_handle_t handle,const struct jlstarpu_data_filter * const jl_filter)
-{
-	struct starpu_data_filter filter;
-	jlstarpu_translate_data_filter(jl_filter, &filter);
-	starpu_data_partition(handle, &filter);
-}
-
-
-void jlstarpu_data_map_filters_1_arg(starpu_data_handle_t handle,
-	const struct jlstarpu_data_filter * const jl_filter
-	)
-{
-	struct starpu_data_filter filter;
-	jlstarpu_translate_data_filter(jl_filter, &filter);
-
-	starpu_data_map_filters(handle, 1, &filter);
-
-}
-
-
-void jlstarpu_data_map_filters_2_arg
-(
-	starpu_data_handle_t handle,
-	const struct jlstarpu_data_filter * const jl_filter_1,
-	const struct jlstarpu_data_filter * const jl_filter_2
-	)
-{
-	struct starpu_data_filter filter_1;
-	jlstarpu_translate_data_filter(jl_filter_1, &filter_1);
-
-	struct starpu_data_filter filter_2;
-	jlstarpu_translate_data_filter(jl_filter_2, &filter_2);
-
-
-	starpu_data_map_filters(handle, 2, &filter_1, &filter_2);
-
-}
-
-
-
-
-#define JLSTARPU_GET(interface, field, ret_type)			\
-									\
-	ret_type jlstarpu_##interface##_get_##field(const struct starpu_##interface##_interface * const x) \
-	{								\
-		return (ret_type) x->field;				\
-	}								\
-
-
-
-
-
-JLSTARPU_GET(vector, ptr, void *)
-JLSTARPU_GET(vector, nx, uint32_t)
-JLSTARPU_GET(vector, elemsize, size_t)
-
-
-
-JLSTARPU_GET(matrix, ptr, void *)
-JLSTARPU_GET(matrix, ld, uint32_t)
-JLSTARPU_GET(matrix, nx, uint32_t)
-JLSTARPU_GET(matrix, ny, uint32_t)
-JLSTARPU_GET(matrix, elemsize, size_t)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

+ 0 - 27
julia/src/jlstarpu_simple_functions.c

@@ -1,27 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include "jlstarpu.h"
-
-int jlstarpu_init(void)
-{
-	return starpu_init(NULL);
-}
-
-void jlstarpu_set_to_zero(void * ptr, unsigned int size)
-{
-	memset(ptr, 0, size);
-}

+ 0 - 76
julia/src/jlstarpu_task.h

@@ -1,76 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-/*
- * jlstarpu_task.h
- *
- *  Created on: 27 juin 2018
- *      Author: ajuven
- */
-
-#ifndef JLSTARPU_TASK_H_
-#define JLSTARPU_TASK_H_
-
-
-#include "jlstarpu.h"
-
-struct jlstarpu_codelet
-{
-	uint32_t where;
-
-  	uint32_t color;
-
-	starpu_cpu_func_t cpu_func;
-	char * cpu_func_name;
-
-	starpu_cuda_func_t cuda_func;
-	starpu_opencl_func_t opencl_func;
-
-	int nbuffer;
-	enum starpu_data_access_mode * modes;
-
-	struct starpu_perfmodel * model;
-
-};
-
-
-
-struct jlstarpu_task
-{
-	struct starpu_codelet * cl;
-	starpu_data_handle_t * handles;
-	unsigned int synchronous;
-
-	void * cl_arg;
-	size_t cl_arg_size;
-};
-
-
-#if 0
-
-struct cl_args_decorator
-{
-	struct jlstarpu_function_launcher * launcher;
-	void * cl_args;
-};
-
-#endif
-
-
-
-
-
-#endif /* JLSTARPU_TASK_H_ */

+ 0 - 211
julia/src/jlstarpu_task_submit.c

@@ -1,211 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-/*
- * jlstarpu_task_submit.c
- *
- *  Created on: 27 juin 2018
- *      Author: ajuven
- */
-
-
-#include "jlstarpu.h"
-
-
-/* Allocate room for one struct starpu_codelet with TYPE_MALLOC, run
- * starpu_codelet_init() on it and return the resulting pointer. */
-struct starpu_codelet * jlstarpu_new_codelet()
-{
-	struct starpu_codelet * codelet;
-
-	TYPE_MALLOC(codelet, 1);
-	starpu_codelet_init(codelet);
-
-	return codelet;
-}
-
-
-#if 0
-/* Disabled (compiled out by the surrounding #if 0).
- * Allocates a new struct starpu_codelet, initializes it and fills it from
- * `input`; unlike jlstarpu_codelet_update() it does not copy `color`. */
-struct starpu_codelet * jlstarpu_translate_codelet(struct jlstarpu_codelet * const input)
-{
-	struct starpu_codelet * output;
-	TYPE_MALLOC(output, 1);
-
-	starpu_codelet_init(output);
-
-	output->where = input->where;
-	output->cpu_funcs[0] = input->cpu_func;
-	output->cpu_funcs_name[0] = input->cpu_func_name;
-
-	output->cuda_funcs[0] = input->cuda_func;
-	output->opencl_funcs[0] = input->opencl_func;
-
-	output->nbuffers = input->nbuffer;
-	memcpy(&(output->modes), input->modes, input->nbuffer * sizeof(enum starpu_data_access_mode));
-
-	output->model = input->model;
-
-	return output;
-}
-#endif
-
-/* Copy every field of the flat description `input` into the StarPU codelet
- * `output`. Only slot 0 of each implementation array is written. */
-void jlstarpu_codelet_update(const struct jlstarpu_codelet * const input, struct starpu_codelet * const output)
-{
-	const size_t modes_bytes = input->nbuffer * sizeof(enum starpu_data_access_mode);
-
-	/* Scalar settings. */
-	output->where = input->where;
-	output->color = input->color;
-	output->model = input->model;
-
-	/* One implementation per architecture. */
-	output->cpu_funcs[0] = input->cpu_func;
-	output->cpu_funcs_name[0] = input->cpu_func_name;
-	output->cuda_funcs[0] = input->cuda_func;
-	output->opencl_funcs[0] = input->opencl_func;
-
-	/* Buffer count followed by the per-buffer access modes. */
-	output->nbuffers = input->nbuffer;
-	memcpy(&(output->modes), input->modes, modes_bytes);
-}
-#if 0
-/* Disabled (compiled out by the surrounding #if 0): releases a codelet
- * with free(). */
-void jlstarpu_free_codelet(struct starpu_codelet * cl)
-{
-	free(cl);
-}
-#endif
-
-/* Smoke-test entry point: writes a fixed greeting to stderr, with no
- * trailing newline. */
-void jlstarpu_hello() {
-	fputs("coucou !", stderr);
-}
-
-#if 0
-/* Disabled (compiled out by the surrounding #if 0).
- * Creates a task with starpu_task_create() and fills its codelet, handles
- * and synchronous flag from `input`; returns NULL when creation fails. */
-struct starpu_task * jlstarpu_translate_task(const struct jlstarpu_task * const input)
-{
-	struct starpu_task * output = starpu_task_create();
-
-	if (output == NULL){
-		return NULL;
-	}
-
-	output->cl = input->cl;
-	memcpy(&(output->handles), input->handles, input->cl->nbuffers * sizeof(starpu_data_handle_t));
-	output->synchronous = input->synchronous;
-
-
-	return output;
-}
-#endif
-
-/* Stub: ignores both arguments and always returns NULL. */
-char *starpu_find_function(char *name, char *device) {
-	(void) name;
-	(void) device;
-
-	return NULL;
-}
-
-/* Copy the flat task description `input` into the StarPU task `output`.
- * The number of handles copied is taken from the codelet's nbuffers. */
-void jlstarpu_task_update(const struct jlstarpu_task * const input, struct starpu_task * const output)
-{
-	struct starpu_codelet * const codelet = input->cl;
-	const size_t handles_bytes = codelet->nbuffers * sizeof(starpu_data_handle_t);
-
-	output->cl = codelet;
-	memcpy(&(output->handles), input->handles, handles_bytes);
-
-	output->synchronous = input->synchronous;
-
-	/* Opaque codelet argument and its size in bytes. */
-	output->cl_arg = input->cl_arg;
-	output->cl_arg_size = input->cl_arg_size;
-}
-
-/*
-
-void print_perfmodel(struct starpu_perfmodel * p)
-{
-	printf("Perfmodel at address %p:\n");
-	printf("\ttype : %u\n", p->type);
-	printf("\tcost_function : %p\n", p->cost_function);
-	printf("\tarch_cost_function : %p\n", p->arch_cost_function);
-	printf("\tsize_base : %p\n", p->size_base);
-	printf("\tfootprint : %p\n", p->footprint);
-	printf("\tsymbol : %s\n", p->symbol);
-	printf("\tis_loaded : %u\n", p->is_loaded);
-	printf("\tbenchmarking : %u\n", p->benchmarking);
-	printf("\tis_init : %u\n", p->is_init);
-	printf("\tparameters : %p\n", p->parameters);
-	printf("\tparameters_names : %p\n", p->parameters_names);
-	printf("\tnparameters : %u\n", p->nparameters);
-	printf("\tcombinations : %p\n", p->combinations);
-	printf("\tncombinations : %u\n", p->ncombinations);
-	printf("\tstate : %p\n", p->state);
-
-}
-
-
-*/
-
-#if 0
-/*
- * Disabled (compiled out by the surrounding #if 0).
- * Translates `jl_task` with jlstarpu_translate_task() and submits the result
- * with starpu_task_submit(). Returns EXIT_FAILURE when the task cannot be
- * created, otherwise the return code of starpu_task_submit().
- *
- * TODO : free memory
- */
-int jlstarpu_task_submit(const struct jlstarpu_task * const jl_task)
-{
-	DEBUG_PRINT("Inside C wrapper");
-
-	struct starpu_task * task;
-	int ret_code;
-
-
-	DEBUG_PRINT("Translating task...");
-	task = jlstarpu_translate_task(jl_task);
-
-	if (task == NULL){
-		fprintf(stderr, "Error while creating the task.\n");
-		return EXIT_FAILURE;
-	}
-
-	DEBUG_PRINT("Task translated");
-	DEBUG_PRINT("Submitting task to StarPU...");
-	ret_code = starpu_task_submit(task);
-	DEBUG_PRINT("starpu_task_submit has returned");
-
-
-	if (ret_code != 0){
-		fprintf(stderr, "Error while submitting task.\n");
-		return ret_code;
-	}
-
-
-	DEBUG_PRINT("Done");
-	DEBUG_PRINT("END OF STARPU FUNCTION");
-
-
-	return ret_code;
-}
-
-#endif
-
-
-
-
-
-
-
-/*
- * Generates a function named jlstarpu_<type>_update_<field> that copies the
- * single member `field` from a struct jlstarpu_<type> into the matching
- * struct starpu_<type>. The macro is not invoked in the visible part of
- * this file.
- */
-#define JLSTARPU_UPDATE_FUNC(type, field)\
-	\
-	void jlstarpu_##type##_update_##field(const struct jlstarpu_##type * const input, struct starpu_##type * const output)\
-	{\
-		output->field = input->field;\
-	}
-
-
-
-
-
-
-
-
-
-

+ 0 - 68
julia/src/jlstarpu_utils.h

@@ -1,68 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2018       Alexis Juven
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-/*
- * jlstarpu_utils.h
- *
- *  Created on: 27 juin 2018
- *      Author: ajuven
- */
-
-#ifndef JLSTARPU_UTILS_H_
-#define JLSTARPU_UTILS_H_
-
-#include "jlstarpu.h"
-
-
-/*
- * Allocate `nb_elements` objects of the type `ptr` points to and store the
- * address in `ptr`. A count of zero sets `ptr` to NULL without allocating.
- * When malloc() fails, a message is printed to stderr and the process exits
- * with status 1. `nb_elements` is evaluated more than once, so it should be
- * free of side effects.
- */
-#define TYPE_MALLOC(ptr, nb_elements) \
-		do {\
-			if ((nb_elements) == 0){ \
-				ptr = NULL; \
-			} else { \
-				ptr = malloc((nb_elements) * sizeof(*(ptr))); \
-				if (ptr == NULL){ \
-					fprintf(stderr, "\033[31mCRITICAL : MALLOC HAS RETURNED NULL\n\033[0m");\
-					fflush(stderr);\
-					exit(1);\
-				} \
-			} \
-		} while(0)
-
-
-
-/*
- * DEBUG_PRINT(fmt, ...): when DEBUG is defined, prints the calling function's
- * name followed by the printf-style message and a newline to stderr, then
- * flushes. When DEBUG is not defined, the macro expands to nothing.
- * Uncomment the next line to enable the traces.
- */
-//#define DEBUG
-#ifdef DEBUG
-
-#define DEBUG_PRINT(...)\
-		do {\
-			fprintf(stderr, "\x1B[34m%s : \x1B[0m", __FUNCTION__);\
-			fprintf(stderr, __VA_ARGS__);\
-			fprintf(stderr, "\n");\
-			fflush(stderr);\
-		} while (0)
-
-
-
-
-#else
-
-#define DEBUG_PRINT(...)
-
-#endif
-
-
-
-#endif /* JLSTARPU_UTILS_H_ */

+ 11 - 0
julia/src/perfmodel.jl

@@ -0,0 +1,11 @@
+# Keyword constructor: builds a zero-initialised `starpu_perfmodel`, sets its
+# `type` and `symbol` fields, registers it in `perfmodels` and returns it.
+function starpu_perfmodel(; perf_type::starpu_perfmodel_type, symbol::String)
+    model = starpu_perfmodel(zero)
+    model.type = perf_type
+    model.symbol = Cstring_from_String(symbol)
+
+    # Keeping the model in `perfmodels` holds a reference to it: performance
+    # models must not be garbage collected before starpu_shutdown is called.
+    push!(perfmodels, model)
+    return model
+end

+ 216 - 0
julia/src/task.jl

@@ -0,0 +1,216 @@
+# Julia-side codelet: pairs the C `starpu_codelet` structure with the values it
+# was built from (see the `starpu_codelet(; ...)` keyword constructor).
+struct jl_starpu_codelet
+    c_codelet :: starpu_codelet     # C structure whose fields are filled by the constructor
+    perfmodel :: starpu_perfmodel   # performance model referenced by `c_codelet.model`
+    cpu_func :: String              # name of the CPU implementation ("" when absent)
+    cuda_func :: String             # name of the CUDA implementation ("" when absent)
+    opencl_func :: String           # name of the OpenCL implementation ("" when absent)
+    modes                           # access mode of each buffer (untyped collection)
+end
+
+# Keyword constructor for `jl_starpu_codelet`.
+#
+# - `cpu_func`, `cuda_func`, `opencl_func`: names of the implementations; each
+#   non-empty name is resolved with `load_starpu_function_pointer`.
+# - `modes`: access mode of each buffer, at most `STARPU_NMAXBUFS` entries.
+# - `perfmodel`: performance model attached to the codelet.
+# - `where_to_execute`: explicit `where` mask; when `nothing`, the mask enables
+#   every architecture for which an implementation name was given.
+# - `color`: value stored in the codelet's `color` field.
+#
+# Raises an error when more than `STARPU_NMAXBUFS` modes are given.
+function starpu_codelet(;
+                        cpu_func :: String = "",
+                        cuda_func :: String = "",
+                        opencl_func :: String = "",
+                        modes = [],
+                        perfmodel :: starpu_perfmodel,
+                        where_to_execute :: Union{Cvoid, UInt32} = nothing,
+                        color :: UInt32 = 0x00000000
+                        )
+
+    if (length(modes) > STARPU_NMAXBUFS)
+        error("Codelet has too much buffers ($(length(modes)) but only $STARPU_NMAXBUFS are allowed)")
+    end
+
+
+    if where_to_execute === nothing
+        # OpenCL is part of the default mask too: its function pointer is
+        # loaded below, so leaving it out would make that implementation
+        # unreachable.
+        real_where = ((cpu_func != "") * STARPU_CPU) |
+                     ((cuda_func != "") * STARPU_CUDA) |
+                     ((opencl_func != "") * STARPU_OPENCL)
+    else
+        real_where = where_to_execute
+    end
+
+    output = jl_starpu_codelet(starpu_codelet(zero), perfmodel, cpu_func, cuda_func, opencl_func, modes)
+    ## TODO: starpu_codelet_init
+
+    output.c_codelet.where = real_where
+
+    for (i, mode) in enumerate(modes)
+        output.c_codelet.modes[i] = mode
+    end
+    output.c_codelet.nbuffers = length(modes)
+    output.c_codelet.model = pointer_from_objref(perfmodel)
+    output.c_codelet.color = color
+    output.c_codelet.cpu_func = load_starpu_function_pointer(cpu_func)
+    output.c_codelet.cuda_func = load_starpu_function_pointer(cuda_func)
+    output.c_codelet.opencl_func = load_starpu_function_pointer(opencl_func)
+
+    return output
+end
+
+# Julia-side task: keeps the Julia objects a C `starpu_task` refers to next to
+# the C structure itself (see the `starpu_task(; ...)` keyword constructor).
+mutable struct jl_starpu_task
+
+    cl :: jl_starpu_codelet                             # codelet run by the task
+    handles :: Vector{StarpuDataHandle}                 # data handles given by the caller
+    handle_pointers :: Vector{StarpuDataHandlePointer}  # the `object` field of each handle
+    synchronous :: Bool
+    cl_arg # type depends on codelet
+    c_task :: starpu_task                               # C structure submitted to StarPU
+end
+
+"""
+    starpu_task(; cl :: jl_starpu_codelet, handles :: Vector{StarpuDataHandle}, cl_arg = ())
+
+Create a new task which will run the codelet `cl` on the buffers designated by
+`handles`, with the scalar arguments given in the tuple `cl_arg`.
+
+When scalar parameters are registered in `CODELETS_SCALARS` for the codelet's
+function, `cl_arg` must hold exactly that many values; they are packed with
+`create_param_struct_from_clarg`. Otherwise `cl_arg` is stored as is.
+
+An error is raised when `cl` is missing, when the codelet names no function, or
+when the number of scalar arguments does not match the registered parameters.
+"""
+function starpu_task(; cl :: Union{Cvoid, jl_starpu_codelet} = nothing, handles :: Vector{StarpuDataHandle} = StarpuDataHandle[], cl_arg = ())
+
+    if cl === nothing
+        error("\"cl\" field can't be empty when creating a StarpuTask")
+    end
+
+    output = jl_starpu_task(cl, handles, map((x -> x.object), handles), false, nothing, starpu_task(zero))
+
+    # handle scalar_parameters: they are registered under the first non-empty
+    # function name among the CPU, CUDA and OpenCL implementations.
+    codelet_name = cl.cpu_func
+    if isempty(codelet_name)
+        codelet_name = cl.cuda_func
+    end
+    if isempty(codelet_name)
+        codelet_name = cl.opencl_func
+    end
+    if isempty(codelet_name)
+        error("No function provided with codelet.")
+    end
+    scalar_parameters = get(CODELETS_SCALARS, codelet_name, nothing)
+    if scalar_parameters !== nothing
+        nb_scalar_required = length(scalar_parameters)
+        nb_scalar_provided = tuple_len(cl_arg)
+        if (nb_scalar_provided != nb_scalar_required)
+            error("$nb_scalar_provided scalar parameters provided but $nb_scalar_required are required by $codelet_name.")
+        end
+        output.cl_arg = create_param_struct_from_clarg(codelet_name, cl_arg)
+    else
+        output.cl_arg = cl_arg
+    end
+
+    starpu_task_init(Ref(output.c_task))
+    output.c_task.cl = pointer_from_objref(cl.c_codelet)
+    output.c_task.synchronous = false
+
+    ## TODO: check num handles equals num codelet buffers
+    for (i, handle_pointer) in enumerate(output.handle_pointers)
+        output.c_task.handles[i] = handle_pointer
+    end
+    if tuple_len(cl_arg) > 0
+        # NOTE(review): the `Ref` created here is not stored anywhere, so nothing
+        # visible in this function keeps the pointed-to memory alive after the
+        # conversion -- confirm the lifetime of `cl_arg` with the callers.
+        output.c_task.cl_arg = Base.unsafe_convert(Ptr{Cvoid}, Ref(output.cl_arg))
+        output.c_task.cl_arg_size = sizeof(output.cl_arg)
+    end
+    return output
+end
+
+
+# Instantiate the scalar-parameter structure registered for codelet `name` in
+# `CODELETS_PARAMS_STRUCT`, passing the elements of `cl_arg` to its constructor
+# in order, and return the new object.
+#
+# Raises an error when no structure name is registered for `name`.
+function create_param_struct_from_clarg(name, cl_arg)
+    # `get` with a default keeps the check below reachable: plain indexing
+    # would throw a KeyError before the intended message could be produced.
+    struct_params_name = get(CODELETS_PARAMS_STRUCT, name, false)
+
+    if struct_params_name == false
+        error("structure name not found in CODELET_PARAMS_STRUCT")
+    end
+
+    # Only the registered type name is parsed and evaluated. The argument
+    # values are handed to the constructor as they are, instead of being
+    # printed into source text and parsed back, which loses any value whose
+    # printed form is not valid Julia syntax (strings, pointers, ...).
+    struct_type = Core.eval(@__MODULE__, Meta.parse(string(struct_params_name)))
+
+    # invokelatest: the structure may have been defined after this function
+    # was compiled.
+    return Base.invokelatest(struct_type, cl_arg...)
+end
+
+"""
+    starpu_task_submit(task :: jl_starpu_task)
+
+Launch the execution of `task`. If the "synchronous" task field is set to
+"false", the call returns immediately.
+
+An error is raised when the number of handles of the task differs from the
+number of access modes of its codelet.
+"""
+function starpu_task_submit(task :: jl_starpu_task)
+    nb_handles = length(task.handles)
+    nb_modes = length(task.cl.modes)
+    if nb_handles != nb_modes
+        # Report the counts, not the content of the modes collection.
+        error("Invalid number of handles for task : $nb_handles were given while codelet has $nb_modes modes")
+    end
+
+    starpu_task_submit(Ref(task.c_task))
+end
+
+# Map an access-mode name to its constant: `:STARPU_RW` and `:STARPU_R` map to
+# the constants of the same name, every other symbol maps to `STARPU_W`.
+function starpu_modes(x :: Symbol)
+    x == :STARPU_RW && return STARPU_RW
+    x == :STARPU_R && return STARPU_R
+    return STARPU_W
+end
+
+"""
+    @starpu_async_cl cl(handle1, handle2) [STARPU_R, STARPU_RW] [cl_arg [color]]
+
+Create and submit an asynchronous task running the `cl` codelet function.
+
+The first argument is a call expression whose arguments are the data handles.
+The second one is a vector literal giving the access mode of each handle
+(`STARPU_R`, `STARPU_RW`; any other name is treated as `STARPU_W`).
+`cl_arg` is forwarded to `starpu_task` and `color` to `starpu_codelet`.
+
+Ex : @starpu_async_cl cl(handle1, handle2) [STARPU_R, STARPU_RW]
+"""
+macro starpu_async_cl(expr, modes, cl_arg=(), color ::UInt32=0x00000000)
+
+    if (!isa(expr, Expr) || expr.head != :call)
+        error("Invalid task submit syntax")
+    end
+    # The type test must be done on `modes` itself: testing `expr` again would
+    # let a non-Expr `modes` reach the `.head` access below.
+    if (!isa(modes, Expr) || modes.head != :vect)
+        error("Invalid task submit syntax")
+    end
+    perfmodel = starpu_perfmodel(
+        perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
+        symbol = "history_perf"
+    )
+    cpu_func_name = CPU_CODELETS[string(expr.args[1])]
+    println(cpu_func_name)
+    cl = starpu_codelet(
+        cpu_func = cpu_func_name,
+        # cuda_func = CUDA_CODELETS[string(expr.args[1])],
+        #opencl_func="ocl_matrix_mult",
+        ### TODO: CORRECT !
+        modes = map(starpu_modes, modes.args),
+        perfmodel = perfmodel,
+        color = color
+    )
+    # The arguments of the call expression are the data handles of the task.
+    handles = Expr(:vect, expr.args[2:end]...)
+    #dump(handles)
+    quote
+        task = starpu_task(cl = $(esc(cl)), handles = $(esc(handles)), cl_arg=$(esc(cl_arg)))
+        starpu_task_submit(task)
+    end
+end
+
+"""
+    starpu_task_wait_for_all()
+
+Block until every submitted task has finished. The C function is invoked
+through `@threadcall` and its `Cint` result is returned.
+"""
+function starpu_task_wait_for_all()
+    return @threadcall(@starpufunc(:starpu_task_wait_for_all), Cint, ())
+end
+
+"""
+    @starpu_sync_tasks expr
+
+Evaluate `expr`, then block until every submitted task has finished.
+
+    Ex : @starpu_sync_tasks begin
+                [...]
+                starpu_task_submit(task)
+                [...]
+        end
+
+    TODO : Make the macro only wait for tasks declared inside the following expression.
+            (similar mechanism as @starpu_block)
+"""
+macro starpu_sync_tasks(expr)
+    # The user code runs in the caller's scope; the wait call comes right after.
+    wait_call = :(starpu_task_wait_for_all())
+    return Expr(:block, esc(expr), wait_call)
+end
+
+# Forward to the `starpu_task_destroy` method taking a reference to the C task
+# wrapped by `task`.
+starpu_task_destroy(task :: jl_starpu_task) = starpu_task_destroy(Ref(task.c_task))

+ 19 - 0
julia/src/task_dep.jl

@@ -0,0 +1,19 @@
+# Declare that tag `id` depends on `dep` and on every tag of `other_deps`, by
+# gathering them in one array passed to `starpu_tag_declare_deps_array`.
+function starpu_tag_declare_deps(id :: starpu_tag_t, dep :: starpu_tag_t, other_deps :: starpu_tag_t...)
+
+    v = [dep, other_deps...]
+    # A raw `pointer(v)` does not keep `v` alive: preserve the array for the
+    # whole duration of the call.
+    GC.@preserve v starpu_tag_declare_deps_array(id, length(v), pointer(v))
+end
+
+"""
+    starpu_task_declare_deps(task :: jl_starpu_task, dep :: jl_starpu_task [, other_deps :: jl_starpu_task...])
+
+    Declare task dependencies between a task and the following provided ones. This function must be called
+    prior to the submission of the task, but it may be called after the submission or the execution of the tasks in the array,
+    provided the tasks are still valid (i.e. they were not automatically destroyed). Calling this function on a task that was
+    already submitted or with an entry of task_array that is no longer a valid task results in an undefined behaviour.
+"""
+function starpu_task_declare_deps(task :: jl_starpu_task, dep :: jl_starpu_task, other_deps :: jl_starpu_task...)
+
+    task_array = [dep.c_task, map((t -> t.c_task), other_deps)...]
+    # A raw `pointer(task_array)` does not keep the array alive: preserve it
+    # for the whole duration of the call.
+    GC.@preserve task_array starpu_task_declare_deps_array(task.c_task, length(task_array), pointer(task_array))
+end

+ 73 - 0
julia/src/translate_headers.jl

@@ -0,0 +1,73 @@
+using Clang
+using Clang.LibClang.LLVM_jll
+
+# Generate the Julia translation of the StarPU headers into the `gen`
+# directory next to this source directory. Nothing is done when both generated
+# files already exist.
+function translate_starpu_headers()
+    gen_dir = (@__DIR__)*"/../gen"
+
+    already_generated = isfile(gen_dir*"/libstarpu_common.jl") && isfile(gen_dir*"/libstarpu_api.jl")
+    already_generated && return
+
+    isdir(gen_dir) || mkdir(gen_dir)
+
+    debug_print("Translating StarPU headers...")
+
+    # Every `.h` file of the StarPU include directory is translated.
+    starpu_include = fstarpu_include_dir()
+    header_names = filter(name -> endswith(name, ".h"), readdir(starpu_include))
+    starpu_headers = [joinpath(starpu_include, name) for name in header_names]
+    libclang_include = normpath(joinpath(dirname(LLVM_jll.libclang_path), "..", "include", "clang-c"))
+
+    # One "-I <dir>" pair for StarPU, then one per standard header directory.
+    clang_args = ["-I", starpu_include]
+    for std_dir in find_std_headers()
+        push!(clang_args, "-I", std_dir)
+    end
+
+    # Only these symbols are kept in the generated files.
+    only_select_symbols = Set([
+        "starpu_task",
+        "starpu_codelet",
+        "starpu_data_filter",
+        "starpu_tag_t",
+        "starpu_perfmodel",
+        "starpu_perfmodel_type",
+        "starpu_data_handle_t",
+        "starpu_init",
+        "starpu_data_unregister",
+        "starpu_data_partition",
+        "starpu_data_unpartition",
+        "starpu_data_get_sub_data",
+        "starpu_data_map_filters",
+        "starpu_matrix_data_register",
+        "starpu_block_data_register",
+        "starpu_vector_data_register",
+        "starpu_variable_data_register",
+        "starpu_memory_pin",
+        "starpu_memory_unpin",
+        "starpu_task_init",
+        "starpu_task_destroy",
+        "starpu_task_submit",
+        "starpu_task_wait_for_n_submitted",
+        "starpu_tag_wait",
+        "starpu_tag_declare_deps_array",
+        "starpu_task_declare_deps_array",
+        "starpu_iteration_push",
+        "starpu_iteration_pop",
+        "STARPU_CPU",
+        "STARPU_CUDA",
+        "STARPU_OPENCL",
+        "STARPU_MAIN_RAM",
+        "STARPU_NMAXBUFS",
+    ])
+
+    context = init(; headers = starpu_headers,
+                   output_file = joinpath(@__DIR__, "../gen/libstarpu_api.jl"),
+                   common_file = joinpath(@__DIR__, "../gen/libstarpu_common.jl"),
+                   clang_includes = vcat(libclang_include, CLANG_INCLUDE),
+                   clang_args = clang_args,
+                   header_library = x->"starpu_wrapper_library_name",
+                   clang_diagnostics = false,
+                   rewriter = x->x,
+                   only_select_symbols = only_select_symbols,
+                   fields_align = Dict((:starpu_pthread_spinlock_t,:taken) => 16)
+                   )
+
+    return run(context)
+end

+ 77 - 0
julia/src/utils.jl

@@ -0,0 +1,77 @@
+# Return the value of the STARPU_JULIA_LIB environment variable, raising an
+# error when it is not defined.
+function fstarpu_task_library_name()
+    library_name = get(ENV, "STARPU_JULIA_LIB", nothing)
+    library_name === nothing && error("Environment variable STARPU_JULIA_LIB must be defined")
+    return library_name
+end
+
+# Return the value of the STARPU_INCLUDE_DIR environment variable, raising an
+# error when it is not defined.
+function fstarpu_include_dir()
+    include_dir = get(ENV, "STARPU_INCLUDE_DIR", nothing)
+    include_dir === nothing && error("Environment variable STARPU_INCLUDE_DIR must be defined")
+    return include_dir
+end
+
+# Expand to the tuple `(symbol, starpu_wrapper_library_name)`.
+macro starpufunc(symbol)
+    return Expr(:tuple, symbol, :starpu_wrapper_library_name)
+end
+
+"""
+    @starpucall func ret_type arg_types args...
+
+    Used to call a StarPU function compiled inside "libjlstarpu_c_wrapper.so"
+    Works as ccall function
+
+    The expansion is a `ccall` expression whose first argument is the pair
+    `(func, starpu_wrapper_library_name)`. That pair is built while the macro
+    runs, so it holds the value `starpu_wrapper_library_name` has at expansion
+    time. The return type, argument types and arguments are escaped, i.e.
+    evaluated in the caller's scope.
+"""
+macro starpucall(func, ret_type, arg_types, args...)
+    return Expr(:call, :ccall, (func, starpu_wrapper_library_name), esc(ret_type), esc(arg_types), map(esc, args)...)
+end
+
+# Print the arguments on one line of standard output, wrapped in the ANSI
+# escape codes "\x1b[32m" ... "\x1b[0m", then flush the stream.
+function debug_print(x...)
+    color_on, color_off = "\x1b[32m", "\x1b[0m"
+    println(stdout, color_on, x..., color_off)
+    flush(stdout)
+end
+
+# Wrap the address of the bytes of `str` in a `Cstring`. No copy is made, so
+# the result is only usable while `str` is kept alive by the caller.
+function Cstring_from_String(str :: String)
+    data_ptr = pointer(str)
+    return Cstring(data_ptr)
+end
+
+# Number of elements of a tuple, taken from its type parameter.
+function tuple_len(::NTuple{N, Any}) where {N}
+    return N
+end
+
+# Call the C function `starpu_find_function` with `name` and `device` and
+# return the resulting `Cstring`; a NULL result is reported as an error.
+function starpu_find_function(name :: String, device :: String )
+    c_name = Cstring_from_String(name)
+    c_device = Cstring_from_String(device)
+    s = ccall(:starpu_find_function, Cstring, (Cstring, Cstring), c_name, c_device)
+    if !(s == C_NULL)
+        return s
+    end
+    print("NULL STRING\n")
+    error("dead")
+end
+
+# Resolve `func_name` to a function pointer inside the library designated by
+# `starpu_tasks_library_handle`.
+#
+# An empty name yields `C_NULL`. A name that cannot be found raises an error
+# naming both the symbol and the library file.
+function load_starpu_function_pointer(func_name :: String)
+
+    if (isempty(func_name))
+        return C_NULL
+    end
+
+    # `Libdl.dlsym` throws on a missing symbol, which made the check below
+    # unreachable. `Libdl.dlsym_e` returns C_NULL instead, so the error that
+    # names the library file is the one reported.
+    func_pointer = Libdl.dlsym_e(starpu_tasks_library_handle, func_name)
+
+    if (func_pointer == C_NULL)
+        error("Couldn't find function symbol $func_name into extern library file $starpu_tasks_library")
+    end
+
+    return func_pointer
+end
+
+"""
+    @starpu_noparam_function func_name ret_type
+
+    Declares a Julia function which is just calling the StarPU function
+    having the same name.
+
+    The generated function takes no argument, is exported, and performs a
+    `ccall` into `starpu_wrapper_library_name` whose result is asserted to be
+    of type `ret_type`.
+"""
+macro starpu_noparam_function(func_name, ret_type)
+
+    # Name of the Julia function to define, built from `func_name`.
+    func = Symbol(func_name)
+
+    quote
+        export $func
+        global $func() = ccall(($func_name, starpu_wrapper_library_name),
+                                $ret_type, ()) :: $ret_type
+    end
+end