Bläddra i källkod

julia: Implement starpu_task_insert().

Pierre Huchant 5 år sedan
förälder
incheckning
b7f4e19047

+ 5 - 16
julia/examples/axpy/axpy.jl

@@ -41,19 +41,6 @@ function axpy(N, NBLOCKS, alpha, display = true)
 
     block_filter = starpu_data_filter(STARPU_VECTOR_FILTER_BLOCK, NBLOCKS)
 
-    perfmodel = starpu_perfmodel(
-        perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
-        symbol = "history_perf"
-    )
-
-    cl = starpu_codelet(
-        cpu_func = "axpy",
-        cuda_func = "axpy",
-        #cuda_func = STARPU_SAXPY,
-        modes = [STARPU_R, STARPU_RW],
-        perfmodel = perfmodel
-    )
-
     if display
         println("BEFORE x[0] = ", X[1])
         println("BEFORE y[0] = ", Y[1])
@@ -68,9 +55,11 @@ function axpy(N, NBLOCKS, alpha, display = true)
         starpu_data_partition(hY, block_filter)
 
         for b in 1:NBLOCKS
-            task = starpu_task(cl = cl, handles = [hX[b],hY[b]], cl_arg=(Float32(alpha),),
-                               tag=starpu_tag_t(b))
-            starpu_task_submit(task)
+            starpu_task_insert(codelet_name = "axpy",
+                               handles = [hX[b], hY[b]],
+                               cl_arg = (Float32(alpha),),
+                               tag = starpu_tag_t(b),
+                               modes = [STARPU_R, STARPU_RW])
         end
 
         starpu_task_wait_for_all()

+ 5 - 2
julia/examples/callback/callback.jl

@@ -45,8 +45,11 @@ function variable_with_starpu(val ::Ref{Int32})
     @starpu_block let
 	hVal = starpu_data_register(val)
 
-        task = starpu_task(cl = cl, handles = [hVal], callback=callback, callback_arg=(cl, [hVal]))
-        starpu_task_submit(task)
+        starpu_task_insert(codelet_name = "variable",
+                           cl = cl,
+                           handles = [hVal],
+                           callback = callback,
+                           callback_arg = (cl, [hVal]))
 
         starpu_task_wait_for_all()
     end

+ 4 - 14
julia/examples/gemm/gemm.jl

@@ -41,26 +41,16 @@ function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: M
         starpu_data_partition(hA, horiz)
         starpu_data_map_filters(hC, vert, horiz)
         tmin=0
-        perfmodel = starpu_perfmodel(
-            perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
-            symbol = "history_perf"
-        )
-        cl = starpu_codelet(
-            cpu_func = "gemm",
-            cuda_func = "gemm",
-            modes = [STARPU_R, STARPU_R, STARPU_RW],
-            perfmodel = perfmodel
-        )
 
         for i in (1 : 10 )
             t=time_ns()
             @starpu_sync_tasks begin
                 for taskx in (1 : nslicesx)
                     for tasky in (1 : nslicesy)
-                        handles = [hA[tasky], hB[taskx], hC[taskx, tasky]]
-                        task = starpu_task(cl = cl, handles = handles, cl_arg=(alpha, beta))
-                        starpu_task_submit(task)
-                        #@starpu_async_cl matrix_mult(hA[tasky], hB[taskx], hC[taskx, tasky])
+                        starpu_task_insert(codelet_name = "gemm",
+                                           handles = [hA[tasky], hB[taskx], hC[taskx, tasky]],
+                                           cl_arg = (alpha, beta),
+                                           modes = [STARPU_R, STARPU_R, STARPU_RW])
                     end
                 end
             end

+ 4 - 1
julia/examples/mandelbrot/mandelbrot.jl

@@ -70,7 +70,10 @@ function mandelbrot_with_starpu(A ::Matrix{Int64}, cr ::Float64, ci ::Float64, d
 	starpu_data_partition(hA,horiz)
 
 	@starpu_sync_tasks for taskx in (1 : nslicesx)
-                @starpu_async_cl mandelbrot(hA[taskx]) [STARPU_W] (cr, ci, Int64((taskx-1)*dim/nslicesx), dim)
+            starpu_task_insert(codelet_name = "mandelbrot",
+                               handles = [hA[taskx]],
+                               modes = [STARPU_W],
+                               cl_arg = (cr, ci, Int64((taskx-1)*dim/nslicesx), dim))
 	end
     end
 end

+ 4 - 15
julia/examples/mult/mult.jl

@@ -82,27 +82,16 @@ function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: M
         starpu_data_partition(hA, horiz)
         starpu_data_map_filters(hC, vert, horiz)
         tmin=0
-        perfmodel = starpu_perfmodel(
-            perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
-            symbol = "history_perf"
-        )
-        cl = starpu_codelet(
-            cpu_func = "matrix_mult",
-            cuda_func = "matrix_mult",
-            #opencl_func="matrix_mult",
-            modes = [STARPU_R, STARPU_R, STARPU_W],
-            perfmodel = perfmodel
-        )
 
         for i in (1 : 10 )
             t=time_ns()
             @starpu_sync_tasks begin
                 for taskx in (1 : nslicesx)
                     for tasky in (1 : nslicesy)
-                        handles = [hA[tasky], hB[taskx], hC[taskx, tasky]]
-                        task = starpu_task(cl = cl, handles = handles, cl_arg=(Int32(stride),))
-                        starpu_task_submit(task)
-                        #@starpu_async_cl matrix_mult(hA[tasky], hB[taskx], hC[taskx, tasky])
+                        starpu_task_insert(codelet_name = "matrix_mult",
+                                           modes = [STARPU_R, STARPU_R, STARPU_W],
+                                           handles = [hA[tasky], hB[taskx], hC[taskx, tasky]],
+                                           cl_arg = (Int32(stride),))
                     end
                 end
             end

+ 4 - 17
julia/examples/vector_scal/vector_scal.jl

@@ -36,28 +36,15 @@ function vector_scal_with_starpu(v :: Vector{Float32}, m :: Int32, k :: Float32,
     @starpu_block let
         hV = starpu_data_register(v)
         tmin=0
-        perfmodel = starpu_perfmodel(
-            perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
-            symbol = "history_perf"
-        )
-        cl = starpu_codelet(
-            cpu_func = "vector_scal",
-            # cuda_func = "vector_scal",
-            #opencl_func="",
-            modes = [STARPU_RW],
-            perfmodel = perfmodel
-        )
 
         for i in (1 : 1)
             t=time_ns()
             @starpu_sync_tasks begin
-                handles = [hV]
-                task = starpu_task(cl = cl, handles = handles, cl_arg=(m, k, l))
-                starpu_task_submit(task)
+                starpu_task_insert(codelet_name = "vector_scal",
+                                   modes = [STARPU_RW],
+                                   handles = [hV],
+                                   cl_arg=(m, k, l))
             end
-            # @starpu_sync_tasks for task in (1:1)
-            #     @starpu_async_cl vector_scal(hV, STARPU_RW, [m, k, l])
-            # end
             t=time_ns()-t
             if (tmin==0 || tmin>t)
                 tmin=t

+ 1 - 0
julia/src/StarPU.jl

@@ -85,6 +85,7 @@ export starpu_data_get_sub_data
 export starpu_data_partition
 export starpu_data_unpartition
 export starpu_data_map_filters
+export starpu_task_insert
 export starpu_task_wait_for_all
 export starpu_task_submit
 export starpu_task_end_dep_add

+ 87 - 3
julia/src/task.jl

@@ -104,9 +104,17 @@ task_list = Vector{jl_starpu_task}()
 
             Creates a new task which will run the specified codelet on handle buffers and cl_args data
         """
-function starpu_task(; cl :: Union{Cvoid, jl_starpu_codelet} = nothing, handles :: Vector{StarpuDataHandle} = StarpuDataHandle[], cl_arg = (),
-                     callback :: Union{Cvoid, Function} = nothing, callback_arg = nothing, tag :: Union{Cvoid, starpu_tag_t} = nothing,
-                     sequential_consistency = true, detach = 1)
+function starpu_task(;
+                     cl :: Union{Cvoid, jl_starpu_codelet} = nothing,
+                     handles :: Vector{StarpuDataHandle} = StarpuDataHandle[],
+                     cl_arg = (),
+                     callback :: Union{Cvoid, Function} = nothing,
+                     callback_arg = nothing,
+                     tag :: Union{Cvoid, starpu_tag_t} = nothing,
+                     sequential_consistency = true,
+                     detach = 1,
+                     color :: Union{Cvoid, UInt32} = nothing,
+                     where :: Union{Cvoid, Int32} = nothing)
     if (cl == nothing)
         error("\"cl\" field can't be empty when creating a StarpuTask")
     end
@@ -159,6 +167,14 @@ function starpu_task(; cl :: Union{Cvoid, jl_starpu_codelet} = nothing, handles
         output.c_task.use_tag = 1
     end
 
+    if color != nothing
+        output.c_task.color = color
+    end
+
+    if where != nothing
+        output.c_task.where = where
+    end
+
     # Tasks must not be garbage collected before starpu_task_wait_for_all is called.
     # This is necessary in particular for tasks created inside callback functions.
     lock(mutex)
@@ -232,6 +248,74 @@ function starpu_modes(x :: Symbol)
     end
 end
 
+# Codelets built by get_default_codelet, keyed by codelet name.
+# `const` fixes only the binding (and therefore its type); the Dict stays mutable.
+const default_codelet = Dict{String, jl_starpu_codelet}()
+# Perfmodels built by get_default_perfmodel, keyed by the name used as their symbol.
+const default_perfmodel = Dict{String, starpu_perfmodel}()
+
+"""
+    get_default_perfmodel(name)
+
+Return the perfmodel stored under `name` in `default_perfmodel`.
+
+On a miss, build a `STARPU_HISTORY_BASED` perfmodel whose symbol is `name`,
+store it under `name`, and return it.
+"""
+function get_default_perfmodel(name)
+    # Values in the cache are never `nothing`, so it is a safe "miss" sentinel.
+    cached = get(default_perfmodel, name, nothing)
+    cached === nothing || return cached
+
+    model = starpu_perfmodel(
+        perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
+        symbol = name
+    )
+    default_perfmodel[name] = model
+    return model
+end
+
+"""
+    get_default_codelet(codelet_name, perfmodel, modes) :: jl_starpu_codelet
+
+Return the codelet stored under `codelet_name` in `default_codelet`.
+
+On a miss, build a codelet with the given `modes` and `perfmodel`, store it
+under `codelet_name`, and return it. Its CPU (resp. CUDA) function is
+`codelet_name` when that name is a key of `CPU_CODELETS` (resp.
+`CUDA_CODELETS`), and the empty string otherwise.
+
+The cache is keyed on `codelet_name` only: on a hit, the `perfmodel` and
+`modes` arguments are not looked at.
+"""
+function get_default_codelet(codelet_name, perfmodel, modes) :: jl_starpu_codelet
+    # Values in the cache are never `nothing`, so it is a safe "miss" sentinel.
+    cached = get(default_codelet, codelet_name, nothing)
+    cached === nothing || return cached
+
+    cpu_name = codelet_name in keys(CPU_CODELETS) ? codelet_name : ""
+    cuda_name = codelet_name in keys(CUDA_CODELETS) ? codelet_name : ""
+
+    codelet = starpu_codelet(
+        cpu_func = cpu_name,
+        cuda_func = cuda_name,
+        modes = modes,
+        perfmodel = perfmodel,
+    )
+    default_codelet[codelet_name] = codelet
+    return codelet
+end
+
+"""
+    starpu_task_insert(; codelet_name, cl, perfmodel, handles, cl_arg, callback,
+                       callback_arg, tag, sequential_consistency, detach, where,
+                       color, modes)
+
+Build a task with `starpu_task` and pass it to `starpu_task_submit`.
+
+The codelet is `cl` when it is given. Otherwise it is obtained from
+`get_default_codelet(codelet_name, perfmodel, modes)`, with `perfmodel`
+defaulting to `get_default_perfmodel(codelet_name)`. When `cl` is given,
+`codelet_name`, `perfmodel` and `modes` are not used.
+
+`handles`, `cl_arg`, `callback`, `callback_arg`, `tag`,
+`sequential_consistency`, `detach`, `color` and `where` are forwarded
+unchanged to `starpu_task`.
+
+Raises an error when neither `cl` nor `codelet_name` is given, or when `cl`
+is not given and `modes` is not given either. Returns whatever
+`starpu_task_submit` returns.
+"""
+function starpu_task_insert(;
+                            codelet_name :: Union{Cvoid, String} = nothing,
+                            cl :: Union{Cvoid, jl_starpu_codelet} = nothing,
+                            perfmodel :: Union{starpu_perfmodel, Cvoid} = nothing,
+                            handles :: Vector{StarpuDataHandle} = StarpuDataHandle[],
+                            cl_arg = (),
+                            callback :: Union{Cvoid, Function} = nothing,
+                            callback_arg = nothing,
+                            tag :: Union{Cvoid, starpu_tag_t} = nothing,
+                            sequential_consistency = true,
+                            detach = 1,
+                            where :: Union{Cvoid, Int32} = nothing,
+                            color :: Union{Cvoid, UInt32} = nothing,
+                            modes = nothing)
+    if cl === nothing
+        if codelet_name === nothing
+            error("At least one of the two parameters codelet_name or cl must be provided when calling starpu_task_insert.")
+        end
+        if modes === nothing
+            error("Modes must be defined when calling starpu_task_insert without a codelet.")
+        end
+
+        # The perfmodel is only consumed when a codelet has to be built, so it
+        # is resolved here rather than unconditionally: a caller-supplied `cl`
+        # must not leave an unused entry in the default perfmodel cache.
+        if perfmodel === nothing
+            perfmodel = get_default_perfmodel(codelet_name)
+        end
+
+        cl = get_default_codelet(codelet_name, perfmodel, modes)
+    end
+
+    task = starpu_task(cl = cl, handles = handles, cl_arg = cl_arg, callback = callback,
+                       callback_arg = callback_arg, tag = tag, sequential_consistency = sequential_consistency,
+                       detach = detach, color = color, where = where)
+
+    starpu_task_submit(task)
+end
+
 """
     Creates and submits an asynchronous task running cl Codelet function.
     Ex : @starpu_async_cl cl(handle1, handle2)