Explorar o código

Merge branch 'master' of git+ssh://scm.gforge.inria.fr/gitroot/starpu/starpu

Samuel Thibault hai 5 anos
pai
achega
404af1268a

+ 9 - 10
julia/StarPU.jl/src/StarPU.jl

@@ -30,16 +30,11 @@ macro starpucall(func, ret_type, arg_types, args...)
     return Expr(:call, :ccall, (func, starpu_task_library_name), esc(ret_type), esc(arg_types), map(esc, args)...)
 end
 
-export @debugprint
-macro debugprint(x...)
-    quote
-        println("\x1b[32m", $x..., "\x1b[0m")
-        flush(stdout)
-    end
+function debug_print(x...)
+    println("\x1b[32m", x..., "\x1b[0m")
+    flush(stdout)
 end
 
-
-
 function Cstring_from_String(str :: String)
     return Cstring(pointer(str))
 end
@@ -619,9 +614,11 @@ end
     cpu and gpu function names
 """
 function starpu_init()
+    debug_print("starpu_init")
+
     if (get(ENV,"JULIA_TASK_LIB",0)!=0)
         global starpu_tasks_library_handle= Libdl.dlopen(ENV["JULIA_TASK_LIB"])
-        @debugprint "Loading external codelet library"
+        debug_print("Loading external codelet library")
         ff = Libdl.dlsym(starpu_tasks_library_handle,:starpu_find_function)
         dump(ff)
         for k in keys(CUDA_CODELETS)
@@ -629,7 +626,7 @@ function starpu_init()
             print(k,">>>>",CPU_CODELETS[k],"\n")
         end
     else
-        @debugprint "generating codelet library"
+        debug_print("generating codelet library")
         run(`make generated_tasks.so`);
         global starpu_tasks_library_handle=Libdl.dlopen("generated_tasks.so")
     end
@@ -644,6 +641,8 @@ end
     Must be called at the end of the program
 """
 function starpu_shutdown()
+    debug_print("starpu_shutdown")
+
     starpu_exit_block()
     @starpucall starpu_shutdown Cvoid ()
     jlstarpu_free_allocated_structures()

+ 8 - 1
julia/StarPU.jl/src/compiler/c.jl

@@ -172,6 +172,13 @@ function substitute_args(expr :: StarpuExprFunction)
             push!(function_start_affectations, post_affect)
             new_body = substitute_argument_usage(new_body, buffer_id, buffer_arg_name, expr.args[i].name, var_name)
             buffer_id += 1
+        elseif (expr.args[i].typ <: Ref)
+            func_interface = :STARPU_VARIABLE_GET_PTR
+            type_in_arg = eltype(expr.args[i].typ)
+            new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$buffer_id])) )
+            push!(function_start_affectations, new_affect)
+            new_body = substitute_argument_usage(new_body, buffer_id, buffer_arg_name, expr.args[i].name, Symbol("(*$var_name)"))
+            buffer_id += 1
         elseif (expr.args[i].typ <: Number || expr.args[i].typ <: AbstractChar)
             type_in_arg = eltype(expr.args[i].typ)
             field_name = scalar_parameters[scalar_id][1]
@@ -182,7 +189,7 @@ function substitute_args(expr :: StarpuExprFunction)
             push!(function_start_affectations, post_affect)
             scalar_id += 1
         else
-            error("Task arguments must be either vector or matrix or scalr (got $(expr.args[i].typ))")
+            error("Task arguments must be either matrix, vector, ref or scalar (got $(expr.args[i].typ))")
         end
 
 

+ 25 - 27
julia/StarPU.jl/src/compiler/file_generation.jl

@@ -112,35 +112,33 @@ macro codelet(x)
     parse_scalar_parameters(parsed, cpu_name, cuda_name)
     c_struct_param_decl = generate_c_struct_param_declaration(name)
     cpu_expr = transform_to_cpu_kernel(parsed)
-    prekernel, kernel = transform_to_cuda_kernel(parsed)
+
+    if (starpu_target & STARPU_CUDA != 0)
+        prekernel, kernel = transform_to_cuda_kernel(parsed)
+    end
+
     generated_cpu_kernel_file_name=string("genc_",string(x.args[1].args[1].args[1]),".c")
     generated_cuda_kernel_file_name=string("gencuda_",string(x.args[1].args[1].args[1]),".cu")
-    targets=starpu_target
-    return quote
-        
-        if ($targets&$STARPU_CPU!=0)
-            kernel_file = open($(esc(generated_cpu_kernel_file_name)), "w")
-            @debugprint "generating " $(generated_cpu_kernel_file_name)
-            print(kernel_file, $(esc(cpu_kernel_file_start)))
-            print(kernel_file, $c_struct_param_decl)
-            print(kernel_file, $cpu_expr)
-            close(kernel_file)
-            CPU_CODELETS[$name]=$cpu_name
-        end
-        
-        if ($targets&$STARPU_CUDA!=0)
-            kernel_file = open($(esc(generated_cuda_kernel_file_name)), "w")
-            @debugprint "generating " $(generated_cuda_kernel_file_name)
-            print(kernel_file, $(esc(cuda_kernel_file_start)))
-            print(kernel_file, "__global__ ", $kernel)
-            print(kernel_file, $c_struct_param_decl) # TODO: extern C ?
-            print(kernel_file, "\nextern \"C\" ", $prekernel)
-            close(kernel_file)
-            CUDA_CODELETS[$name]=$cuda_name
-        end
-        print("end generation")
-        #starpu_task_library_name="generated_tasks"
-        #global starpu_task_library_name
+
+    if (starpu_target & STARPU_CPU != 0)
+        kernel_file = open(generated_cpu_kernel_file_name, "w")
+        debug_print("generating ", generated_cpu_kernel_file_name)
+        print(kernel_file, cpu_kernel_file_start)
+        print(kernel_file, c_struct_param_decl)
+        print(kernel_file, cpu_expr)
+        close(kernel_file)
+        CPU_CODELETS[name]=cpu_name
+    end
+
+    if starpu_target & STARPU_CUDA!=0
+        kernel_file = open(generated_cuda_kernel_file_name, "w")
+        debug_print("generating ", generated_cuda_kernel_file_name)
+        print(kernel_file, cuda_kernel_file_start)
+        print(kernel_file, "__global__ ", kernel)
+        print(kernel_file, c_struct_param_decl)
+        print(kernel_file, "\nextern \"C\" ", prekernel)
+        close(kernel_file)
+        CUDA_CODELETS[name]=cuda_name
     end
 end
 

+ 0 - 2
julia/black_scholes/black_scholes.jl

@@ -115,8 +115,6 @@ using StarPU
     return 0
 end
 
-
-@debugprint "starpu_init"
 starpu_init()
 
 function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64)

+ 0 - 2
julia/mandelbrot/mandelbrot.jl

@@ -46,7 +46,6 @@ using LinearAlgebra
     return
 end
 
-@debugprint "starpu_init"
 starpu_init()
 
 function mandelbrot_with_starpu(A ::Matrix{Int64}, cr ::Float64, ci ::Float64, dim ::Int64, nslicesx ::Int64)
@@ -101,6 +100,5 @@ end
 
 display_time(-0.800671,-0.158392,32,32,4096,4)
 
-@debugprint "starpu_shutdown"
 starpu_shutdown()
 

+ 1 - 2
julia/mult/mult.jl

@@ -57,7 +57,6 @@ const STRIDE = 72
 end
 
 
-@debugprint "starpu_init"
 starpu_init()
 
 function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
@@ -141,6 +140,6 @@ end
 io=open(ARGS[1],"w")
 compute_times(io,16*STRIDE,4*STRIDE,4096,2,2)
 close(io)
-@debugprint "starpu_shutdown"
+
 starpu_shutdown()
 

+ 0 - 5
julia/mult/mult_native.jl

@@ -5,9 +5,6 @@ using LinearAlgebra
 #shoud be the same as in the makefile
 const STRIDE = 72
 
-@debugprint "starpu_init"
-starpu_init()
-
 function multiply_without_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy)
     tmin = 0
     for i in (1 : 10 )
@@ -39,6 +36,4 @@ end
 io=open(ARGS[1],"w")
 compute_times(io,16*STRIDE,4*STRIDE,4096,2,2)
 close(io)
-@debugprint "starpu_shutdown"
-starpu_shutdown()
 

+ 58 - 0
julia/variable/Makefile

@@ -0,0 +1,58 @@
+CC=gcc
+NVCC=nvcc
+ENABLE_CUDA=no
+LD=$(CC)
+
+ifeq ($(ENABLE_CUDA),yes)
+        LD := ${NVCC}
+endif
+
+CFLAGS = -O3 -g $(shell pkg-config --cflags starpu-1.3)
+CPU_CFLAGS = ${CFLAGS} -Wall -mavx -fomit-frame-pointer -march=native -ffast-math
+CUDA_CFLAGS = ${CFLAGS}
+LDFLAGS +=$(shell pkg-config --libs starpu-1.3)
+
+EXTERNLIB=extern_tasks.so
+GENERATEDLIB=generated_tasks.so
+
+C_OBJECTS=$(patsubst %.c,%.o,$(wildcard gen*.c))
+CUDA_OBJECTS=$(patsubst %.cu,%.o,$(wildcard gen*.cu))
+ifneq ($(ENABLE_CUDA),yes)
+	CUDA_OBJECTS:=
+endif
+
+LIBPATH=${PWD}/../StarPU.jl/lib
+
+all: ${EXTERNLIB}
+
+variable: variable.c cpu_variable.o #gpu_variable.o
+	$(CC) $(CPU_CFLAGS) $^ -o $@ $(LDFLAGS)
+
+%.o: %.c
+	$(CC) -c -fPIC $(CPU_CFLAGS) $^ -o $@
+
+%.o: %.cu
+	$(NVCC) -dc $(CUDA_CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS)
+
+${EXTERNLIB}: cpu_variable.c
+	$(CC) $(CFLAGS) -shared -fPIC $(LDFLAGS) $^ -o $@
+
+${GENERATEDLIB}: $(C_OBJECTS) $(CUDA_OBJECTS)
+	$(LD) -shared $(LDFLAGS) $^ -o $@
+
+.PHONY: clean
+
+clean:
+	rm -f variable *.so *.o genc_*.c gencuda_*.cu *.dat
+
+# Performance Tests
+cstarpu.dat: variable
+	STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 ./variable -0.800671 -0.158392 32 32 4096 4 > $@
+julia_generatedc.dat:
+	LD_LIBRARY_PATH+=${LIBPATH} STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia variable.jl $@
+julia_native.dat:
+	LD_LIBRARY_PATH+=${LIBPATH} STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia variable_native.jl $@
+julia_calllib.dat: ${EXTERNLIB}
+	LD_LIBRARY_PATH+=${LIBPATH} JULIA_TASK_LIB="${EXTERNLIB}" STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia variable.jl julia_calllib.dat
+
+test: cstarpu.dat julia_generatedc.dat julia_native.dat julia_calllib.dat

+ 38 - 0
julia/variable/variable.jl

@@ -0,0 +1,38 @@
+import Libdl
+using StarPU
+
+@target STARPU_CPU
+@codelet function variable(val ::Ref{Float32}) :: Nothing
+    val[] = val[] + 1
+
+    return
+end
+
+starpu_init()
+
+function variable_with_starpu(val ::Ref{Float32}, niter)
+    @starpu_block let
+	hVal = starpu_data_register(val)
+
+	@starpu_sync_tasks for task in (1 : niter)
+                @starpu_async_cl variable(hVal) [STARPU_RW]
+	end
+    end
+end
+
+function display(niter)
+    foo = Ref(0.0f0)
+
+    variable_with_starpu(foo, niter)
+
+    println("variable -> ", foo[])
+    if foo[] == niter
+        println("result is correct")
+    else
+        println("result is incorret")
+    end
+end
+
+display(10)
+
+starpu_shutdown()

+ 26 - 0
julia/variable/variable_native.jl

@@ -0,0 +1,26 @@
+function variable(val ::Ref{Float32}) :: Nothing
+    val[] = val[] + 1
+
+    return
+end
+
+function variable_without_starpu(val ::Ref{Float32}, niter)
+    for i = 1:niter
+        variable(val)
+    end
+end
+
+function display(niter)
+    foo = Ref(0.0f0)
+
+    variable_without_starpu(foo, niter)
+
+    println("variable -> ", foo[])
+    if foo[] == niter
+        println("result is correct")
+    else
+        println("result is incorret")
+    end
+end
+
+display(10)

+ 1 - 2
julia/vector_scal/vector_scal.jl

@@ -13,7 +13,6 @@ using LinearAlgebra
 end
 
 
-@debugprint "starpu_init"
 starpu_init()
 
 function vector_scal_with_starpu(v :: Vector{Float32}, m :: Int32, k :: Float32, l :: Float32)
@@ -71,6 +70,6 @@ end
 io=open(ARGS[1],"w")
 compute_times(io,1024,1024,4096)
 close(io)
-@debugprint "starpu_shutdown"
+
 starpu_shutdown()
 

+ 6 - 5
mpi/tests/temporary.c

@@ -129,12 +129,13 @@ int main(int argc, char **argv)
 	starpu_data_unregister(tmp);
 	starpu_data_unregister(tmp2);
 
-skip:
-	starpu_mpi_shutdown();
-
 	if (rank == 0)
-		STARPU_ASSERT_MSG(val0 == 24, "%d should be %d\n", val0, 24);
+		STARPU_ASSERT_MSG(val0 == 24, "[rank 0] %d should be %d\n", val0, 24);
 	if (rank == 1)
-		STARPU_ASSERT_MSG(val1 == 24, "%d should be %d\n", val1, 24);
+		STARPU_ASSERT_MSG(val1 == 24, "[rank 1] %d should be %d\n", val1, 24);
+
+ skip:
+	starpu_mpi_shutdown();
+
 	return 0;
 }