Ver código fonte

julia: Add vector_scal example.

Pierre Huchant 5 anos atrás
pai
commit
01d3d84404

+ 50 - 0
julia/vector_scal/Makefile

@@ -0,0 +1,50 @@
+# ICC compiler
+#CC =icc
+#CFLAGS=-restrict -unroll4 -ipo -falign-loops=256 -O3 -DSTRIDE=${STRIDE} -march=native $(shell pkg-config --cflags starpu-1.3)
+# GCC compiler
+CC=gcc
+# pkg-config is queried once here (simple expansion) instead of every time
+# CFLAGS / LDFLAGS are expanded inside a recipe.
+STARPU_CFLAGS := $(shell pkg-config --cflags starpu-1.3)
+STARPU_LIBS := $(shell pkg-config --libs starpu-1.3)
+CFLAGS += -g -O3 -mavx -fomit-frame-pointer -march=native -ffast-math $(STARPU_CFLAGS)
+
+LDFLAGS += $(STARPU_LIBS)
+# Shared library built from the hand-written C kernel.
+EXTERNLIB=extern_tasks.so
+# Shared library built from the gen*.c sources found in this directory.
+GENERATEDLIB=generated_tasks.so
+OBJECTS := $(patsubst %.c,%.o,$(wildcard gen*.c))
+# Directory added to LD_LIBRARY_PATH by the Julia performance-test rules.
+# $(CURDIR) is maintained by make itself, so it stays correct under `make -C`,
+# whereas ${PWD} is only whatever the calling environment exported.
+LIBPATH=$(CURDIR)/../StarPU.jl/lib
+
+# Default goal: build the shared library that wraps the hand-written C kernel.
+# Declared phony so a stray file named `all` cannot mask the goal.
+.PHONY: all
+all: ${EXTERNLIB}
+
+# C executable: the driver source and the CPU kernel object are compiled and
+# linked in a single compiler invocation (the GPU object is left out for now).
+vector_scal: vector_scal.c cpu_vector_scal.o #gpu_vector_scal.o
+	${CC} ${CFLAGS} $^ -o $@ ${LDFLAGS}
+
+# Compile the CUDA kernel source into an object file with nvcc.
+# NOTE(review): $(CFLAGS) carries GCC-style options (-mavx, -march=native,
+# -fomit-frame-pointer, ...); confirm nvcc accepts them or forward them to the
+# host compiler explicitly.
+gpu_vector_scal.o: gpu_vector_scal.cu
+	nvcc -c $(CFLAGS) $^ -o $@
+
+# Generic rule: build an object file from the C source with the same stem.
+%.o: %.c
+	${CC} ${CFLAGS} -c $< -o $@
+
+# Shared library holding the hand-written CPU kernel (cpu_vector_scal.c).
+# The source is listed before $(LDFLAGS): that variable carries the StarPU
+# libraries, and a linker running with --as-needed only records libraries that
+# appear after the objects referencing them.
+${EXTERNLIB}: cpu_vector_scal.c
+	$(CC) $(CFLAGS) -shared -fPIC $^ -o $@ $(LDFLAGS)
+
+# Link the CUDA kernel object into a shared library with nvcc; -fPIC is
+# forwarded to the host compiler through --compiler-options.
+# NOTE(review): $(CFLAGS) holds GCC-style options here as well — confirm nvcc
+# accepts them.
+gpu_vector_scal.so: gpu_vector_scal.o
+	nvcc $(CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS)
+
+# Executable linked from the cpu_vector_scal_sa object (compiled by the
+# generic %.o rule).
+cpu_vector_scal_sa: cpu_vector_scal_sa.o
+	${CC} ${CFLAGS} $^ -o $@ ${LDFLAGS}
+
+# Shared library assembled from the objects of the gen*.c sources.
+# Objects placed in a shared library must be position independent, so -fPIC is
+# appended to CFLAGS for this target; target-specific variables are inherited
+# by the prerequisites, hence the ${OBJECTS} are compiled with it too.
+# The objects are listed before $(LDFLAGS) so that the libraries it carries
+# follow the code that references them.
+${GENERATEDLIB}: CFLAGS += -fPIC
+${GENERATEDLIB}: ${OBJECTS}
+	$(CC) $(CFLAGS) -shared -fPIC $^ -o $@ $(LDFLAGS)
+
+# Remove build products, generated sources and measurement files.
+# Declared phony so a file named `clean` cannot turn this into a no-op.
+.PHONY: clean
+clean:
+	rm -f vector_scal *.so *.o c_*.genc gencuda_*.cu *.dat
+
+# Performance Tests
+# Timings of the C program: its standard output is captured in the .dat file.
+cstarpu.dat: vector_scal
+	STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 ./$< > $@
+# Run vector_scal.jl, passing the target name as its output file.
+# `VAR+=value` is not an assignment in POSIX sh (make's default shell) and,
+# in bash, glues the value on without a ':' separator; the search path is
+# therefore built explicitly, appending ${LIBPATH} to any existing value.
+julia_generatedc.dat:
+	LD_LIBRARY_PATH="$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}${LIBPATH}" STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia vector_scal.jl $@
+# Run vector_scal_native.jl, passing the target name as its output file.
+# `VAR+=value` is not an assignment in POSIX sh (make's default shell) and,
+# in bash, glues the value on without a ':' separator; the search path is
+# therefore built explicitly, appending ${LIBPATH} to any existing value.
+julia_native.dat:
+	LD_LIBRARY_PATH="$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}${LIBPATH}" STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia vector_scal_native.jl $@
+# Run vector_scal.jl with JULIA_TASK_LIB set to the hand-written kernel
+# library, passing the target name as its output file.
+# `VAR+=value` is not an assignment in POSIX sh (make's default shell) and,
+# in bash, glues the value on without a ':' separator; the search path is
+# therefore built explicitly, appending ${LIBPATH} to any existing value.
+julia_calllib.dat: ${EXTERNLIB}
+	LD_LIBRARY_PATH="$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}${LIBPATH}" JULIA_TASK_LIB="${EXTERNLIB}" STARPU_NOPENCL=0 STARPU_SCHED=dmda STARPU_CALIBRATE=1 julia vector_scal.jl $@
+
+# Produce every measurement file. Declared phony: `test` names a command,
+# not a file, so an existing file called `test` must not satisfy it.
+.PHONY: test
+test: cstarpu.dat julia_generatedc.dat julia_native.dat julia_calllib.dat

+ 42 - 0
julia/vector_scal/cpu_vector_scal.c

@@ -0,0 +1,42 @@
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <starpu.h>
+
+/* Scalar arguments of the vector_scal kernel, read from its cl_arg buffer.
+ * NOTE(review): the field order and types presumably have to match what the
+ * submitting side packs into cl_arg — confirm against the caller. */
+struct params {
+  int32_t m;  /* integer term added to every element */
+  float k;    /* multiplicative factor */
+  float l;    /* floating-point term added to every element */
+};
+
+/*
+ * CPU kernel: rewrites every element x of the vector in buffers[0] as
+ * x * k + l + m, where m, k and l are read from the struct params that
+ * cl_arg points to. Always returns 0.
+ */
+float vector_scal(void *buffers[], void *cl_arg)
+{
+  /* the scalar arguments travel in cl_arg */
+  const struct params *args = (const struct params *) cl_arg;
+  const int m = args->m;
+  const float k = args->k;
+  const float l = args->l;
+
+  struct starpu_vector_interface *vec = (struct starpu_vector_interface *) buffers[0];
+
+  /* number of elements in the vector */
+  const unsigned count = STARPU_VECTOR_GET_NX(vec);
+
+  /* the interface hands the data back as an untyped address, hence the cast
+   * to the element type */
+  float *data = (float *) STARPU_VECTOR_GET_PTR(vec);
+
+  /* walk the vector and update it in place */
+  for (float *cur = data, *end = data + count; cur != end; ++cur)
+    *cur = *cur * k + l + m;
+
+  return 0.0;
+}
+
+/* Kernel names handed back by starpu_find_function().
+ * NOTE(review): the strings read "cpu_mult"/"gpu_mult" although the only
+ * kernel visible in this file is vector_scal — confirm the intended names. */
+char* CPU = "cpu_mult";
+char* GPU = "gpu_mult";
+
+/*
+ * Return the kernel name for the requested device: GPU when `device` is
+ * exactly "gpu", CPU for anything else (including a NULL `device`, which is
+ * treated as "not gpu" instead of being passed to strcmp).
+ * `name` is accepted for interface compatibility and not inspected.
+ * strcmp() is declared in <string.h>, which the file must include.
+ */
+extern char *starpu_find_function(char *name, char *device) {
+	(void) name;
+	if (device != NULL && strcmp(device, "gpu") == 0) return GPU;
+	return CPU;
+}

+ 78 - 0
julia/vector_scal/vector_scal.jl

@@ -0,0 +1,78 @@
+import Libdl
+using StarPU
+using LinearAlgebra
+
+# Codelet source handed to the @target / @codelet macros.
+# Every element of `v` is overwritten with v[i] * k + l + m; the function
+# itself returns 0.
+# NOTE(review): presumably @target lists the devices code is generated for and
+# @parallel marks the loop as parallelisable — confirm against StarPU.jl.
+@target STARPU_CPU+STARPU_CUDA
+@codelet function vector_scal(m::Int32, v :: Vector{Float32}, k :: Float32, l :: Float32) :: Float32
+
+    N :: Int32 = length(v)
+    # Naive version
+    @parallel for i in (1 : N)
+        v[i] = v[i] * k + l + m
+    end
+
+    return 0. :: Float32
+end
+
+
+# Initialise StarPU at load time, before any of the functions below is called.
+@debugprint "starpu_init"
+starpu_init()
+
+# Submit the vector_scal codelet on `v` ten times and return the shortest
+# duration (in nanoseconds, from time_ns) of one submit round; 0 serves as the
+# "nothing measured yet" marker.
+#   v       - vector registered with StarPU and accessed in STARPU_RW mode
+#   m, k, l - scalar arguments forwarded to the task through cl_arg
+# NOTE(review): presumably @starpu_sync_tasks waits for the submitted task, so
+# each measurement covers a complete execution — confirm against StarPU.jl.
+function vector_scal_with_starpu(v :: Vector{Float32}, m :: Int32, k :: Float32, l :: Float32)
+    tmin=0
+
+    @starpu_block let
+        hV = starpu_data_register(v)
+        # NOTE(review): repeats the initialisation done just above the block.
+        tmin=0
+        perfmodel = StarpuPerfmodel(
+            perf_type = STARPU_HISTORY_BASED,
+            symbol = "history_perf"
+        )
+        cl = StarpuCodelet(
+            cpu_func = CPU_CODELETS["vector_scal"],
+            #cuda_func = "matrix_mult",
+            #opencl_func="ocl_matrix_mult",
+            modes = [STARPU_RW],
+            perfmodel = perfmodel
+        )
+
+        for i in (1 : 10)
+            t=time_ns()
+            @starpu_sync_tasks begin
+                handles = [hV]
+                # NOTE(review): the literal [m, k, l] mixes Int32 and Float32,
+                # which Julia promotes to a Vector{Float32} — confirm that the
+                # receiving kernel expects `m` in that form.
+                task = StarpuTask(cl = cl, handles = handles, cl_arg=[m, k, l])
+                starpu_task_submit(task)
+            end
+            # @starpu_sync_tasks for task in (1:1)
+            #     @starpu_async_cl vector_scal(hV, STARPU_RW, [m, k, l])
+            # end
+            t=time_ns()-t
+            # keep the smallest duration seen so far
+            if (tmin==0 || tmin>t)
+                tmin=t
+            end
+        end
+    end
+    return tmin
+end
+
+# For every vector length in start_dim:step_dim:stop_dim, build a random
+# Float32 vector, run vector_scal_with_starpu on it with m=10, k=2, l=3, and
+# write "<length> <best time>" both to `io` and to standard output.
+# The leading elements of the vector are printed before and after the run.
+function compute_times(io,start_dim, step_dim, stop_dim)
+    for dim in (start_dim : step_dim : stop_dim)
+        V = rand(Cfloat, dim)
+        m :: Int32 = 10
+        k :: Float32 = 2.
+        l :: Float32 = 3.
+        # Preview at most ten elements, so vectors shorter than ten do not
+        # raise a BoundsError.
+        preview = 1:min(10, length(V))
+        println("INPUT ", V[preview])
+        mt =  vector_scal_with_starpu(V, m, k, l)
+        println("OUTPUT ", V[preview])
+        println(io,"$dim $mt")
+        println("$dim $mt")
+    end
+end
+
+
+# Script entry point: ARGS[1] names the file that receives the measurements.
+# The do-block form of open closes the file even when compute_times throws.
+open(ARGS[1],"w") do io
+    compute_times(io,1024,1024,4096)
+end
+@debugprint "starpu_shutdown"
+starpu_shutdown()
+