5 years ago · 28d0b4272e
--- a/julia/tst/black_scholes/black_scholes.c
+++ b/julia/tst/black_scholes/black_scholes.c
--- a/julia/tst/black_scholes/cpu_cuda_black_scholes.jl
+++ b/julia/tst/black_scholes/cpu_cuda_black_scholes.jl
@@ -1,13 +1,8 @@
 
				-include("../../src/Compiler/include.jl")
			
 
				+import Libdl
			
 
				+using StarPU
			
 
				 
			
 
				-starpu_new_cpu_kernel_file("../build/generated_cpu_black_scholes.c")
			
 
				-starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-@cpu_cuda_kernel function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) ::Void
			
 
				+@target STARPU_CPU+STARPU_CUDA
			
 
				+@codelet function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) :: Float32
			
 
				     
			
 
				     widthn ::Int64 = width(data)
			
 
				         
			
@@ -25,7 +20,7 @@ starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
 
				     b5 ::Float64 = 1.330274428
			
 
				 
			
 
				     
			
 
				-    @indep for i = 1:widthn
			
 
				+    @parallel for i = 1:widthn
			
 
				         
			
 
				 
			
 
				         d1 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] + pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i]))
			
@@ -117,8 +112,83 @@ starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
 
				         res[2,i] = -data[1,i] * (normd1n) + data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2n) # -S * N(-d1) + r*exp(-r*T) * norm(-d2)
			
 
				         
			
 
				     end
			
 
				+    return 0
			
 
				+end
			
 
				+
			
 
				+
			
 
				+@debugprint "starpu_init"
			
 
				+starpu_init()
			
 
				+
			
 
				+function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64)
			
 
				+    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices)
			
 
				+
			
 
				+    @starpu_block let
			
 
				+        dat_handle, res_handle = starpu_data_register(data, res)
			
 
				+
			
 
				+        starpu_data_partition(dat_handle, vert)
			
 
				+        starpu_data_partition(res_handle, vert)
			
 
				+        
			
 
				+        #Compute the price of call and put option in the res matrix
			
 
				+        @starpu_sync_tasks for task in (1:nslices)
			
 
				+            @starpu_async_cl black_scholes(dat_handle[task], res_handle[task]) [STARPU_RW, STARPU_RW] 
			
 
				+        end
			
 
				+    end
			
 
				+    return 0
			
 
				 end
			
 
				 
			
 
				-compile_cpu_kernels("../build/generated_cpu_black_scholes.so")
			
 
				-compile_cuda_kernels("../build/generated_cuda_black_scholes.so")
			
 
				-combine_kernel_files("../build/generated_tasks_black_scholes.so", ["../build/generated_cpu_black_scholes.so", "../build/generated_cuda_black_scholes.so"])
			
 
				+
			
 
				+function init_data(data, data_nbr);
			
 
				+    for i in 1:data_nbr
			
 
				+        data[1,i] = rand(Float64) * 100
			
 
				+        data[2,i] = rand(Float64) * 100
			
 
				+        data[3,i] = rand(Float64)
			
 
				+        data[4,i] = rand(Float64) * 10
			
 
				+        data[5,i] = rand(Float64) * 10
			
 
				+    end
			
 
				+    return data
			
 
				+end
			
 
				+        
			
 
				+
			
 
				+
			
 
				+function median_times(data_nbr, nslices, nbr_tests)
			
 
				+
			
 
				+    data ::Matrix{Float64} = zeros(5, data_nbr)
			
 
				+    # data[1,1] = 100.0
			
 
				+    # data[2,1] = 100.0
			
 
				+    # data[3,1] = 0.05
			
 
				+    # data[4,1] = 1.0
			
 
				+    # data[5,1] = 0.2
			
 
				+
			
 
				+
			
 
				+    res ::Matrix{Float64} = zeros(2, data_nbr)
			
 
				+
			
 
				+    exec_times ::Vector{Float64} = [0. for i in 1:nbr_tests]
			
 
				+
			
 
				+    for i = 1:nbr_tests
			
 
				+        
			
 
				+        init_data(data, data_nbr)
			
 
				+
			
 
				+        tic()
			
 
				+        black_scholes_starpu(data, res, nslices);
			
 
				+        t = toq()
			
 
				+
			
 
				+        exec_times[i] = t
			
 
				+    end
			
 
				+    sort!(exec_times)
			
 
				+    # println(data)
			
 
				+    # println(res)
			
 
				+    
			
 
				+    return exec_times[1 + div(nbr_tests - 1, 2)]
			
 
				+end
			
 
				+
			
 
				+function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests)
			
 
				+    i = 1
			
 
				+    open("black_scholes_times.dat", "w") do f 
			
 
				+        for data_nbr in (start_nbr : step_nbr : stop_nbr)
			
 
				+            t = median_times(data_nbr, nslices, nbr_tests)
			
 
				+            println("Number of data:\n$data_nbr\nTimes:\njl: $t\nC: $(mtc[i])\nGen: $(mtcgen[i])")
			
 
				+            write(f, "$data_nbr $(t)\n")
			
 
				+            i = i + 1
			
 
				+        end
			
 
				+    end
			
 
				+end
			
--- a/julia/mandelbrot/mandelbrot.c
+++ b/julia/mandelbrot/mandelbrot.c
@@ -0,0 +1,263 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2019       Mael Keryell
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../display.h"
			
 
				+
			
 
				+void cpu_mandelbrot(void **, void *);
			
 
				+void gpu_mandelbrot(void **, void *);
			
 
				+
			
 
				+struct Params
			
 
				+{
			
 
				+	float cr;
			
 
				+	float ci;
			
 
				+	unsigned taskx;
			
 
				+	unsigned tasky;
			
 
				+	unsigned width;
			
 
				+	unsigned height;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+struct starpu_codelet cl =
			
 
				+{
			
 
				+	.cpu_funcs = {cpu_mandelbrot},
			
 
				+	.cuda_funcs = {gpu_mandelbrot},
			
 
				+	.nbuffers = 1,
			
 
				+	.modes = {STARPU_RW}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+void mandelbrot_with_starpu(int *pixels, float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy)
			
 
				+{
			
 
				+	starpu_data_handle_t p_handle;
			
 
				+
			
 
				+	starpu_matrix_data_register(&p_handle, STARPU_MAIN_RAM, (uintptr_t)pixels, width, width, height, sizeof(int));
			
 
				+
			
 
				+	struct starpu_data_filter vert =
			
 
				+	{
			
 
				+		.filter_func = starpu_matrix_filter_vertical_block,
			
 
				+		.nchildren = nslicesy
			
 
				+	};
			
 
				+
			
 
				+	struct starpu_data_filter horiz =
			
 
				+	{
			
 
				+		.filter_func = starpu_matrix_filter_block,
			
 
				+		.nchildren = nslicesx
			
 
				+	};
			
 
				+
			
 
				+	starpu_data_map_filters(p_handle, 2, &vert, &horiz);
			
 
				+
			
 
				+	unsigned taskx, tasky;
			
 
				+
			
 
				+	struct Params *params = malloc(nslicesx*nslicesy*sizeof(struct Params));
			
 
				+
			
 
				+	for (taskx = 0; taskx < nslicesx; taskx++){
			
 
				+		for (tasky = 0; tasky < nslicesy; tasky++){
			
 
				+			struct starpu_task *task = starpu_task_create();
			
 
				+			
			
 
				+			task->cl = &cl;
			
 
				+			task->handles[0] = starpu_data_get_sub_data(p_handle, 2, tasky, taskx);
			
 
				+			struct Params param = {cr, ci, taskx, tasky, width, height};
			
 
				+
			
 
				+			params[taskx + tasky*nslicesx] = param;
			
 
				+
			
 
				+			task->cl_arg = (params + taskx + tasky * nslicesx);
			
 
				+			task->cl_arg_size = sizeof(struct Params);
			
 
				+			
			
 
				+			starpu_task_submit(task);
			
 
				+		}
			
 
				+	}
			
 
				+	starpu_task_wait_for_all();
			
 
				+
			
 
				+	starpu_data_unpartition(p_handle, STARPU_MAIN_RAM);
			
 
				+
			
 
				+	starpu_data_unregister(p_handle);
			
 
				+
			
 
				+	free(params);
			
 
				+}
			
 
				+
			
 
				+void init_zero(int * pixels, unsigned width, unsigned height)
			
 
				+{
			
 
				+	unsigned i,j;
			
 
				+	for (i = 0; i < height; i++){
			
 
				+		for (j = 0; j < width; j++){
			
 
				+			pixels[j + i*width] = 0;
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void sort(double *arr, unsigned nbr_tests)
			
 
				+{
			
 
				+	unsigned j;
			
 
				+	
			
 
				+	int is_sort = 0;
			
 
				+	
			
 
				+	while (!is_sort){
			
 
				+
			
 
				+		is_sort = 1;
			
 
				+		
			
 
				+		for (j = 0; j < nbr_tests - 1; j++){
			
 
				+			if (arr[j] > arr[j+1]){
			
 
				+				is_sort = 0;
			
 
				+				double tmp = arr[j];
			
 
				+				arr[j] = arr[j+1];
			
 
				+				arr[j+1] = tmp;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+double median_time(float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy, unsigned nbr_tests)
			
 
				+{
			
 
				+	int *Pixels = malloc(width*height*sizeof(int));
			
 
				+	
			
 
				+	unsigned i;
			
 
				+
			
 
				+	double exec_times[nbr_tests];
			
 
				+
			
 
				+	double start, stop, exec_t;
			
 
				+	for (i = 0; i < nbr_tests; i++){
			
 
				+		init_zero(Pixels, width, height);
			
 
				+		
			
 
				+		start = starpu_timing_now(); // starpu_timing_now() gives the time in microseconds.
			
 
				+		mandelbrot_with_starpu(Pixels, cr, ci, width, height, nslicesx, nslicesy);
			
 
				+		stop = starpu_timing_now();
			
 
				+		
			
 
				+		exec_t = (stop-start)/1.e6;
			
 
				+		exec_times[i] = exec_t;
			
 
				+	}
			
 
				+	char filename[30];
			
 
				+	sprintf(filename, "PPM/mandelbrot%d.ppm", width);
			
 
				+	printf("%s\n", filename);
			
 
				+
			
 
				+	mandelbrot_graph(filename, Pixels, width, height);
			
 
				+
			
 
				+	free(Pixels);
			
 
				+
			
 
				+	sort(exec_times, nbr_tests);
			
 
				+
			
 
				+	return exec_times[nbr_tests/2];	
			
 
				+}
			
 
				+
			
 
				+void fluctuation_time(float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy, unsigned nbr_tests, double *exec_times)
			
 
				+{
			
 
				+	int *Pixels = malloc(width*height*sizeof(int));
			
 
				+	
			
 
				+	unsigned i;
			
 
				+
			
 
				+	double start, stop, exec_t;
			
 
				+	for (i = 0; i < nbr_tests; i++){
			
 
				+		init_zero(Pixels, width, height);
			
 
				+		
			
 
				+		start = starpu_timing_now(); // starpu_timing_now() gives the time in microseconds.
			
 
				+		mandelbrot_with_starpu(Pixels, cr, ci, width, height, nslicesx, nslicesy);
			
 
				+		stop = starpu_timing_now();
			
 
				+		
			
 
				+		exec_t = (stop-start)/1.e6;
			
 
				+		exec_times[i] = exec_t;
			
 
				+
			
 
				+		/* char filename[33]; */
			
 
				+		/* sprintf(filename, "../PPM/mandelbrot%d.ppm", i + 1); */
			
 
				+		/* printf("%s\n", filename); */
			
 
				+		/* mandelbrot_graph(filename, Pixels, width, height); */
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	free(Pixels);
			
 
				+
			
 
				+
			
 
				+
			
 
				+	
			
 
				+}
			
 
				+
			
 
				+
			
 
				+void display_times(float cr, float ci, unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nslices, unsigned nbr_tests)
			
 
				+{
			
 
				+	
			
 
				+	unsigned dim;
			
 
				+
			
 
				+	FILE *myfile;
			
 
				+	myfile = fopen("DAT/mandelbrot_c_struct_times.dat", "w");
			
 
				+
			
 
				+	for (dim = start_dim; dim <= stop_dim; dim += step_dim){
			
 
				+		printf("Dimension: %u...\n", dim);
			
 
				+		double t = median_time(cr, ci, dim, dim, nslices, nslices, nbr_tests);
			
 
				+		
			
 
				+		printf("w = %u ; h = %u ; t = %f\n", dim, dim, t);
			
 
				+		
			
 
				+		fprintf(myfile, "%f\n", t);
			
 
				+		}
			
 
				+	
			
 
				+	fclose(myfile);
			
 
				+}
			
 
				+
			
 
				+void display_fluctuations(float cr, float ci, unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nslices, unsigned nbr_tests)
			
 
				+{
			
 
				+	
			
 
				+	unsigned dim;
			
 
				+
			
 
				+	FILE *myfile;
			
 
				+	myfile = fopen("DAT/mandelbrot_c_fluctuation.dat", "w");
			
 
				+
			
 
				+	double *exec_times = malloc(nbr_tests * sizeof(double));
			
 
				+	fluctuation_time(cr, ci, start_dim, start_dim, nslices, nslices, nbr_tests, exec_times);
			
 
				+		
			
 
				+	/* printf("w = %u ; h = %u ; t = %f\n", dim, dim, t); */
			
 
				+	unsigned i;
			
 
				+	for (i = 0; i < nbr_tests; i++){
			
 
				+		printf("test %u: %f seconds\n", i, exec_times[i]);
			
 
				+		fprintf(myfile, "%u %f\n", i, exec_times[i]);
			
 
				+	}
			
 
				+	
			
 
				+	fclose(myfile);
			
 
				+	free(exec_times);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+
			
 
				+	if (argc != 8){
			
 
				+		printf("Usage: %s cr ci start_dim step_dim stop_dim nslices(must divide dims) nbr_tests\n", argv[0]);
			
 
				+		return 1;
			
 
				+	}
			
 
				+	if (starpu_init(NULL) != EXIT_SUCCESS){
			
 
				+		fprintf(stderr, "ERROR\n");
			
 
				+		return 77;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	
			
 
				+	float cr = (float) atof(argv[1]);
			
 
				+	float ci = (float) atof(argv[2]);
			
 
				+	unsigned start_dim = (unsigned) atoi(argv[3]);
			
 
				+	unsigned step_dim = (unsigned) atoi(argv[4]);	
			
 
				+	unsigned stop_dim = (unsigned) atoi(argv[5]);
			
 
				+	unsigned nslices = (unsigned) atoi(argv[6]);
			
 
				+	unsigned nbr_tests = (unsigned) atoi(argv[7]);
			
 
				+
			
 
				+	display_times(cr, ci, start_dim, step_dim, stop_dim, nslices, nbr_tests);
			
 
				+	
			
 
				+	
			
 
				+	/* display_fluctuations(cr, ci, start_dim, step_dim, stop_dim, nslices, nbr_tests); */
			
 
				+
			
 
				+
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/julia/mandelbrot/mandelbrot.jl
+++ b/julia/mandelbrot/mandelbrot.jl
@@ -0,0 +1,30 @@
 
				+function mandelbrotjl(pixels ::Matrix{Int64}, centerr ::Float64, centeri ::Float64)
			
 
				+    height,width = size(pixels)
			
 
				+    zoom = width * 0.25296875
			
 
				+    val_diverge = 2.0
			
 
				+    max_iterations = (width/2) * 0.049715909 * log10(zoom);
			
 
				+
			
 
				+
			
 
				+    for y = 1:height
			
 
				+        for x = 1:width
			
 
				+            cr = centerr + (x - (width / 2))/zoom
			
 
				+            zr = cr
			
 
				+            ci = centeri + (y - (height / 2))/zoom
			
 
				+            zi = ci
			
 
				+
			
 
				+            n = 0
			
 
				+            while ((n < max_iterations) && (zr*zr + zi*zi < val_diverge*val_diverge))
			
 
				+                tmp = zr*zr - zi*zi + cr
			
 
				+                zi = 2*zr*zi + ci
			
 
				+                zr = tmp
			
 
				+                n = n+1
			
 
				+            end
			
 
				+            
			
 
				+            if (n < max_iterations)
			
 
				+                pixels[y,x] = round(255 * n / max_iterations)
			
 
				+            else
			
 
				+                pixels[y,x] = 0
			
 
				+            end
			
 
				+        end
			
 
				+    end
			
 
				+end
			
--- a/julia/tst/black_scholes/black_scholes_def.jl
+++ b/julia/tst/black_scholes/black_scholes_def.jl
@@ -1,81 +0,0 @@
 
				-function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64)
			
 
				-    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices)
			
 
				-
			
 
				-    @starpu_block let
			
 
				-        dat_handle, res_handle = starpu_data_register(data, res)
			
 
				-
			
 
				-        starpu_data_partition(dat_handle, vert)
			
 
				-        starpu_data_partition(res_handle, vert)
			
 
				-        
			
 
				-        #Compute the price of call and put option in the res matrix
			
 
				-        @starpu_sync_tasks for task in (1:nslices)
			
 
				-            @starpu_async_cl cl(dat_handle[task], res_handle[task])
			
 
				-        end
			
 
				-    end
			
 
				-end
			
 
				-
			
 
				-
			
 
				-function init_data(data, data_nbr);
			
 
				-    for i in 1:data_nbr
			
 
				-        data[1,i] = rand(Float64) * 100
			
 
				-        data[2,i] = rand(Float64) * 100
			
 
				-        data[3,i] = rand(Float64)
			
 
				-        data[4,i] = rand(Float64) * 10
			
 
				-        data[5,i] = rand(Float64) * 10
			
 
				-    end
			
 
				-    return data
			
 
				-end
			
 
				-        
			
 
				-
			
 
				-
			
 
				-function median_times(data_nbr, nslices, nbr_tests)
			
 
				-
			
 
				-    data ::Matrix{Float64} = zeros(5, data_nbr)
			
 
				-    # data[1,1] = 100.0
			
 
				-    # data[2,1] = 100.0
			
 
				-    # data[3,1] = 0.05
			
 
				-    # data[4,1] = 1.0
			
 
				-    # data[5,1] = 0.2
			
 
				-
			
 
				-
			
 
				-    res ::Matrix{Float64} = zeros(2, data_nbr)
			
 
				-
			
 
				-    exec_times ::Vector{Float64} = [0. for i in 1:nbr_tests]
			
 
				-
			
 
				-    for i = 1:nbr_tests
			
 
				-        
			
 
				-        init_data(data, data_nbr)
			
 
				-
			
 
				-        tic()
			
 
				-        black_scholes_starpu(data, res, nslices);
			
 
				-        t = toq()
			
 
				-
			
 
				-        exec_times[i] = t
			
 
				-    end
			
 
				-    sort!(exec_times)
			
 
				-    # println(data)
			
 
				-    # println(res)
			
 
				-    
			
 
				-    return exec_times[1 + div(nbr_tests - 1, 2)]
			
 
				-end
			
 
				-
			
 
				-function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests)
			
 
				-
			
 
				-    mtc = map( (x->parse(Float64,x)), open("../DAT/black_scholes_c_times.dat") do f
			
 
				-                  readlines(f)
			
 
				-                  end)
			
 
				-
			
 
				-
			
 
				-    mtcgen = map( (x->parse(Float64,x)), open("../DAT/black_scholes_c_generated_times.dat") do f
			
 
				-                  readlines(f)
			
 
				-                  end)
			
 
				-    i = 1
			
 
				-    open("../DAT/black_scholes_times.dat", "w") do f 
			
 
				-        for data_nbr in (start_nbr : step_nbr : stop_nbr)
			
 
				-            t = median_times(data_nbr, nslices, nbr_tests)
			
 
				-            println("Number of data:\n$data_nbr\nTimes:\njl: $t\nC: $(mtc[i])\nGen: $(mtcgen[i])")
			
 
				-            write(f, "$data_nbr $(t) $(mtcgen[i]) $(mtc[i])\n")
			
 
				-            i = i + 1
			
 
				-        end
			
 
				-    end
			
 
				-end
			
--- a/julia/tst/black_scholes/black_scholes_generated.jl
+++ b/julia/tst/black_scholes/black_scholes_generated.jl
@@ -1,35 +0,0 @@
 
				-if length(ARGS) != 5
			
 
				-    println("Usage: julia prog.jl start_data_nbr step_data_nbr stop_data_nbr nslices nbr_tests")
			
 
				-    quit()
			
 
				-end
			
 
				-
			
 
				-
			
 
				-if (parse(Int64,ARGS[1]) < parse(Int64,ARGS[4]))
			
 
				-    println("The number of slices must be smaller than the number of data")
			
 
				-    quit()
			
 
				-end
			
 
				-
			
 
				-include("../../src/Wrapper/Julia/starpu_include.jl")
			
 
				-using StarPU
			
 
				-
			
 
				-@debugprint "starpu_init"
			
 
				-starpu_init(extern_task_path = "../build/generated_tasks_black_scholes")
			
 
				-
			
 
				-perfmodel = StarpuPerfmodel(
			
 
				-    perf_type = STARPU_HISTORY_BASED,
			
 
				-    symbol = "history_perf"
			
 
				-)
			
 
				-
			
 
				-cl = StarpuCodelet(
			
 
				-cpu_func = "black_scholes",
			
 
				-gpu_func = "CUDA_black_scholes",
			
 
				-modes = [STARPU_RW, STARPU_RW],
			
 
				-perfmodel = perfmodel
			
 
				-)
			
 
				-
			
 
				-include("black_scholes_def.jl")
			
 
				-
			
 
				-display_times(map( (x->parse(Int64, x)), ARGS)...)
			
 
				-
			
 
				-@debugprint "starpu_shutdown"
			
 
				-starpu_shutdown()
			
--- a/julia/tst/black_scholes/black_scholes_with_generated.c
+++ b/julia/tst/black_scholes/black_scholes_with_generated.c
@@ -1,170 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2019       Mael Keryell
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-#include <stdio.h>
			
 
				-#include <stdlib.h>
			
 
				-#include <starpu.h>
			
 
				-#include "../includes/sorting.h"
			
 
				-
			
 
				-void black_scholes(void **, void *);
			
 
				-void CUDA_black_scholes(void **, void*);
			
 
				-
			
 
				-static struct starpu_perfmodel model =
			
 
				-{
			
 
				-	.type = STARPU_HISTORY_BASED,
			
 
				-	.symbol = "history_perf"
			
 
				-};
			
 
				-
			
 
				-struct starpu_codelet cl =
			
 
				-{
			
 
				-	.cpu_funcs = {black_scholes},
			
 
				-	.cuda_funcs = {CUDA_black_scholes},
			
 
				-	.nbuffers = 2,
			
 
				-	.modes = {STARPU_R, STARPU_RW},
			
 
				-	.model = &model
			
 
				-};
			
 
				-
			
 
				-void black_scholes_with_starpu(double *data, double *res, unsigned nslices, unsigned nbr_data)
			
 
				-{
			
 
				-
			
 
				-	starpu_data_handle_t D_handle, RES_handle;
			
 
				-
			
 
				-	starpu_matrix_data_register(&D_handle, STARPU_MAIN_RAM, (uintptr_t)data, 5, 5, nbr_data, sizeof(double));
			
 
				-	starpu_matrix_data_register(&RES_handle, STARPU_MAIN_RAM, (uintptr_t)res, 2, 2, nbr_data, sizeof(double));
			
 
				-
			
 
				-	struct starpu_data_filter vert =
			
 
				-	{
			
 
				-		.filter_func = starpu_matrix_filter_vertical_block,
			
 
				-		.nchildren = nslices
			
 
				-	};
			
 
				-
			
 
				-	starpu_data_partition(D_handle, &vert);
			
 
				-	starpu_data_partition(RES_handle, &vert);
			
 
				-
			
 
				-	unsigned taskx;
			
 
				-	
			
 
				-	for (taskx = 0; taskx < nslices; taskx++){
			
 
				-		struct starpu_task *task = starpu_task_create();
			
 
				-		
			
 
				-		task->cl = &cl;
			
 
				-		task->handles[0] = starpu_data_get_sub_data(D_handle, 1, taskx);
			
 
				-		task->handles[1] = starpu_data_get_sub_data(RES_handle, 1, taskx);
			
 
				-		
			
 
				-		starpu_task_submit(task);
			
 
				-	}
			
 
				-
			
 
				-	starpu_task_wait_for_all();
			
 
				-
			
 
				-
			
 
				-	starpu_data_unpartition(D_handle, STARPU_MAIN_RAM);
			
 
				-	starpu_data_unpartition(RES_handle, STARPU_MAIN_RAM);
			
 
				-
			
 
				-	starpu_data_unregister(D_handle);
			
 
				-	starpu_data_unregister(RES_handle);
			
 
				-}
			
 
				-	
			
 
				-
			
 
				-void init_data(double *data, unsigned nbr_data)
			
 
				-{
			
 
				-	unsigned i;
			
 
				-	for (i = 0; i < nbr_data; i++){
			
 
				-
			
 
				-		data[5*i] = 100. * rand() / (double) RAND_MAX;
			
 
				-		data[5*i + 1] = 100. * rand() / (double) RAND_MAX;
			
 
				-		data[5*i + 2] = rand() / (double) RAND_MAX;
			
 
				-		data[5*i + 3] = 10. * rand() / (double) RAND_MAX;
			
 
				-		data[5*i + 4] = 10. * rand() / (double) RAND_MAX;
			
 
				-		
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-double median_time(unsigned nbr_data, unsigned nslices, unsigned nbr_tests)
			
 
				-{
			
 
				-	double *data = malloc(5 * nbr_data * sizeof(double));
			
 
				-	double *res = calloc(2 * nbr_data, sizeof(double));
			
 
				-	double exec_times[nbr_tests];
			
 
				-	
			
 
				-	/* printf("nbr_data: %u\n", nbr_data); */
			
 
				-	unsigned i;
			
 
				-	for (i = 0; i < nbr_tests; i++){
			
 
				-		
			
 
				-		init_data(data, nbr_data);
			
 
				-		/* data[0] = 100.0; */
			
 
				-		/* data[1] = 100.0; */
			
 
				-		/* data[2] = 0.05; */
			
 
				-		/* data[3] = 1.0; */
			
 
				-		/* data[4] = 0.2; */
			
 
				-
			
 
				-		double start = starpu_timing_now();
			
 
				-		black_scholes_with_starpu(data, res, nslices, nbr_data);
			
 
				-		double stop = starpu_timing_now();
			
 
				-		
			
 
				-		exec_times[i] = (stop-start)/1.e6;
			
 
				-		
			
 
				-		
			
 
				-	}
			
 
				-
			
 
				-	/* printf("RES:\n%f\n%f\n", res[0], res[1]); */
			
 
				-
			
 
				-	free(data);
			
 
				-	free(res);
			
 
				-
			
 
				-	quicksort(exec_times, 0, nbr_tests - 1);
			
 
				-	return exec_times[nbr_tests/2];
			
 
				-}
			
 
				-	
			
 
				-
			
 
				-
			
 
				-void display_times(unsigned start_nbr, unsigned step_nbr, unsigned stop_nbr, unsigned nslices, unsigned nbr_tests){
			
 
				-	
			
 
				-	double t;
			
 
				-	unsigned nbr_data;
			
 
				-
			
 
				-	FILE *myfile;
			
 
				-	myfile = fopen("DAT/black_scholes_c_generated_times.dat", "w");
			
 
				-
			
 
				-	for (nbr_data = start_nbr; nbr_data <= stop_nbr; nbr_data+=step_nbr){
			
 
				-		t = median_time(nbr_data, nslices, nbr_tests);
			
 
				-		printf("Number of data: %u\nTime: %f\n", nbr_data, t);
			
 
				-		fprintf(myfile, "%f\n", t);
			
 
				-	}
			
 
				-	fclose(myfile);
			
 
				-}
			
 
				-
			
 
				-int main(int argc, char *argv[])
			
 
				-{
			
 
				-	if (argc != 6){
			
 
				-		printf("Usage: %s start_nbr step_nbr stop_nbr nslices nbr_tests\n", argv[0]);
			
 
				-		return 1;
			
 
				-	}
			
 
				-	
			
 
				-	if (starpu_init(NULL) != EXIT_SUCCESS){
			
 
				-		fprintf(stderr, "ERROR\n");
			
 
				-		return 77;
			
 
				-	}
			
 
				-
			
 
				-	unsigned start_nbr = (unsigned) atoi(argv[1]);
			
 
				-	unsigned step_nbr = (unsigned) atoi(argv[2]);
			
 
				-	unsigned stop_nbr = (unsigned) atoi(argv[3]);
			
 
				-	unsigned nslices = (unsigned) atoi(argv[4]);
			
 
				-	unsigned nbr_tests = (unsigned) atoi(argv[5]);
			
 
				-
			
 
				-
			
 
				-	display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests);
			
 
				-		
			
 
				-	starpu_shutdown();
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
--- a/julia/tst/black_scholes/cpu_black_scholes.c
+++ b/julia/tst/black_scholes/cpu_black_scholes.c
@@ -1,54 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2019       Mael Keryell
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-#include <stdint.h>
			
 
				-#include <starpu.h>
			
 
				-#include <math.h>
			
 
				-
			
 
				-
			
 
				-static inline double normcdf(double x)
			
 
				-{
			
 
				-	
			
 
				-	return (1.0 + erf(x/sqrt(2.0)))/2.0;
			
 
				-}
			
 
				-
			
 
				-void cpu_black_scholes(void *descr[], void *arg)
			
 
				-{ 
			
 
				-	double *S, *K, *R, *T, *SIG, *CRES, *PRES;
			
 
				-
			
 
				-	uint32_t nxS;
			
 
				-	
			
 
				-	
			
 
				-	S = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
			
 
				-	K = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
			
 
				-	R = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
			
 
				-	T = (double *)STARPU_MATRIX_GET_PTR(descr[3]);
			
 
				-	SIG = (double *)STARPU_MATRIX_GET_PTR(descr[4]);
			
 
				-	CRES = (double *)STARPU_MATRIX_GET_PTR(descr[5]);
			
 
				-	PRES = (double *)STARPU_MATRIX_GET_PTR(descr[6]);
			
 
				-	
			
 
				-	nxS = STARPU_MATRIX_GET_NX(descr[0]);
			
 
				-
			
 
				-	
			
 
				-	uint32_t i;
			
 
				-	for (i = 0; i < nxS; i++){
			
 
				-				
			
 
				-		double d1 = (log(S[i] / K[i]) + (R[i] + pow(SIG[i], 2.0) * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));
			
 
				-		double d2 = (log(S[i] / K[i]) + (R[i] - pow(SIG[i], 2.0) * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));
			
 
				-		
			
 
				-		CRES[i] = S[i] * normcdf(d1) - K[i] * exp(-R[i] * T[i]) * normcdf(d2);
			
 
				-		PRES[i] = -S[i] * normcdf(-d1) + K[i] * exp(-R[i] * T[i]) * normcdf(-d2);
			
 
				-	}
			
 
				-}
			
--- a/julia/tst/black_scholes/gpu_black_scholes.cu
+++ b/julia/tst/black_scholes/gpu_black_scholes.cu
@@ -1,89 +0,0 @@
 
				-/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				- *
			
 
				- * Copyright (C) 2019       Mael Keryell
			
 
				- *
			
 
				- * StarPU is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * StarPU is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-#include <stdio.h>
			
 
				-#include <stdint.h>
			
 
				-#include <math.h>
			
 
				-#include <starpu.h>
			
 
				-
			
 
				-// __device__ inline double cndGPU(double d)
			
 
				-// {
			
 
				-//   const double A1 = 0.31938153f;
			
 
				-//   const double A2 = -0.356563782f;
			
 
				-//   const double A3 = 1.781477937f;
			
 
				-//   const double A4 = -1.821255978f;
			
 
				-//   const double A5 = 1.330274429f;
			
 
				-//   const float RSQRT2PI = 0.39894228040143267793994605993438f;
			
 
				-
			
 
				-    
			
 
				-//   double K = __fdividef(1.0f, (1.0f + 0.2316419f * fabsf(d)));
			
 
				-
			
 
				-    
			
 
				-//   double cnd = RSQRT2PI * __expf(- 0.5f * d * d) * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));
			
 
				-
			
 
				-//     if (d > 0)
			
 
				-//       cnd = 1.0f - cnd;
			
 
				-
			
 
				-//     return cnd;
			
 
				-// }
			
 
				-
			
 
				-__device__ inline double cndGPU(double d)
			
 
				-{
			
 
				-  return (1.0 + erf(d/sqrt(2.0)))/2.0;
			
 
				-}
			
 
				-
			
 
				-__global__ void gpuBlackScholesKernel(double *S, double *K, double *R, double *T, 
			
 
				-				      double *SIG, double *CRES, double *PRES,
			
 
				-				      uint32_t nxS)
			
 
				-{
			
 
				-  uint32_t i, id;
			
 
				-  
			
 
				-  id = blockIdx.x * blockDim.x + threadIdx.x;
			
 
				-  i = id % nxS;
			
 
				-  
			
 
				-  double sqrtT = __fdividef(1.0F, rsqrtf(T[i]));
			
 
				-  double d1 = (log(S[i] / K[i]) + (R[i] + SIG[i] * SIG[i] * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));  
			
 
				-  double d2 = (log(S[i] / K[i]) + (R[i] - SIG[i] * SIG[i] * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));
			
 
				-  
			
 
				-  CRES[i] = S[i] * (normcdf(d1)) - K[i] * exp(-R[i] * T[i]) * normcdf(d2);
			
 
				-  PRES[i] = -S[i] * (normcdf(-d1)) + K[i] * exp(-R[i] * T[i]) * normcdf(-d2);
			
 
				-}
			
 
				-
			
 
				-#define THREADS_PER_BLOCK 64
			
 
				-
			
 
				-extern "C" void gpu_black_scholes(void *descr[], void *args)
			
 
				-{
			
 
				-  double *S, *K, *R, *T, *SIG, *CRES, *PRES;
			
 
				-  uint32_t nxS;
			
 
				-  uint32_t nblocks;
			
 
				-
			
 
				-  S = (double *) STARPU_MATRIX_GET_PTR(descr[0]);
			
 
				-  K = (double *) STARPU_MATRIX_GET_PTR(descr[1]);
			
 
				-  R = (double *) STARPU_MATRIX_GET_PTR(descr[2]);
			
 
				-  T = (double *) STARPU_MATRIX_GET_PTR(descr[3]);
			
 
				-  SIG = (double *) STARPU_MATRIX_GET_PTR(descr[4]);
			
 
				-  CRES = (double *) STARPU_MATRIX_GET_PTR(descr[5]);
			
 
				-  PRES = (double *) STARPU_MATRIX_GET_PTR(descr[6]);
			
 
				-
			
 
				-  nxS = STARPU_MATRIX_GET_NX(descr[0]);
			
 
				-
			
 
				-  nblocks = (nxS + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
			
 
				-
			
 
				-  gpuBlackScholesKernel
			
 
				-    <<< nblocks, THREADS_PER_BLOCK, 0, starpu_cuda_get_local_stream()
			
 
				-    >>> (S, K, R, T, SIG, CRES, PRES, nxS);
			
 
				-  
			
 
				-  cudaStreamSynchronize(starpu_cuda_get_local_stream());
			
 
				-}