Browse Source

Added black-scholes (not working) and mandelbrot (ok)

Denis Barthou 5 years ago
parent
commit
28d0b4272e

julia/tst/black_scholes/black_scholes.c → julia/black_scholes/black_scholes.c


+ 83 - 13
julia/tst/black_scholes/cpu_cuda_black_scholes.jl

@@ -1,13 +1,8 @@
-include("../../src/Compiler/include.jl")
+import Libdl
+using StarPU
 
-starpu_new_cpu_kernel_file("../build/generated_cpu_black_scholes.c")
-starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
-
-
-
-
-
-@cpu_cuda_kernel function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) ::Void
+@target STARPU_CPU+STARPU_CUDA
+@codelet function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) :: Float32
     
     widthn ::Int64 = width(data)
         
@@ -25,7 +20,7 @@ starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
     b5 ::Float64 = 1.330274428
 
     
-    @indep for i = 1:widthn
+    @parallel for i = 1:widthn
         
 
         d1 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] + pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i]))
@@ -117,8 +112,83 @@ starpu_new_cuda_kernel_file("../build/generated_cuda_black_scholes.cu")
         res[2,i] = -data[1,i] * (normd1n) + data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2n) # -S * N(-d1) + r*exp(-r*T) * norm(-d2)
         
     end
+    return 0
+end
+
+
+@debugprint "starpu_init"
+starpu_init()
+
+function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64)
+    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices)
+
+    @starpu_block let
+        dat_handle, res_handle = starpu_data_register(data, res)
+
+        starpu_data_partition(dat_handle, vert)
+        starpu_data_partition(res_handle, vert)
+        
+        #Compute the price of call and put option in the res matrix
+        @starpu_sync_tasks for task in (1:nslices)
+            @starpu_async_cl black_scholes(dat_handle[task], res_handle[task]) [STARPU_RW, STARPU_RW] 
+        end
+    end
+    return 0
 end
 
-compile_cpu_kernels("../build/generated_cpu_black_scholes.so")
-compile_cuda_kernels("../build/generated_cuda_black_scholes.so")
-combine_kernel_files("../build/generated_tasks_black_scholes.so", ["../build/generated_cpu_black_scholes.so", "../build/generated_cuda_black_scholes.so"])
+
+function init_data(data, data_nbr);
+    for i in 1:data_nbr
+        data[1,i] = rand(Float64) * 100
+        data[2,i] = rand(Float64) * 100
+        data[3,i] = rand(Float64)
+        data[4,i] = rand(Float64) * 10
+        data[5,i] = rand(Float64) * 10
+    end
+    return data
+end
+        
+
+
+function median_times(data_nbr, nslices, nbr_tests)
+
+    data ::Matrix{Float64} = zeros(5, data_nbr)
+    # data[1,1] = 100.0
+    # data[2,1] = 100.0
+    # data[3,1] = 0.05
+    # data[4,1] = 1.0
+    # data[5,1] = 0.2
+
+
+    res ::Matrix{Float64} = zeros(2, data_nbr)
+
+    exec_times ::Vector{Float64} = [0. for i in 1:nbr_tests]
+
+    for i = 1:nbr_tests
+        
+        init_data(data, data_nbr)
+
+        tic()
+        black_scholes_starpu(data, res, nslices);
+        t = toq()
+
+        exec_times[i] = t
+    end
+    sort!(exec_times)
+    # println(data)
+    # println(res)
+    
+    return exec_times[1 + div(nbr_tests - 1, 2)]
+end
+
+function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests)
+    i = 1
+    open("black_scholes_times.dat", "w") do f 
+        for data_nbr in (start_nbr : step_nbr : stop_nbr)
+            t = median_times(data_nbr, nslices, nbr_tests)
+            println("Number of data:\n$data_nbr\nTimes:\njl: $t\nC: $(mtc[i])\nGen: $(mtcgen[i])")
+            write(f, "$data_nbr $(t)\n")
+            i = i + 1
+        end
+    end
+end

+ 263 - 0
julia/mandelbrot/mandelbrot.c

@@ -0,0 +1,263 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2019       Mael Keryell
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <starpu.h>
+#include "../display.h"
+
+void cpu_mandelbrot(void **, void *);
+void gpu_mandelbrot(void **, void *);
+
+struct Params
+{
+	float cr;
+	float ci;
+	unsigned taskx;
+	unsigned tasky;
+	unsigned width;
+	unsigned height;
+};
+
+
+
+struct starpu_codelet cl =
+{
+	.cpu_funcs = {cpu_mandelbrot},
+	.cuda_funcs = {gpu_mandelbrot},
+	.nbuffers = 1,
+	.modes = {STARPU_RW}
+};
+
+
+void mandelbrot_with_starpu(int *pixels, float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy)
+{
+	starpu_data_handle_t p_handle;
+
+	starpu_matrix_data_register(&p_handle, STARPU_MAIN_RAM, (uintptr_t)pixels, width, width, height, sizeof(int));
+
+	struct starpu_data_filter vert =
+	{
+		.filter_func = starpu_matrix_filter_vertical_block,
+		.nchildren = nslicesy
+	};
+
+	struct starpu_data_filter horiz =
+	{
+		.filter_func = starpu_matrix_filter_block,
+		.nchildren = nslicesx
+	};
+
+	starpu_data_map_filters(p_handle, 2, &vert, &horiz);
+
+	unsigned taskx, tasky;
+
+	struct Params *params = malloc(nslicesx*nslicesy*sizeof(struct Params));
+
+	for (taskx = 0; taskx < nslicesx; taskx++){
+		for (tasky = 0; tasky < nslicesy; tasky++){
+			struct starpu_task *task = starpu_task_create();
+			
+			task->cl = &cl;
+			task->handles[0] = starpu_data_get_sub_data(p_handle, 2, tasky, taskx);
+			struct Params param = {cr, ci, taskx, tasky, width, height};
+
+			params[taskx + tasky*nslicesx] = param;
+
+			task->cl_arg = (params + taskx + tasky * nslicesx);
+			task->cl_arg_size = sizeof(struct Params);
+			
+			starpu_task_submit(task);
+		}
+	}
+	starpu_task_wait_for_all();
+
+	starpu_data_unpartition(p_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(p_handle);
+
+	free(params);
+}
+
+void init_zero(int * pixels, unsigned width, unsigned height)
+{
+	unsigned i,j;
+	for (i = 0; i < height; i++){
+		for (j = 0; j < width; j++){
+			pixels[j + i*width] = 0;
+		}
+	}
+}
+
+void sort(double *arr, unsigned nbr_tests)
+{
+	unsigned j;
+	
+	int is_sort = 0;
+	
+	while (!is_sort){
+
+		is_sort = 1;
+		
+		for (j = 0; j < nbr_tests - 1; j++){
+			if (arr[j] > arr[j+1]){
+				is_sort = 0;
+				double tmp = arr[j];
+				arr[j] = arr[j+1];
+				arr[j+1] = tmp;
+			}
+		}
+	}
+}
+double median_time(float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy, unsigned nbr_tests)
+{
+	int *Pixels = malloc(width*height*sizeof(int));
+	
+	unsigned i;
+
+	double exec_times[nbr_tests];
+
+	double start, stop, exec_t;
+	for (i = 0; i < nbr_tests; i++){
+		init_zero(Pixels, width, height);
+		
+		start = starpu_timing_now(); // starpu_timing_now() gives the time in microseconds.
+		mandelbrot_with_starpu(Pixels, cr, ci, width, height, nslicesx, nslicesy);
+		stop = starpu_timing_now();
+		
+		exec_t = (stop-start)/1.e6;
+		exec_times[i] = exec_t;
+	}
+	char filename[30];
+	sprintf(filename, "PPM/mandelbrot%d.ppm", width);
+	printf("%s\n", filename);
+
+	mandelbrot_graph(filename, Pixels, width, height);
+
+	free(Pixels);
+
+	sort(exec_times, nbr_tests);
+
+	return exec_times[nbr_tests/2];	
+}
+
+void fluctuation_time(float cr, float ci, unsigned width, unsigned height, unsigned nslicesx, unsigned nslicesy, unsigned nbr_tests, double *exec_times)
+{
+	int *Pixels = malloc(width*height*sizeof(int));
+	
+	unsigned i;
+
+	double start, stop, exec_t;
+	for (i = 0; i < nbr_tests; i++){
+		init_zero(Pixels, width, height);
+		
+		start = starpu_timing_now(); // starpu_timing_now() gives the time in microseconds.
+		mandelbrot_with_starpu(Pixels, cr, ci, width, height, nslicesx, nslicesy);
+		stop = starpu_timing_now();
+		
+		exec_t = (stop-start)/1.e6;
+		exec_times[i] = exec_t;
+
+		/* char filename[33]; */
+		/* sprintf(filename, "../PPM/mandelbrot%d.ppm", i + 1); */
+		/* printf("%s\n", filename); */
+		/* mandelbrot_graph(filename, Pixels, width, height); */
+	}
+
+
+	free(Pixels);
+
+
+
+	
+}
+
+
+void display_times(float cr, float ci, unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nslices, unsigned nbr_tests)
+{
+	
+	unsigned dim;
+
+	FILE *myfile;
+	myfile = fopen("DAT/mandelbrot_c_struct_times.dat", "w");
+
+	for (dim = start_dim; dim <= stop_dim; dim += step_dim){
+		printf("Dimension: %u...\n", dim);
+		double t = median_time(cr, ci, dim, dim, nslices, nslices, nbr_tests);
+		
+		printf("w = %u ; h = %u ; t = %f\n", dim, dim, t);
+		
+		fprintf(myfile, "%f\n", t);
+		}
+	
+	fclose(myfile);
+}
+
+void display_fluctuations(float cr, float ci, unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nslices, unsigned nbr_tests)
+{
+	
+	unsigned dim;
+
+	FILE *myfile;
+	myfile = fopen("DAT/mandelbrot_c_fluctuation.dat", "w");
+
+	double *exec_times = malloc(nbr_tests * sizeof(double));
+	fluctuation_time(cr, ci, start_dim, start_dim, nslices, nslices, nbr_tests, exec_times);
+		
+	/* printf("w = %u ; h = %u ; t = %f\n", dim, dim, t); */
+	unsigned i;
+	for (i = 0; i < nbr_tests; i++){
+		printf("test %u: %f seconds\n", i, exec_times[i]);
+		fprintf(myfile, "%u %f\n", i, exec_times[i]);
+	}
+	
+	fclose(myfile);
+	free(exec_times);
+}
+
+
+int main(int argc, char **argv)
+{
+
+	if (argc != 8){
+		printf("Usage: %s cr ci start_dim step_dim stop_dim nslices(must divide dims) nbr_tests\n", argv[0]);
+		return 1;
+	}
+	if (starpu_init(NULL) != EXIT_SUCCESS){
+		fprintf(stderr, "ERROR\n");
+		return 77;
+	}
+
+
+	
+	float cr = (float) atof(argv[1]);
+	float ci = (float) atof(argv[2]);
+	unsigned start_dim = (unsigned) atoi(argv[3]);
+	unsigned step_dim = (unsigned) atoi(argv[4]);	
+	unsigned stop_dim = (unsigned) atoi(argv[5]);
+	unsigned nslices = (unsigned) atoi(argv[6]);
+	unsigned nbr_tests = (unsigned) atoi(argv[7]);
+
+	display_times(cr, ci, start_dim, step_dim, stop_dim, nslices, nbr_tests);
+	
+	
+	/* display_fluctuations(cr, ci, start_dim, step_dim, stop_dim, nslices, nbr_tests); */
+
+
+	starpu_shutdown();
+
+
+	return 0;
+}

+ 30 - 0
julia/mandelbrot/mandelbrot.jl

@@ -0,0 +1,30 @@
+function mandelbrotjl(pixels ::Matrix{Int64}, centerr ::Float64, centeri ::Float64)
+    height,width = size(pixels)
+    zoom = width * 0.25296875
+    val_diverge = 2.0
+    max_iterations = (width/2) * 0.049715909 * log10(zoom);
+
+
+    for y = 1:height
+        for x = 1:width
+            cr = centerr + (x - (width / 2))/zoom
+            zr = cr
+            ci = centeri + (y - (height / 2))/zoom
+            zi = ci
+
+            n = 0
+            while ((n < max_iterations) && (zr*zr + zi*zi < val_diverge*val_diverge))
+                tmp = zr*zr - zi*zi + cr
+                zi = 2*zr*zi + ci
+                zr = tmp
+                n = n+1
+            end
+            
+            if (n < max_iterations)
+                pixels[y,x] = round(255 * n / max_iterations)
+            else
+                pixels[y,x] = 0
+            end
+        end
+    end
+end

+ 0 - 81
julia/tst/black_scholes/black_scholes_def.jl

@@ -1,81 +0,0 @@
-function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64)
-    vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices)
-
-    @starpu_block let
-        dat_handle, res_handle = starpu_data_register(data, res)
-
-        starpu_data_partition(dat_handle, vert)
-        starpu_data_partition(res_handle, vert)
-        
-        #Compute the price of call and put option in the res matrix
-        @starpu_sync_tasks for task in (1:nslices)
-            @starpu_async_cl cl(dat_handle[task], res_handle[task])
-        end
-    end
-end
-
-
-function init_data(data, data_nbr);
-    for i in 1:data_nbr
-        data[1,i] = rand(Float64) * 100
-        data[2,i] = rand(Float64) * 100
-        data[3,i] = rand(Float64)
-        data[4,i] = rand(Float64) * 10
-        data[5,i] = rand(Float64) * 10
-    end
-    return data
-end
-        
-
-
-function median_times(data_nbr, nslices, nbr_tests)
-
-    data ::Matrix{Float64} = zeros(5, data_nbr)
-    # data[1,1] = 100.0
-    # data[2,1] = 100.0
-    # data[3,1] = 0.05
-    # data[4,1] = 1.0
-    # data[5,1] = 0.2
-
-
-    res ::Matrix{Float64} = zeros(2, data_nbr)
-
-    exec_times ::Vector{Float64} = [0. for i in 1:nbr_tests]
-
-    for i = 1:nbr_tests
-        
-        init_data(data, data_nbr)
-
-        tic()
-        black_scholes_starpu(data, res, nslices);
-        t = toq()
-
-        exec_times[i] = t
-    end
-    sort!(exec_times)
-    # println(data)
-    # println(res)
-    
-    return exec_times[1 + div(nbr_tests - 1, 2)]
-end
-
-function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests)
-
-    mtc = map( (x->parse(Float64,x)), open("../DAT/black_scholes_c_times.dat") do f
-                  readlines(f)
-                  end)
-
-
-    mtcgen = map( (x->parse(Float64,x)), open("../DAT/black_scholes_c_generated_times.dat") do f
-                  readlines(f)
-                  end)
-    i = 1
-    open("../DAT/black_scholes_times.dat", "w") do f 
-        for data_nbr in (start_nbr : step_nbr : stop_nbr)
-            t = median_times(data_nbr, nslices, nbr_tests)
-            println("Number of data:\n$data_nbr\nTimes:\njl: $t\nC: $(mtc[i])\nGen: $(mtcgen[i])")
-            write(f, "$data_nbr $(t) $(mtcgen[i]) $(mtc[i])\n")
-            i = i + 1
-        end
-    end
-end

+ 0 - 35
julia/tst/black_scholes/black_scholes_generated.jl

@@ -1,35 +0,0 @@
-if length(ARGS) != 5
-    println("Usage: julia prog.jl start_data_nbr step_data_nbr stop_data_nbr nslices nbr_tests")
-    quit()
-end
-
-
-if (parse(Int64,ARGS[1]) < parse(Int64,ARGS[4]))
-    println("The number of slices must be smaller than the number of data")
-    quit()
-end
-
-include("../../src/Wrapper/Julia/starpu_include.jl")
-using StarPU
-
-@debugprint "starpu_init"
-starpu_init(extern_task_path = "../build/generated_tasks_black_scholes")
-
-perfmodel = StarpuPerfmodel(
-    perf_type = STARPU_HISTORY_BASED,
-    symbol = "history_perf"
-)
-
-cl = StarpuCodelet(
-cpu_func = "black_scholes",
-gpu_func = "CUDA_black_scholes",
-modes = [STARPU_RW, STARPU_RW],
-perfmodel = perfmodel
-)
-
-include("black_scholes_def.jl")
-
-display_times(map( (x->parse(Int64, x)), ARGS)...)
-
-@debugprint "starpu_shutdown"
-starpu_shutdown()

+ 0 - 170
julia/tst/black_scholes/black_scholes_with_generated.c

@@ -1,170 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2019       Mael Keryell
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <starpu.h>
-#include "../includes/sorting.h"
-
-void black_scholes(void **, void *);
-void CUDA_black_scholes(void **, void*);
-
-static struct starpu_perfmodel model =
-{
-	.type = STARPU_HISTORY_BASED,
-	.symbol = "history_perf"
-};
-
-struct starpu_codelet cl =
-{
-	.cpu_funcs = {black_scholes},
-	.cuda_funcs = {CUDA_black_scholes},
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_RW},
-	.model = &model
-};
-
-void black_scholes_with_starpu(double *data, double *res, unsigned nslices, unsigned nbr_data)
-{
-
-	starpu_data_handle_t D_handle, RES_handle;
-
-	starpu_matrix_data_register(&D_handle, STARPU_MAIN_RAM, (uintptr_t)data, 5, 5, nbr_data, sizeof(double));
-	starpu_matrix_data_register(&RES_handle, STARPU_MAIN_RAM, (uintptr_t)res, 2, 2, nbr_data, sizeof(double));
-
-	struct starpu_data_filter vert =
-	{
-		.filter_func = starpu_matrix_filter_vertical_block,
-		.nchildren = nslices
-	};
-
-	starpu_data_partition(D_handle, &vert);
-	starpu_data_partition(RES_handle, &vert);
-
-	unsigned taskx;
-	
-	for (taskx = 0; taskx < nslices; taskx++){
-		struct starpu_task *task = starpu_task_create();
-		
-		task->cl = &cl;
-		task->handles[0] = starpu_data_get_sub_data(D_handle, 1, taskx);
-		task->handles[1] = starpu_data_get_sub_data(RES_handle, 1, taskx);
-		
-		starpu_task_submit(task);
-	}
-
-	starpu_task_wait_for_all();
-
-
-	starpu_data_unpartition(D_handle, STARPU_MAIN_RAM);
-	starpu_data_unpartition(RES_handle, STARPU_MAIN_RAM);
-
-	starpu_data_unregister(D_handle);
-	starpu_data_unregister(RES_handle);
-}
-	
-
-void init_data(double *data, unsigned nbr_data)
-{
-	unsigned i;
-	for (i = 0; i < nbr_data; i++){
-
-		data[5*i] = 100. * rand() / (double) RAND_MAX;
-		data[5*i + 1] = 100. * rand() / (double) RAND_MAX;
-		data[5*i + 2] = rand() / (double) RAND_MAX;
-		data[5*i + 3] = 10. * rand() / (double) RAND_MAX;
-		data[5*i + 4] = 10. * rand() / (double) RAND_MAX;
-		
-	}
-}
-
-double median_time(unsigned nbr_data, unsigned nslices, unsigned nbr_tests)
-{
-	double *data = malloc(5 * nbr_data * sizeof(double));
-	double *res = calloc(2 * nbr_data, sizeof(double));
-	double exec_times[nbr_tests];
-	
-	/* printf("nbr_data: %u\n", nbr_data); */
-	unsigned i;
-	for (i = 0; i < nbr_tests; i++){
-		
-		init_data(data, nbr_data);
-		/* data[0] = 100.0; */
-		/* data[1] = 100.0; */
-		/* data[2] = 0.05; */
-		/* data[3] = 1.0; */
-		/* data[4] = 0.2; */
-
-		double start = starpu_timing_now();
-		black_scholes_with_starpu(data, res, nslices, nbr_data);
-		double stop = starpu_timing_now();
-		
-		exec_times[i] = (stop-start)/1.e6;
-		
-		
-	}
-
-	/* printf("RES:\n%f\n%f\n", res[0], res[1]); */
-
-	free(data);
-	free(res);
-
-	quicksort(exec_times, 0, nbr_tests - 1);
-	return exec_times[nbr_tests/2];
-}
-	
-
-
-void display_times(unsigned start_nbr, unsigned step_nbr, unsigned stop_nbr, unsigned nslices, unsigned nbr_tests){
-	
-	double t;
-	unsigned nbr_data;
-
-	FILE *myfile;
-	myfile = fopen("DAT/black_scholes_c_generated_times.dat", "w");
-
-	for (nbr_data = start_nbr; nbr_data <= stop_nbr; nbr_data+=step_nbr){
-		t = median_time(nbr_data, nslices, nbr_tests);
-		printf("Number of data: %u\nTime: %f\n", nbr_data, t);
-		fprintf(myfile, "%f\n", t);
-	}
-	fclose(myfile);
-}
-
-int main(int argc, char *argv[])
-{
-	if (argc != 6){
-		printf("Usage: %s start_nbr step_nbr stop_nbr nslices nbr_tests\n", argv[0]);
-		return 1;
-	}
-	
-	if (starpu_init(NULL) != EXIT_SUCCESS){
-		fprintf(stderr, "ERROR\n");
-		return 77;
-	}
-
-	unsigned start_nbr = (unsigned) atoi(argv[1]);
-	unsigned step_nbr = (unsigned) atoi(argv[2]);
-	unsigned stop_nbr = (unsigned) atoi(argv[3]);
-	unsigned nslices = (unsigned) atoi(argv[4]);
-	unsigned nbr_tests = (unsigned) atoi(argv[5]);
-
-
-	display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests);
-		
-	starpu_shutdown();
-
-	return 0;
-}

+ 0 - 54
julia/tst/black_scholes/cpu_black_scholes.c

@@ -1,54 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2019       Mael Keryell
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdint.h>
-#include <starpu.h>
-#include <math.h>
-
-
-static inline double normcdf(double x)
-{
-	
-	return (1.0 + erf(x/sqrt(2.0)))/2.0;
-}
-
-void cpu_black_scholes(void *descr[], void *arg)
-{ 
-	double *S, *K, *R, *T, *SIG, *CRES, *PRES;
-
-	uint32_t nxS;
-	
-	
-	S = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-	K = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-	R = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-	T = (double *)STARPU_MATRIX_GET_PTR(descr[3]);
-	SIG = (double *)STARPU_MATRIX_GET_PTR(descr[4]);
-	CRES = (double *)STARPU_MATRIX_GET_PTR(descr[5]);
-	PRES = (double *)STARPU_MATRIX_GET_PTR(descr[6]);
-	
-	nxS = STARPU_MATRIX_GET_NX(descr[0]);
-
-	
-	uint32_t i;
-	for (i = 0; i < nxS; i++){
-				
-		double d1 = (log(S[i] / K[i]) + (R[i] + pow(SIG[i], 2.0) * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));
-		double d2 = (log(S[i] / K[i]) + (R[i] - pow(SIG[i], 2.0) * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));
-		
-		CRES[i] = S[i] * normcdf(d1) - K[i] * exp(-R[i] * T[i]) * normcdf(d2);
-		PRES[i] = -S[i] * normcdf(-d1) + K[i] * exp(-R[i] * T[i]) * normcdf(-d2);
-	}
-}

+ 0 - 89
julia/tst/black_scholes/gpu_black_scholes.cu

@@ -1,89 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2019       Mael Keryell
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <stdio.h>
-#include <stdint.h>
-#include <math.h>
-#include <starpu.h>
-
-// __device__ inline double cndGPU(double d)
-// {
-//   const double A1 = 0.31938153f;
-//   const double A2 = -0.356563782f;
-//   const double A3 = 1.781477937f;
-//   const double A4 = -1.821255978f;
-//   const double A5 = 1.330274429f;
-//   const float RSQRT2PI = 0.39894228040143267793994605993438f;
-
-    
-//   double K = __fdividef(1.0f, (1.0f + 0.2316419f * fabsf(d)));
-
-    
-//   double cnd = RSQRT2PI * __expf(- 0.5f * d * d) * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));
-
-//     if (d > 0)
-//       cnd = 1.0f - cnd;
-
-//     return cnd;
-// }
-
-__device__ inline double cndGPU(double d)
-{
-  return (1.0 + erf(d/sqrt(2.0)))/2.0;
-}
-
-__global__ void gpuBlackScholesKernel(double *S, double *K, double *R, double *T, 
-				      double *SIG, double *CRES, double *PRES,
-				      uint32_t nxS)
-{
-  uint32_t i, id;
-  
-  id = blockIdx.x * blockDim.x + threadIdx.x;
-  i = id % nxS;
-  
-  double sqrtT = __fdividef(1.0F, rsqrtf(T[i]));
-  double d1 = (log(S[i] / K[i]) + (R[i] + SIG[i] * SIG[i] * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));  
-  double d2 = (log(S[i] / K[i]) + (R[i] - SIG[i] * SIG[i] * 0.5) * T[i]) / (SIG[i] * sqrt(T[i]));
-  
-  CRES[i] = S[i] * (normcdf(d1)) - K[i] * exp(-R[i] * T[i]) * normcdf(d2);
-  PRES[i] = -S[i] * (normcdf(-d1)) + K[i] * exp(-R[i] * T[i]) * normcdf(-d2);
-}
-
-#define THREADS_PER_BLOCK 64
-
-extern "C" void gpu_black_scholes(void *descr[], void *args)
-{
-  double *S, *K, *R, *T, *SIG, *CRES, *PRES;
-  uint32_t nxS;
-  uint32_t nblocks;
-
-  S = (double *) STARPU_MATRIX_GET_PTR(descr[0]);
-  K = (double *) STARPU_MATRIX_GET_PTR(descr[1]);
-  R = (double *) STARPU_MATRIX_GET_PTR(descr[2]);
-  T = (double *) STARPU_MATRIX_GET_PTR(descr[3]);
-  SIG = (double *) STARPU_MATRIX_GET_PTR(descr[4]);
-  CRES = (double *) STARPU_MATRIX_GET_PTR(descr[5]);
-  PRES = (double *) STARPU_MATRIX_GET_PTR(descr[6]);
-
-  nxS = STARPU_MATRIX_GET_NX(descr[0]);
-
-  nblocks = (nxS + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
-
-  gpuBlackScholesKernel
-    <<< nblocks, THREADS_PER_BLOCK, 0, starpu_cuda_get_local_stream()
-    >>> (S, K, R, T, SIG, CRES, PRES, nxS);
-  
-  cudaStreamSynchronize(starpu_cuda_get_local_stream());
-}