Browse Source

Merge branch 'master' of git+ssh://scm.gforge.inria.fr/gitroot/starpu/starpu into ft_checkpoint

Romain LION 5 years ago
parent
commit
ae98233b60

+ 9 - 9
julia/Manifest.toml

@@ -7,28 +7,28 @@ uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 deps = ["Libdl", "Random", "Test"]
 git-tree-sha1 = "6f457df38ae2ba239d5e43b80493bb907de826b2"
 repo-rev = "655e9862947d17423f2fb91ea1014e1cb73c1be1"
-repo-url = "https://github.com/analytech-solutions/CBinding.jl.git"
+repo-url = "https://github.com/analytech-solutions/CBinding.jl"
 uuid = "d43a6710-96b8-4a2d-833c-c424785e5374"
 version = "0.8.1"
 
 [[CEnum]]
-git-tree-sha1 = "62847acab40e6855a9b5905ccb99c2b5cf6b3ebb"
+git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
 uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
-version = "0.2.0"
+version = "0.4.1"
 
 [[Clang]]
 deps = ["CEnum", "DataStructures", "LLVM_jll", "Libdl"]
-git-tree-sha1 = "45013227beea038ecc17e8c07cd7c7b05ed26067"
-repo-rev = "master"
-repo-url = "https://github.com/phuchant/Clang.jl.git"
+git-tree-sha1 = "2142a3a54faa28f08edb7b16bde2d3d32b1f3785"
+repo-rev = "29ad279"
+repo-url = "https://github.com/phuchant/Clang.jl"
 uuid = "40e3b903-d033-50b4-a0cc-940c62c95e31"
-version = "0.11.0"
+version = "0.11.1"
 
 [[DataStructures]]
 deps = ["InteractiveUtils", "OrderedCollections"]
-git-tree-sha1 = "6166ecfaf2b8bbf2b68d791bc1d54501f345d314"
+git-tree-sha1 = "af6d9c86e191c917c2276fbede1137e8ea20157f"
 uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
-version = "0.17.15"
+version = "0.17.17"
 
 [[Dates]]
 deps = ["Printf"]

+ 3 - 3
julia/examples/cholesky/cholesky.sh

@@ -15,6 +15,6 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
 
-$(dirname $0)/../execute.sh cholesky/cholesky_native.jl
-$(dirname $0)/../execute.sh cholesky/cholesky_implicit.jl
-$(dirname $0)/../execute.sh cholesky/cholesky_tag.jl
+$(dirname $0)/../execute.sh cholesky/cholesky_native.jl -quickcheck
+$(dirname $0)/../execute.sh cholesky/cholesky_implicit.jl -quickcheck
+$(dirname $0)/../execute.sh cholesky/cholesky_tag.jl -quickcheck

+ 2 - 3
julia/examples/cholesky/cholesky_common.jl

@@ -92,7 +92,7 @@ function check(mat::Matrix{Float32})
         end
     end
 
-    println("Verification successful !")
+    println(stderr, "Verification successful !")
 end
 
 function clean_tags(nblocks)
@@ -138,10 +138,9 @@ function main(size_p :: Int, nblocks :: Int; verify = false, verbose = false)
     starpu_memory_unpin(mat)
 
     flop = (1.0*size_p*size_p*size_p)/3.0
-    println("# size\tms\tGFlops")
     time_ms = (t_end-t_start) / 1e6
     gflops = flop/(time_ms*1000)/1000
-    println("# $size_p\t$time_ms\t$gflops")
+    println("$size_p\t$time_ms\t$gflops")
 
     clean_tags(nblocks)
 

+ 9 - 2
julia/examples/cholesky/cholesky_implicit.jl

@@ -58,7 +58,14 @@ end
 starpu_init()
 starpu_cublas_init()
 
-main(1024, 8, verify = true)
-main(15360, 16)
+println("# size\tms\tGFlops")
+
+if length(ARGS) > 0 && ARGS[1] == "-quickcheck"
+    main(1024, 8, verify = true)
+else
+    for size in 1024:1024:15360
+        main(size, 16)
+    end
+end
 
 starpu_shutdown()

+ 22 - 82
julia/examples/cholesky/cholesky_native.jl

@@ -1,79 +1,11 @@
-using LinearAlgebra.BLAS
-
-function u11(sub11)
-    nx = size(sub11, 1)
-    ld = size(sub11, 1)
-
-    for z in 0:nx-1
-        lambda11::Float32 = sqrt(sub11[z+1,z+1])
-        sub11[z+1,z+1] = lambda11
-        if lambda11 == 0.0f0
-            error("lamda11")
-        end
-
-        X = view(sub11, z+2:z+2+(nx-z-2), z+1)
-        scal!(nx-z-1, 1.0f0/lambda11, X, 1)
-
-        A = view(sub11, z+2:z+2+(nx-z-2), z+2:z+2+(nx-z-2))
-        syr!('L', -1.0f0, X, A)
-    end
-end
-
-function u21(sub11, sub21)
-    trsm!('R', 'L', 'T', 'N', 1.0f0, sub11, sub21)
-end
-
-function u22(left, right, center)
-    gemm!('N', 'T', -1.0f0, left, right, 1.0f0, center)
-end
-
-function get_block(mat :: Matrix{Float32}, m, n, nblocks)
-    dim = size(mat, 1)
-    if dim != size(mat,2)
-        error("mat must be a square matrix")
-    end
-    if dim % nblocks != 0
-        error("dim must be a multiple of nblocks")
-    end
-
-    stride = Int(dim/nblocks)
-
-    return view(mat,
-                m*stride+1:(m+1)*stride,
-                n*stride+1:(n+1)*stride)
-end
-
-function cholesky(mat :: Matrix{Float32}, size, nblocks)
-    for k in 0:nblocks-1
-        sdatakk = get_block(mat, k, k, nblocks)
-        u11(sdatakk)
-
-        for m in k+1:nblocks-1
-            sdatamk = get_block(mat, m, k, nblocks)
-            u21(sdatakk, sdatamk)
-        end
-
-        for m in k+1:nblocks-1
-            sdatamk = get_block(mat, m, k, nblocks)
-
-            for n in k+1:nblocks-1
-                if n <= m
-                    sdatank = get_block(mat, n, k, nblocks)
-                    sdatamn = get_block(mat, m, n, nblocks)
-                    u22(sdatamk, sdatank, sdatamn)
-                end
-            end
-        end
-
-    end
-end
+using LinearAlgebra
 
 function check(mat::Matrix{Float32})
     size_p = size(mat, 1)
 
     for i in 1:size_p
         for j in 1:size_p
-            if j > i
+            if j < i
                 mat[i, j] = 0.0f0
             end
         end
@@ -81,7 +13,7 @@ function check(mat::Matrix{Float32})
 
     test_mat ::Matrix{Float32} = zeros(Float32, size_p, size_p)
 
-    syrk!('L', 'N', 1.0f0, mat, 0.0f0, test_mat)
+    BLAS.syrk!('L', 'T', 1.0f0, mat, 0.0f0, test_mat)
 
     for i in 1:size_p
         for j in 1:size_p
@@ -97,12 +29,11 @@ function check(mat::Matrix{Float32})
         end
     end
 
-    println("Verification successful !")
+    println(stderr, "Verification successful !")
 end
 
-function main(size_p :: Int, nblocks :: Int, display = false)
-    mat :: Matrix{Float32} = zeros(Float32, size_p, size_p)
-
+function main(size_p :: Int; verify = false, verbose = false)
+    mat = zeros(Float32, size_p, size_p)
     # create a simple definite positive symetric matrix
     # Hilbert matrix h(i,j) = 1/(i+j+1)
 
@@ -112,28 +43,37 @@ function main(size_p :: Int, nblocks :: Int, display = false)
         end
     end
 
-    if display
+    if verbose
         display(mat)
     end
 
     t_start = time_ns()
 
-    cholesky(mat, size_p, nblocks)
+    cholesky!(mat)
 
     t_end = time_ns()
 
     flop = (1.0*size_p*size_p*size_p)/3.0
-    println("# size\tms\tGFlops")
     time_ms = (t_end-t_start) / 1e6
     gflops = flop/(time_ms*1000)/1000
-    println("# $size_p\t$time_ms\t$gflops")
+    println("$size_p\t$time_ms\t$gflops")
 
-    if display
+    if verbose
         display(mat)
     end
 
-    check(mat)
+    if verify
+        check(mat)
+    end
 end
 
-main(1024*20, 8)
+println("# size\tms\tGFlops")
+
+if length(ARGS) > 0 && ARGS[1] == "-quickcheck"
+    main(1024, verify = true)
+else
+    for size in 1024:1024:15360
+        main(size)
+    end
+end
 

+ 9 - 2
julia/examples/cholesky/cholesky_tag.jl

@@ -80,7 +80,14 @@ end
 starpu_init()
 starpu_cublas_init()
 
-main(1024, 8, verify = true)
-main(15360, 16)
+println("# size\tms\tGFlops")
+
+if length(ARGS) > 0 && ARGS[1] == "-quickcheck"
+    main(1024, 8, verify = true)
+else
+    for size in 1024:1024:15360
+        main(size, 16)
+    end
+end
 
 starpu_shutdown()

+ 2 - 1
mpi/examples/matrix_mult/mm.c

@@ -289,7 +289,8 @@ static struct starpu_codelet gemm_cl =
 {
 	.cpu_funcs = {cpu_mult}, /* cpu implementation(s) of the routine */
 	.nbuffers = 3, /* number of data handles referenced by this routine */
-	.modes = {STARPU_R, STARPU_R, STARPU_RW} /* access modes for each data handle */
+	.modes = {STARPU_R, STARPU_R, STARPU_RW}, /* access modes for each data handle */
+	.name = "gemm" /* to display task name in traces */
 };
 
 int main(int argc, char *argv[])

+ 8 - 4
src/core/perfmodel/regression.c

@@ -119,7 +119,8 @@ static unsigned find_list_size(struct starpu_perfmodel_history_list *list_histor
 	struct starpu_perfmodel_history_list *ptr = list_history;
 	while (ptr)
 	{
-		cnt++;
+		if (ptr->entry->nsample)
+			cnt++;
 		ptr = ptr->next;
 	}
 
@@ -146,11 +147,14 @@ static void dump_list(unsigned *x, double *y, struct starpu_perfmodel_history_li
 
 	while (ptr)
 	{
-		x[i] = ptr->entry->size;
-		y[i] = ptr->entry->mean;
+		if (ptr->entry->nsample)
+		{
+			x[i] = ptr->entry->size;
+			y[i] = ptr->entry->mean;
+			i++;
+		}
 
 		ptr = ptr->next;
-		i++;
 	}
 }
 

+ 4 - 0
src/core/workers.c

@@ -1754,6 +1754,8 @@ void starpu_pause()
 {
 	STARPU_HG_DISABLE_CHECKING(_starpu_config.pause_depth);
 	_starpu_config.pause_depth += 1;
+
+	starpu_fxt_trace_user_event_string("starpu_pause");
 }
 
 void starpu_resume()
@@ -1765,6 +1767,8 @@ void starpu_resume()
 		STARPU_PTHREAD_COND_BROADCAST(&pause_cond);
 	}
 	STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
+
+	starpu_fxt_trace_user_event_string("starpu_resume");
 }
 
 unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED)

+ 18 - 16
src/profiling/profiling.c

@@ -325,26 +325,28 @@ void _starpu_worker_stop_sleeping(int workerid)
 
 		STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
 
-		STARPU_ASSERT(worker_registered_sleeping_start[workerid] == 1);
-		sleeping_start = &sleeping_start_date[workerid];
-
-                /* Perhaps that profiling was enabled while the worker was
-                 * already blocked, so we don't measure (end - start), but
-                 * (end - max(start,worker_start)) where worker_start is the
-                 * date of the previous profiling info reset on the worker */
-		struct timespec *worker_start = &worker_info[workerid].start_time;
-		if (starpu_timespec_cmp(sleeping_start, worker_start, <))
+		if (worker_registered_sleeping_start[workerid] == 1)
 		{
-			/* sleeping_start < worker_start */
-			sleeping_start = worker_start;
-		}
+			sleeping_start = &sleeping_start_date[workerid];
+
+			/* Perhaps that profiling was enabled while the worker was
+			 * already blocked, so we don't measure (end - start), but
+			 * (end - max(start,worker_start)) where worker_start is the
+			 * date of the previous profiling info reset on the worker */
+			struct timespec *worker_start = &worker_info[workerid].start_time;
+			if (starpu_timespec_cmp(sleeping_start, worker_start, <))
+			{
+				/* sleeping_start < worker_start */
+				sleeping_start = worker_start;
+			}
 
-		struct timespec sleeping_time;
-		starpu_timespec_sub(&sleep_end_time, sleeping_start, &sleeping_time);
+			struct timespec sleeping_time;
+			starpu_timespec_sub(&sleep_end_time, sleeping_start, &sleeping_time);
 
-		starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time);
+			starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time);
 
-		worker_registered_sleeping_start[workerid] = 0;
+			worker_registered_sleeping_start[workerid] = 0;
+		}
 
 		STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);