# StarPU --- Runtime system for heterogeneous multicore architectures.
#
# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
#
# StarPU is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at
# your option) any later version.
#
# StarPU is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the GNU Lesser General Public License in COPYING.LGPL for more details.
#
 
- using StarPU
 
- using Printf
 
# Relative tolerance used by `check` when comparing results to the reference.
const EPSILON = 1e-6

"""
    check(alpha, X, Y)

Verify that every `Y[i]` equals `alpha * X[i] + 4.0` within a relative
tolerance of `EPSILON`.

# Arguments
- `alpha`: scalar multiplier applied to each element of `X`.
- `X`: input values; its indices drive the comparison.
- `Y`: computed results, indexed with the same indices as `X`.

# Throws
- `ErrorException` describing the first index whose value is out of tolerance.

Returns `nothing` when every element matches.
"""
function check(alpha, X, Y)
    for i in eachindex(X)
        expected_value = alpha * X[i] + 4.0
        # The tolerance must scale with the *magnitude* of the reference value:
        # a signed product would be negative for negative references and would
        # reject even an exact match.
        if abs(Y[i] - expected_value) > abs(expected_value) * EPSILON
            error("at ", i, ", ", alpha, "*", X[i], "+4.0=", Y[i], ", expected ", expected_value)
        end
    end
    return nothing
end
 
# Select the targets for the codelet defined below (CPU and CUDA, going by the
# constant names — confirm against the StarPU Julia documentation).
@target STARPU_CPU+STARPU_CUDA
# AXPY kernel: forwards both vectors, the length of `X`, the scalar `alpha` and
# unit strides to STARPU_SAXPY. Presumably this performs the BLAS-style update
# Y <- alpha * X + Y, which is what the host-side `check` expects.
# NOTE(review): this body is processed by the `@codelet` macro; keep its
# statements exactly as they are unless the macro is known to accept the change.
@codelet function axpy(X::Vector{Float32}, Y::Vector{Float32}, alpha::Float32)::Nothing
    STARPU_SAXPY(length(X), alpha, X, 1, Y, 1)
    return
end
 
"""
    axpy(N, NBLOCKS, alpha, display = true)

Run the `axpy` codelet over two `Float32` vectors of length `N`, split into
`NBLOCKS` blocks with one task submitted per block, then validate the result
with `check`.

`X` is filled with `1.0f0` and `Y` with `4.0f0`. Both vectors are pinned for the
duration of the run and are unpinned on every exit path, including when task
submission or the final `check` throws.

# Arguments
- `N`: number of elements in each vector.
- `NBLOCKS`: number of blocks passed to the block filter, and number of tasks.
- `alpha`: scalar multiplier; converted to `Float32` before being handed to the
  tasks.
- `display`: when `true`, print the first elements before/after the run and the
  measured timing.

Returns `nothing`.
"""
function axpy(N, NBLOCKS, alpha, display = true)
    # `fill` already returns a freshly allocated `Vector{Float32}`; wrapping it
    # in `Array(...)` would only make a second full copy.
    X = fill(1.0f0, N)
    Y = fill(4.0f0, N)

    starpu_memory_pin(X)
    starpu_memory_pin(Y)

    try
        block_filter = starpu_data_filter(STARPU_VECTOR_FILTER_BLOCK, NBLOCKS)

        perfmodel = starpu_perfmodel(
            perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED),
            symbol = "history_perf"
        )

        cl = starpu_codelet(
            cpu_func = CPU_CODELETS["axpy"],
            cuda_func = CUDA_CODELETS["axpy"],
            modes = [STARPU_R, STARPU_RW],
            perfmodel = perfmodel
        )

        if display
            println("BEFORE x[0] = ", X[1])
            println("BEFORE y[0] = ", Y[1])
        end

        # The codelet takes a Float32 scalar; convert once instead of per task.
        alpha32 = Float32(alpha)

        t_start = time_ns()

        @starpu_block let
            hX,hY = starpu_data_register(X, Y)

            starpu_data_partition(hX, block_filter)
            starpu_data_partition(hY, block_filter)

            # One task per block, each tagged with its block index.
            for b in 1:NBLOCKS
                task = starpu_task(cl = cl, handles = [hX[b],hY[b]], cl_arg=(alpha32,),
                                   tag=starpu_tag_t(b))
                starpu_task_submit(task)
            end
            starpu_task_wait_for_all()
        end

        t_end = time_ns()
        # time_ns() is in nanoseconds; convert the elapsed time to microseconds.
        timing = (t_end-t_start)/1000

        if display
            # 3*N*4 bytes: three vector-sized transfers of 4-byte elements.
            # Bytes per microsecond is numerically MB/s.
            @printf("timing -> %d us %.2f MB/s\n", timing, 3*N*4/timing)
            println("AFTER y[0] = ", Y[1], " (ALPHA=", alpha, ")")
        end

        check(alpha, X, Y)
    finally
        # Always release the pinned buffers, even if a step above failed.
        starpu_memory_unpin(X)
        starpu_memory_unpin(Y)
    end

    return nothing
end
 
"""
    main()

Entry point of the example: initialise StarPU, run a tiny warm-up `axpy` with
output disabled, run the full-size `axpy`, and shut StarPU down.

`starpu_shutdown()` is called on every exit path once `starpu_init()` has
returned, so a failure in either run does not skip the shutdown.

Returns `nothing`.
"""
function main()
    N = 16 * 1024 * 1024
    NBLOCKS = 8
    alpha = 3.41

    starpu_init()
    try
        starpu_cublas_init()

        # warmup: 10 elements, a single block, no printing
        axpy(10, 1, alpha, false)

        axpy(N, NBLOCKS, alpha)
    finally
        # Shut down even when a run above throws (e.g. `check` reports a mismatch).
        starpu_shutdown()
    end

    return nothing
end

main()
 
 
  |