# StarPU --- Runtime system for heterogeneous multicore architectures.
#
# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
#
# StarPU is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at
# your option) any later version.
#
# StarPU is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the GNU Lesser General Public License in COPYING.LGPL for more details.
#
using StarPU
using Printf

# Relative tolerance used by `check` when comparing computed and expected values.
const EPSILON = 1e-6

"""
    check(alpha, X, Y)

Verify that every `Y[i]` equals `alpha * X[i] + 4.0` within a relative
tolerance of `EPSILON`. The constant `4.0` matches the value `Y` is filled
with in `axpy` before the tasks run.

Throws an `ErrorException` describing the first mismatching index. `X` and `Y`
must share the same indices (`eachindex(X, Y)` rejects mismatched lengths).
"""
function check(alpha, X, Y)
    for i in eachindex(X, Y)
        expected_value = alpha * X[i] + 4.0
        # Scale the tolerance by the magnitude of the expected value: without
        # `abs`, a negative expected value yields a negative tolerance and the
        # comparison rejects every element.
        if abs(Y[i] - expected_value) > abs(expected_value) * EPSILON
            error("at ", i, ", ", alpha, "*", X[i], "+4.0=", Y[i], ", expected ", expected_value)
        end
    end
end

# Codelet "axpy", declared for both CPU and CUDA targets. It forwards to
# STARPU_SAXPY with unit strides over X and Y; `check` expects the result
# Y = alpha * X + Y. The body is processed by the `@codelet` macro, so it is
# kept exactly as written.
@target STARPU_CPU+STARPU_CUDA
@codelet function axpy(X :: Vector{Float32}, Y :: Vector{Float32}, alpha ::Float32) :: Nothing
    STARPU_SAXPY(length(X), alpha, X, 1, Y, 1)
    return
end

"""
    axpy(N, NBLOCKS, alpha, display = true)

Run the "axpy" codelet on two `Float32` vectors of length `N` (`X` filled with
`1.0f0`, `Y` filled with `4.0f0`), split into `NBLOCKS` blocks with one task
inserted per block, then validate the result with `check`.

When `display` is true, the first elements before/after and the elapsed time
are printed. The vectors are pinned for the duration of the run and are
unpinned even if a task or the validation throws. Returns `nothing`.
"""
function axpy(N, NBLOCKS, alpha, display = true)
    # `fill` already returns a `Vector{Float32}`; no extra `Array(...)` wrapper needed.
    X = fill(1.0f0, N)
    Y = fill(4.0f0, N)

    starpu_memory_pin(X)
    starpu_memory_pin(Y)

    try
        block_filter = starpu_data_filter(STARPU_VECTOR_FILTER_BLOCK, NBLOCKS)

        if display
            println("BEFORE x[0] = ", X[1])
            println("BEFORE y[0] = ", Y[1])
        end

        t_start = time_ns()

        @starpu_block let
            hX,hY = starpu_data_register(X, Y)

            starpu_data_partition(hX, block_filter)
            starpu_data_partition(hY, block_filter)

            for b in 1:NBLOCKS
                starpu_task_insert(codelet_name = "axpy",
                                   handles = [hX[b], hY[b]],
                                   cl_arg = (Float32(alpha),),
                                   tag = starpu_tag_t(b),
                                   modes = [STARPU_R, STARPU_RW])
            end

            starpu_task_wait_for_all()
        end

        t_end = time_ns()

        # time_ns() is in nanoseconds; dividing by 1000 gives microseconds.
        timing = (t_end-t_start)/1000

        if display
            # 3*N*4 bytes per microsecond is reported as MB/s
            # (presumably 3 vector accesses of 4-byte floats -- TODO confirm).
            @printf("timing -> %d us %.2f MB/s\n", timing, 3*N*4/timing)
            println("AFTER y[0] = ", Y[1], " (ALPHA=", alpha, ")")
        end

        check(alpha, X, Y)
    finally
        # Always release the pinned buffers, including when `check` or a
        # StarPU call raised an error.
        starpu_memory_unpin(X)
        starpu_memory_unpin(Y)
    end

    return nothing
end

"""
    main()

Initialise StarPU and cuBLAS, run a small warm-up `axpy`, then the full-size
run (16 Mi elements in 8 blocks, `alpha = 3.41`). StarPU is shut down even if
one of the runs throws. Returns `nothing`.
"""
function main()
    N = 16 * 1024 * 1024
    NBLOCKS = 8
    alpha = 3.41

    starpu_init()
    try
        starpu_cublas_init()

        # warmup
        axpy(10, 1, alpha, false)

        axpy(N, NBLOCKS, alpha)
    finally
        # Shut the runtime down on every exit path so a failed run does not
        # leave StarPU initialised.
        starpu_shutdown()
    end

    return nothing
end

main()