12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832 |
- @c -*-texinfo-*-
- @c This file is part of the StarPU Handbook.
- @c Copyright (C) 2009--2011 Universit@'e de Bordeaux 1
- @c Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
- @c Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
- @c See the file starpu.texi for copying conditions.
- @menu
- * Versioning::
- * Initialization and Termination::
- * Workers' Properties::
- * Data Management::
- * Data Interfaces::
- * Data Partition::
- * Codelets and Tasks::
- * Explicit Dependencies::
- * Implicit Data Dependencies::
- * Performance Model API::
- * Profiling API::
- * CUDA extensions::
- * OpenCL extensions::
- * Miscellaneous helpers::
- @end menu
- @node Versioning
- @section Versioning
- @defmac STARPU_MAJOR_VERSION
- Define the major version of StarPU
- @end defmac
- @defmac STARPU_MINOR_VERSION
- Define the minor version of StarPU
- @end defmac
- @node Initialization and Termination
- @section Initialization and Termination
- @deftypefun int starpu_init ({struct starpu_conf *}@var{conf})
- This is the StarPU initialization method, which must be called prior to any other
- StarPU call. It is possible to specify StarPU's configuration (e.g. scheduling
- policy, number of cores, ...) by passing a non-null argument. Default
- configuration is used if the passed argument is @code{NULL}.
- Upon successful completion, this function returns 0. Otherwise, @code{-ENODEV}
- indicates that no worker was available (so that StarPU was not initialized).
- @end deftypefun
- @deftp {Data Type} {struct starpu_driver}
- @table @asis
- @item @code{enum starpu_archtype type}
- The type of the driver. Only STARPU_CPU_DRIVER, STARPU_CUDA_DRIVER and
- STARPU_OPENCL_DRIVER are currently supported.
- @item @code{union id} Anonymous union
- @table @asis
- @item @code{unsigned cpu_id}
- Should only be used if type is STARPU_CPU_WORKER.
- @item @code{unsigned cuda_id}
- Should only be used if type is STARPU_CUDA_WORKER.
- @item @code{cl_device_id opencl_id}
- Should only be used if type is STARPU_OPENCL_WORKER.
- @end table
- @end table
- @end deftp
- @deftp {Data Type} {struct starpu_conf}
- This structure is passed to the @code{starpu_init} function in order
- to configure StarPU. It has to be initialized with @code{starpu_conf_init}.
- When the default value is used, StarPU automatically selects the number of
- processing units and takes the default scheduling policy. The environment
- variables overwrite the equivalent parameters.
- @table @asis
- @item @code{const char *sched_policy_name} (default = NULL)
- This is the name of the scheduling policy. This can also be specified
- with the @code{STARPU_SCHED} environment variable.
- @item @code{struct starpu_sched_policy *sched_policy} (default = NULL)
- This is the definition of the scheduling policy. This field is ignored
- if @code{sched_policy_name} is set.
- @item @code{int ncpus} (default = -1)
- This is the number of CPU cores that StarPU can use. This can also be
- specified with the @code{STARPU_NCPU} environment variable.
- @item @code{int ncuda} (default = -1)
- This is the number of CUDA devices that StarPU can use. This can also
- be specified with the @code{STARPU_NCUDA} environment variable.
- @item @code{int nopencl} (default = -1)
- This is the number of OpenCL devices that StarPU can use. This can
- also be specified with the @code{STARPU_NOPENCL} environment variable.
- @item @code{unsigned use_explicit_workers_bindid} (default = 0)
- If this flag is set, the @code{workers_bindid} array indicates where the
- different workers are bound, otherwise StarPU automatically selects where to
- bind the different workers. This can also be specified with the
- @code{STARPU_WORKERS_CPUID} environment variable.
- @item @code{unsigned workers_bindid[STARPU_NMAXWORKERS]}
- If the @code{use_explicit_workers_bindid} flag is set, this array
- indicates where to bind the different workers. The i-th entry of the
- @code{workers_bindid} indicates the logical identifier of the
- processor which should execute the i-th worker. Note that the logical
- ordering of the CPUs is either determined by the OS, or provided by
- the @code{hwloc} library in case it is available.
- @item @code{unsigned use_explicit_workers_cuda_gpuid} (default = 0)
- If this flag is set, the CUDA workers will be attached to the CUDA devices
- specified in the @code{workers_cuda_gpuid} array. Otherwise, StarPU assigns the
- CUDA devices in a round-robin fashion. This can also be specified with the
- @code{STARPU_WORKERS_CUDAID} environment variable.
- @item @code{unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS]}
- If the @code{use_explicit_workers_cuda_gpuid} flag is set, this array
- contains the logical identifiers of the CUDA devices (as used by
- @code{cudaGetDevice}).
- @item @code{unsigned use_explicit_workers_opencl_gpuid} (default = 0)
- If this flag is set, the OpenCL workers will be attached to the OpenCL devices
- specified in the @code{workers_opencl_gpuid} array. Otherwise, StarPU assigns
- the OpenCL devices in a round-robin fashion. This can also be specified with
- the @code{STARPU_WORKERS_OPENCLID} environment variable.
- @item @code{unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS]}
- If the @code{use_explicit_workers_opencl_gpuid} flag is set, this array
- contains the logical identifiers of the OpenCL devices to be used.
- @item @code{int calibrate} (default = 0)
- If this flag is set, StarPU will calibrate the performance models when
- executing tasks. If this value is equal to @code{-1}, the default value is
- used. If the value is equal to @code{1}, it will force continuing
- calibration. If the value is equal to @code{2}, the existing performance
- models will be overwritten. This can also be specified with the
- @code{STARPU_CALIBRATE} environment variable.
- @item @code{int bus_calibrate} (default = 0)
- If this flag is set, StarPU will recalibrate the bus. If this value is equal
- to @code{-1}, the default value is used. This can also be specified with the
- @code{STARPU_BUS_CALIBRATE} environment variable.
- @item @code{int single_combined_worker} (default = 0)
- By default, StarPU executes parallel tasks concurrently.
- Some parallel libraries (e.g. most OpenMP implementations) however do
- not support concurrent calls to parallel code. In such case, setting this flag
- makes StarPU only start one parallel task at a time (but other
- CPU and GPU tasks are not affected and can be run concurrently). The parallel
- task scheduler will however still try varying combined worker
- sizes to look for the most efficient ones.
- This can also be specified with the @code{STARPU_SINGLE_COMBINED_WORKER} environment variable.
- @item @code{int disable_asynchronous_copy} (default = 0)
- This flag should be set to 1 to disable asynchronous copies between
- CPUs and all accelerators. This can also be specified with the
- @code{STARPU_DISABLE_ASYNCHRONOUS_COPY} environment variable.
- The AMD implementation of OpenCL is known to
- fail when copying data asynchronously. When using this implementation,
- it is therefore necessary to disable asynchronous data transfers.
- This can also be specified at compilation time by giving to the
- configure script the option @code{--disable-asynchronous-copy}.
- @item @code{int disable_asynchronous_cuda_copy} (default = 0)
- This flag should be set to 1 to disable asynchronous copies between
- CPUs and CUDA accelerators. This can also be specified with the
- @code{STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY} environment variable.
- This can also be specified at compilation time by giving to the
- configure script the option @code{--disable-asynchronous-cuda-copy}.
- @item @code{int disable_asynchronous_opencl_copy} (default = 0)
- This flag should be set to 1 to disable asynchronous copies between
- CPUs and OpenCL accelerators. This can also be specified with the
- @code{STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY} environment variable.
- The AMD implementation of OpenCL is known to
- fail when copying data asynchronously. When using this implementation,
- it is therefore necessary to disable asynchronous data transfers.
- This can also be specified at compilation time by giving to the
- configure script the option @code{--disable-asynchronous-opencl-copy}.
- @item @code{int *cuda_opengl_interoperability} (default = NULL)
- This can be set to an array of CUDA device identifiers for which
- @code{cudaGLSetGLDevice} should be called instead of @code{cudaSetDevice}. Its
- size is specified by the @code{n_cuda_opengl_interoperability} field below.
- @item @code{int *n_cuda_opengl_interoperability} (default = 0)
- This has to be set to the size of the array pointed to by the
- @code{cuda_opengl_interoperability} field.
- @item @code{struct starpu_driver *not_launched_drivers}
- The drivers that should not be launched by StarPU.
- @item @code{unsigned n_not_launched_drivers}
- The number of StarPU drivers that should not be launched by StarPU.
- @item @code{trace_buffer_size}
- Specifies the buffer size used for FxT tracing. Starting from FxT version
- 0.2.12, the buffer will automatically be flushed when it fills in, but it may
- still be interesting to specify a bigger value to avoid any flushing (which
- would disturb the trace).
- @end table
- @end deftp
- @deftypefun int starpu_conf_init ({struct starpu_conf *}@var{conf})
- This function initializes the @var{conf} structure passed as argument
- with the default values. In case some configuration parameters are already
- specified through environment variables, @code{starpu_conf_init} initializes
- the fields of the structure according to the environment variables. For
- instance if @code{STARPU_CALIBRATE} is set, its value is put in the
- @code{.calibrate} field of the structure passed as argument.
- Upon successful completion, this function returns 0. Otherwise, @code{-EINVAL}
- indicates that the argument was NULL.
- @end deftypefun
- @deftypefun void starpu_shutdown (void)
- This is the StarPU termination method. It must be called at the end of the
- application: statistics and other post-mortem debugging information are not
- guaranteed to be available until this method has been called.
- @end deftypefun
- @deftypefun int starpu_asynchronous_copy_disabled (void)
- Return 1 if asynchronous data transfers between CPU and accelerators
- are disabled.
- @end deftypefun
- @deftypefun int starpu_asynchronous_cuda_copy_disabled (void)
- Return 1 if asynchronous data transfers between CPU and CUDA accelerators
- are disabled.
- @end deftypefun
- @deftypefun int starpu_asynchronous_opencl_copy_disabled (void)
- Return 1 if asynchronous data transfers between CPU and OpenCL accelerators
- are disabled.
- @end deftypefun
- @node Workers' Properties
- @section Workers' Properties
- @deftp {Data Type} {enum starpu_archtype}
- The different values are:
- @table @asis
- @item @code{STARPU_CPU_WORKER}
- @item @code{STARPU_CUDA_WORKER}
- @item @code{STARPU_OPENCL_WORKER}
- @end table
- @end deftp
- @deftypefun unsigned starpu_worker_get_count (void)
- This function returns the number of workers (i.e. processing units executing
- StarPU tasks). The returned value should be at most @code{STARPU_NMAXWORKERS}.
- @end deftypefun
- @deftypefun int starpu_worker_get_count_by_type ({enum starpu_archtype} @var{type})
- Returns the number of workers of the given @var{type}. A non-negative
- value is returned in case of success; otherwise, @code{-EINVAL} indicates that
- the type is not valid.
- @end deftypefun
- @deftypefun unsigned starpu_cpu_worker_get_count (void)
- This function returns the number of CPUs controlled by StarPU. The returned
- value should be at most @code{STARPU_MAXCPUS}.
- @end deftypefun
- @deftypefun unsigned starpu_cuda_worker_get_count (void)
- This function returns the number of CUDA devices controlled by StarPU. The returned
- value should be at most @code{STARPU_MAXCUDADEVS}.
- @end deftypefun
- @deftypefun unsigned starpu_opencl_worker_get_count (void)
- This function returns the number of OpenCL devices controlled by StarPU. The returned
- value should be at most @code{STARPU_MAXOPENCLDEVS}.
- @end deftypefun
- @deftypefun int starpu_worker_get_id (void)
- This function returns the identifier of the current worker, i.e. the one associated to the calling
- thread. The returned value is either -1 if the current context is not a StarPU
- worker (i.e. when called from the application outside a task or a callback), or
- an integer between 0 and @code{starpu_worker_get_count() - 1}.
- @end deftypefun
- @deftypefun int starpu_worker_get_ids_by_type ({enum starpu_archtype} @var{type}, int *@var{workerids}, int @var{maxsize})
- This function gets the list of identifiers of workers with the given
- type. It fills the workerids array with the identifiers of the workers that have the type
- indicated in the first argument. The maxsize argument indicates the size of the
- workerids array. The returned value gives the number of identifiers that were put
- in the array. @code{-ERANGE} is returned if maxsize is lower than the number of
- workers with the appropriate type: in that case, the array is filled with the
- maxsize first elements. To avoid such overflows, the value of maxsize can be
- chosen by the means of the @code{starpu_worker_get_count_by_type} function, or
- by passing a value greater or equal to @code{STARPU_NMAXWORKERS}.
- @end deftypefun
- @deftypefun int starpu_worker_get_by_type ({enum starpu_archtype} @var{type}, int @var{num})
- This returns the identifier of the @var{num}-th worker that has the specified type
- @var{type}. If there is no such worker, -1 is returned.
- @end deftypefun
- @deftypefun int starpu_worker_get_by_devid ({enum starpu_archtype} @var{type}, int @var{devid})
- This returns the identifier of the worker that has the specified type
- @var{type} and devid @var{devid} (which may not be the n-th, if some devices are
- skipped for instance). If there is no such worker, -1 is returned.
- @end deftypefun
- @deftypefun int starpu_worker_get_devid (int @var{id})
- This function returns the device id of the given worker. The worker
- should be identified with the value returned by the @code{starpu_worker_get_id} function. In the case of a
- CUDA worker, this device identifier is the logical device identifier exposed by
- CUDA (used by the @code{cudaGetDevice} function for instance). The device
- identifier of a CPU worker is the logical identifier of the core on which the
- worker was bound; this identifier is either provided by the OS or by the
- @code{hwloc} library in case it is available.
- @end deftypefun
- @deftypefun {enum starpu_archtype} starpu_worker_get_type (int @var{id})
- This function returns the type of processing unit associated to a
- worker. The worker identifier is a value returned by the
- @code{starpu_worker_get_id} function. The returned value
- indicates the architecture of the worker: @code{STARPU_CPU_WORKER} for a CPU
- core, @code{STARPU_CUDA_WORKER} for a CUDA device, and
- @code{STARPU_OPENCL_WORKER} for an OpenCL device. The value returned for an invalid
- identifier is unspecified.
- @end deftypefun
- @deftypefun void starpu_worker_get_name (int @var{id}, char *@var{dst}, size_t @var{maxlen})
- This function retrieves the name of a given worker.
- StarPU associates a unique human readable string to each processing unit. This
- function copies at most the @var{maxlen} first bytes of the unique string
- associated to a worker identified by its identifier @var{id} into the
- @var{dst} buffer. The caller is responsible for ensuring that the @var{dst}
- is a valid pointer to a buffer of @var{maxlen} bytes at least. Calling this
- function on an invalid identifier results in an unspecified behaviour.
- @end deftypefun
- @deftypefun unsigned starpu_worker_get_memory_node (unsigned @var{workerid})
- This function returns the identifier of the memory node associated to the
- worker identified by @var{workerid}.
- @end deftypefun
- @deftp {Data Type} {enum starpu_node_kind}
- todo
- @table @asis
- @item @code{STARPU_UNUSED}
- @item @code{STARPU_CPU_RAM}
- @item @code{STARPU_CUDA_RAM}
- @item @code{STARPU_OPENCL_RAM}
- @end table
- @end deftp
- @deftypefun {enum starpu_node_kind} starpu_node_get_kind (unsigned @var{node})
- Returns the type of the given node as defined by @code{enum
- starpu_node_kind}. For example, when defining a new data interface,
- this function should be used in the allocation function to determine
- on which device the memory needs to be allocated.
- @end deftypefun
- @node Data Management
- @section Data Management
- @menu
- * Introduction to Data Management::
- * Basic Data Management API::
- * Access registered data from the application::
- @end menu
- This section describes the data management facilities provided by StarPU.
- We show how to use existing data interfaces in @ref{Data Interfaces}, but developers can
- design their own data interfaces if required.
- @node Introduction to Data Management
- @subsection Introduction
- Data management is done at a high-level in StarPU: rather than accessing a mere
- list of contiguous buffers, the tasks may manipulate data that are described by
- a high-level construct which we call data interface.
- An example of data interface is the "vector" interface which describes a
- contiguous data array on a specific memory node. This interface is a simple
- structure containing the number of elements in the array, the size of the
- elements, and the address of the array in the appropriate address space (this
- address may be invalid if there is no valid copy of the array in the memory
- node). More information on the data interfaces provided by StarPU is
- given in @ref{Data Interfaces}.
- When a piece of data managed by StarPU is used by a task, the task
- implementation is given a pointer to an interface describing a valid copy of
- the data that is accessible from the current processing unit.
- Every worker is associated to a memory node which is a logical abstraction of
- the address space from which the processing unit gets its data. For instance,
- the memory node associated to the different CPU workers represents main memory
- (RAM), the memory node associated to a GPU is DRAM embedded on the device.
- Every memory node is identified by a logical index which is accessible from the
- @code{starpu_worker_get_memory_node} function. When registering a piece of data
- to StarPU, the specified memory node indicates where the piece of data
- initially resides (we also call this memory node the home node of a piece of
- data).
- @node Basic Data Management API
- @subsection Basic Data Management API
- @deftypefun int starpu_malloc (void **@var{A}, size_t @var{dim})
- This function allocates data of the given size in main memory. It will also try to pin it in
- CUDA or OpenCL, so that data transfers from this buffer can be asynchronous, and
- thus permit data transfer and computation overlapping. The allocated buffer must
- be freed thanks to the @code{starpu_free} function.
- @end deftypefun
- @deftypefun void starpu_malloc_set_align (size_t @var{align})
- This function sets an alignment constraint for @code{starpu_malloc}
- allocations. @var{align} must be a power of two. This is for instance called
- automatically by the OpenCL driver to specify its own alignment constraints.
- @end deftypefun
- @deftypefun int starpu_free (void *@var{A})
- This function frees memory which has previously been allocated with
- @code{starpu_malloc}.
- @end deftypefun
- @deftp {Data Type} {enum starpu_access_mode}
- This datatype describes a data access mode. The different available modes are:
- @table @asis
- @item @code{STARPU_R}: read-only mode.
- @item @code{STARPU_W}: write-only mode.
- @item @code{STARPU_RW}: read-write mode.
- This is equivalent to @code{STARPU_R|STARPU_W}.
- @item @code{STARPU_SCRATCH}: scratch memory.
- A temporary buffer is allocated for the task, but StarPU does not
- enforce data consistency---i.e. each device has its own buffer,
- independently from each other (even for CPUs), and no data transfer is
- ever performed. This is useful for temporary variables to avoid
- allocating/freeing buffers inside each task.
- Currently, no behavior is defined concerning the relation with the
- @code{STARPU_R} and @code{STARPU_W} modes and the value provided at
- registration---i.e., the value of the scratch buffer is undefined at
- entry of the codelet function. It is being considered for future
- extensions at least to define the initial value. For now, data to be
- used in @code{SCRATCH} mode should be registered with node @code{-1} and
- a @code{NULL} pointer, since the value of the provided buffer is simply
- ignored for now.
- @item @code{STARPU_REDUX}: reduction mode. TODO!
- @end table
- @end deftp
- @deftp {Data Type} {starpu_data_handle_t}
- StarPU uses @code{starpu_data_handle_t} as an opaque handle to manage a piece of
- data. Once a piece of data has been registered to StarPU, it is associated to a
- @code{starpu_data_handle_t} which keeps track of the state of the piece of data
- over the entire machine, so that we can maintain data consistency and locate
- data replicates for instance.
- @end deftp
- @deftypefun void starpu_data_register (starpu_data_handle_t *@var{handleptr}, unsigned @var{home_node}, void *@var{data_interface}, {struct starpu_data_interface_ops} *@var{ops})
- Register a piece of data into the handle located at the @var{handleptr}
- address. The @var{data_interface} buffer contains the initial description of the
- data in the home node. The @var{ops} argument is a pointer to a structure
- describing the different methods used to manipulate this type of interface. See
- @ref{struct starpu_data_interface_ops} for more details on this structure.
- If @code{home_node} is -1, StarPU will automatically
- allocate the memory when it is used for the
- first time in write-only mode. Once such data handle has been automatically
- allocated, it is possible to access it using any access mode.
- Note that StarPU supplies a set of predefined types of interface (e.g. vector or
- matrix) which can be registered by the means of helper functions (e.g.
- @code{starpu_vector_data_register} or @code{starpu_matrix_data_register}).
- @end deftypefun
- @deftypefun void starpu_data_register_same ({starpu_data_handle_t *}@var{handledst}, starpu_data_handle_t @var{handlesrc})
- Register a new piece of data into the handle @var{handledst} with the
- same interface as the handle @var{handlesrc}.
- @end deftypefun
- @deftypefun void starpu_data_unregister (starpu_data_handle_t @var{handle})
- This function unregisters a data handle from StarPU. If the data was
- automatically allocated by StarPU because the home node was -1, all
- automatically allocated buffers are freed. Otherwise, a valid copy of the data
- is put back into the home node in the buffer that was initially registered.
- Using a data handle that has been unregistered from StarPU results in
- undefined behaviour.
- @end deftypefun
- @deftypefun void starpu_data_unregister_no_coherency (starpu_data_handle_t @var{handle})
- This is the same as @code{starpu_data_unregister}, except that StarPU does not put back
- a valid copy into the home node, in the buffer that was initially registered.
- @end deftypefun
- @deftypefun void starpu_data_unregister_submit (starpu_data_handle_t @var{handle})
- Destroy the data handle once it is not needed anymore by any submitted
- task. No coherency is assumed.
- @end deftypefun
- @deftypefun void starpu_data_invalidate (starpu_data_handle_t @var{handle})
- Destroy all replicates of the data handle immediately. After data invalidation,
- the first access to the handle must be performed in write-only mode.
- Accessing an invalidated data in read-mode results in undefined
- behaviour.
- @end deftypefun
- @deftypefun void starpu_data_invalidate_submit (starpu_data_handle_t @var{handle})
- Submits invalidation of the data handle after completion of previously submitted tasks.
- @end deftypefun
- @c TODO create a specific sections about user interaction with the DSM ?
- @deftypefun void starpu_data_set_wt_mask (starpu_data_handle_t @var{handle}, uint32_t @var{wt_mask})
- This function sets the write-through mask of a given data, i.e. a bitmask of
- nodes where the data should be always replicated after modification. It also
- prevents the data from being evicted from these nodes when memory gets scarce.
- @end deftypefun
- @deftypefun int starpu_data_prefetch_on_node (starpu_data_handle_t @var{handle}, unsigned @var{node}, unsigned @var{async})
- Issue a prefetch request for a given data to a given node, i.e.
- requests that the data be replicated to the given node, so that it is available
- there for tasks. If the @var{async} parameter is 0, the call will block until
- the transfer is completed; otherwise the call will return as soon as the request is
- scheduled (which may however have to wait for a task completion).
- @end deftypefun
- @deftypefun starpu_data_handle_t starpu_data_lookup ({const void *}@var{ptr})
- Return the handle corresponding to the data pointed to by the @var{ptr}
- host pointer.
- @end deftypefun
- @deftypefun int starpu_data_request_allocation (starpu_data_handle_t @var{handle}, unsigned @var{node})
- Explicitly ask StarPU to allocate room for a piece of data on the specified
- memory node.
- @end deftypefun
- @deftypefun void starpu_data_query_status (starpu_data_handle_t @var{handle}, int @var{memory_node}, {int *}@var{is_allocated}, {int *}@var{is_valid}, {int *}@var{is_requested})
- Query the status of the handle on the specified memory node.
- @end deftypefun
- @deftypefun void starpu_data_advise_as_important (starpu_data_handle_t @var{handle}, unsigned @var{is_important})
- This function makes it possible to specify that a piece of data can be discarded
- without impacting the application.
- @end deftypefun
- @deftypefun void starpu_data_set_reduction_methods (starpu_data_handle_t @var{handle}, {struct starpu_codelet *}@var{redux_cl}, {struct starpu_codelet *}@var{init_cl})
- This sets the codelets to be used for the @var{handle} when it is accessed in
- REDUX mode. Per-worker buffers will be initialized with the @var{init_cl}
- codelet, and reduction between per-worker buffers will be done with the
- @var{redux_cl} codelet.
- @end deftypefun
- @node Access registered data from the application
- @subsection Access registered data from the application
- @deftypefun int starpu_data_acquire (starpu_data_handle_t @var{handle}, {enum starpu_access_mode} @var{mode})
- The application must call this function prior to accessing registered data from
- main memory outside tasks. StarPU ensures that the application will get an
- up-to-date copy of the data in main memory located where the data was
- originally registered, and that all concurrent accesses (e.g. from tasks) will
- be consistent with the access mode specified in the @var{mode} argument.
- @code{starpu_data_release} must be called once the application does not need to
- access the piece of data anymore. Note that implicit data
- dependencies are also enforced by @code{starpu_data_acquire}, i.e.
- @code{starpu_data_acquire} will wait for all tasks scheduled to work on
- the data, unless they have been disabled explicitly by calling
- @code{starpu_data_set_default_sequential_consistency_flag} or
- @code{starpu_data_set_sequential_consistency_flag}.
- @code{starpu_data_acquire} is a blocking call, so that it cannot be called from
- tasks or from their callbacks (in that case, @code{starpu_data_acquire} returns
- @code{-EDEADLK}). Upon successful completion, this function returns 0.
- @end deftypefun
- @deftypefun int starpu_data_acquire_cb (starpu_data_handle_t @var{handle}, {enum starpu_access_mode} @var{mode}, void (*@var{callback})(void *), void *@var{arg})
- @code{starpu_data_acquire_cb} is the asynchronous equivalent of
- @code{starpu_data_acquire}. When the data specified in the first argument is
- available in the appropriate access mode, the callback function is executed.
- The application may access the requested data during the execution of this
- callback. The callback function must call @code{starpu_data_release} once the
- application does not need to access the piece of data anymore.
- Note that implicit data dependencies are also enforced by
- @code{starpu_data_acquire_cb} in case they are not disabled.
- Contrary to @code{starpu_data_acquire}, this function is non-blocking and may
- be called from task callbacks. Upon successful completion, this function
- returns 0.
- @end deftypefun
- @deftypefun int starpu_data_acquire_on_node (starpu_data_handle_t @var{handle}, unsigned @var{node}, {enum starpu_access_mode} @var{mode})
- This is the same as @code{starpu_data_acquire}, except that the data will be
- available on the given memory node instead of main memory.
- @end deftypefun
- @deftypefun int starpu_data_acquire_on_node_cb (starpu_data_handle_t @var{handle}, unsigned @var{node}, {enum starpu_access_mode} @var{mode}, void (*@var{callback})(void *), void *@var{arg})
- This is the same as @code{starpu_data_acquire_cb}, except that the data will be
- available on the given memory node instead of main memory.
- @end deftypefun
- @defmac STARPU_DATA_ACQUIRE_CB (starpu_data_handle_t @var{handle}, {enum starpu_access_mode} @var{mode}, code)
- @code{STARPU_DATA_ACQUIRE_CB} is the same as @code{starpu_data_acquire_cb},
- except that the code to be executed in a callback is directly provided as a
- macro parameter, and the data handle is automatically released after it. This
- makes it easy to execute code which depends on the value of some registered
- data. This is non-blocking too and may be called from task callbacks.
- @end defmac
- @deftypefun void starpu_data_release (starpu_data_handle_t @var{handle})
- This function releases the piece of data acquired by the application either by
- @code{starpu_data_acquire} or by @code{starpu_data_acquire_cb}.
- @end deftypefun
- @deftypefun void starpu_data_release_on_node (starpu_data_handle_t @var{handle}, unsigned @var{node})
- This is the same as @code{starpu_data_release}, except that the data will be
- available on the given memory node instead of main memory.
- @end deftypefun
- @node Data Interfaces
- @section Data Interfaces
- @menu
- * Registering Data::
- * Accessing Data Interfaces::
- * Defining Interface::
- @end menu
- @node Registering Data
- @subsection Registering Data
- There are several ways to register a memory region so that it can be managed by
- StarPU. The functions below allow the registration of vectors, 2D matrices, 3D
- matrices as well as BCSR and CSR sparse matrices.
- @deftypefun void starpu_void_data_register ({starpu_data_handle_t *}@var{handle})
- Register a void interface. There is no data really associated to that
- interface, but it may be used as a synchronization mechanism. It also
- makes it possible to express an abstract piece of data that is managed by the
- application internally: this allows forbidding the
- concurrent execution of different tasks accessing the same "void" data
- in read-write mode.
- @end deftypefun
- @deftypefun void starpu_variable_data_register ({starpu_data_handle_t *}@var{handle}, unsigned @var{home_node}, uintptr_t @var{ptr}, size_t @var{size})
- Register the @var{size}-byte element pointed to by @var{ptr}, which is
- typically a scalar, and initialize @var{handle} to represent this data
- item.
- @cartouche
- @smallexample
- float var;
- starpu_data_handle_t var_handle;
- starpu_variable_data_register(&var_handle, 0, (uintptr_t)&var, sizeof(var));
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun void starpu_vector_data_register ({starpu_data_handle_t *}@var{handle}, unsigned @var{home_node}, uintptr_t @var{ptr}, uint32_t @var{nx}, size_t @var{elemsize})
- Register the @var{nx} @var{elemsize}-byte elements pointed to by
- @var{ptr} and initialize @var{handle} to represent it.
- @cartouche
- @smallexample
- float vector[NX];
- starpu_data_handle_t vector_handle;
- starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
- sizeof(vector[0]));
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun void starpu_matrix_data_register ({starpu_data_handle_t *}@var{handle}, unsigned @var{home_node}, uintptr_t @var{ptr}, uint32_t @var{ld}, uint32_t @var{nx}, uint32_t @var{ny}, size_t @var{elemsize})
- Register the @var{nx}x@var{ny} 2D matrix of @var{elemsize}-byte elements
- pointed to by @var{ptr} and initialize @var{handle} to represent it.
- @var{ld} specifies the number of elements between rows.
- A value greater than @var{nx} adds padding, which can be useful for
- alignment purposes.
- @cartouche
- @smallexample
- float *matrix;
- starpu_data_handle_t matrix_handle;
- matrix = (float*)malloc(width * height * sizeof(float));
- starpu_matrix_data_register(&matrix_handle, 0, (uintptr_t)matrix,
- width, width, height, sizeof(float));
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun void starpu_block_data_register ({starpu_data_handle_t *}@var{handle}, unsigned @var{home_node}, uintptr_t @var{ptr}, uint32_t @var{ldy}, uint32_t @var{ldz}, uint32_t @var{nx}, uint32_t @var{ny}, uint32_t @var{nz}, size_t @var{elemsize})
- Register the @var{nx}x@var{ny}x@var{nz} 3D matrix of @var{elemsize}-byte
- elements pointed to by @var{ptr} and initialize @var{handle} to represent
- it. Again, @var{ldy} and @var{ldz} specify the number of elements
- between rows and between z planes.
- @cartouche
- @smallexample
- float *block;
- starpu_data_handle_t block_handle;
- block = (float*)malloc(nx*ny*nz*sizeof(float));
- starpu_block_data_register(&block_handle, 0, (uintptr_t)block,
- nx, nx*ny, nx, ny, nz, sizeof(float));
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun void starpu_bcsr_data_register (starpu_data_handle_t *@var{handle}, unsigned @var{home_node}, uint32_t @var{nnz}, uint32_t @var{nrow}, uintptr_t @var{nzval}, uint32_t *@var{colind}, uint32_t *@var{rowptr}, uint32_t @var{firstentry}, uint32_t @var{r}, uint32_t @var{c}, size_t @var{elemsize})
- This variant of @code{starpu_data_register} uses the BCSR (Blocked
- Compressed Sparse Row Representation) sparse matrix interface.
- Register the sparse matrix made of @var{nnz} non-zero blocks of elements of size
- @var{elemsize} stored in @var{nzval} and initializes @var{handle} to represent
- it. Blocks have size @var{r} * @var{c}. @var{nrow} is the number of rows (in
- terms of blocks), @code{colind[i]} is the block-column index for block @code{i}
- in @code{nzval}, @code{rowptr[i]} is the block-index (in nzval) of the first block of row @code{i}.
- @var{firstentry} is the index of the first entry of the given arrays (usually 0
- or 1).
- @end deftypefun
- @deftypefun void starpu_csr_data_register (starpu_data_handle_t *@var{handle}, unsigned @var{home_node}, uint32_t @var{nnz}, uint32_t @var{nrow}, uintptr_t @var{nzval}, uint32_t *@var{colind}, uint32_t *@var{rowptr}, uint32_t @var{firstentry}, size_t @var{elemsize})
- This variant of @code{starpu_data_register} uses the CSR (Compressed
- Sparse Row Representation) sparse matrix interface.
- TODO
- @end deftypefun
- @deftypefun void starpu_coo_data_register (starpu_data_handle_t *@var{handleptr}, unsigned @var{home_node}, uint32_t @var{nx}, uint32_t @var{ny}, uint32_t @var{n_values}, uint32_t *@var{columns}, uint32_t *@var{rows}, uintptr_t @var{values}, size_t @var{elemsize})
- Register the @var{nx}x@var{ny} 2D matrix given in the COO format, using the
- @var{columns}, @var{rows}, @var{values} arrays, which must have @var{n_values}
- elements of size @var{elemsize}. Initialize @var{handleptr}.
- @end deftypefun
- @deftypefun {void *} starpu_data_get_interface_on_node (starpu_data_handle_t @var{handle}, unsigned @var{memory_node})
- Return the interface associated with @var{handle} on @var{memory_node}.
- @end deftypefun
- @node Accessing Data Interfaces
- @subsection Accessing Data Interfaces
- Each data interface is provided with a set of field access functions.
- The ones using a @code{void *} parameter are meant to be used in codelet
- implementations (see for example the code in @ref{Vector Scaling Using StarPU's API}).
- @deftp {Data Type} {enum starpu_data_interface_id}
- The different values are:
- @table @asis
- @item @code{STARPU_MATRIX_INTERFACE_ID}
- @item @code{STARPU_BLOCK_INTERFACE_ID}
- @item @code{STARPU_VECTOR_INTERFACE_ID}
- @item @code{STARPU_CSR_INTERFACE_ID}
- @item @code{STARPU_BCSR_INTERFACE_ID}
- @item @code{STARPU_VARIABLE_INTERFACE_ID}
- @item @code{STARPU_VOID_INTERFACE_ID}
- @item @code{STARPU_MULTIFORMAT_INTERFACE_ID}
- @item @code{STARPU_COO_INTERCACE_ID}
- @item @code{STARPU_NINTERFACES_ID}: number of data interfaces
- @end table
- @end deftp
- @menu
- * Accessing Handle::
- * Accessing Variable Data Interfaces::
- * Accessing Vector Data Interfaces::
- * Accessing Matrix Data Interfaces::
- * Accessing Block Data Interfaces::
- * Accessing BCSR Data Interfaces::
- * Accessing CSR Data Interfaces::
- * Accessing COO Data Interfaces::
- @end menu
- @node Accessing Handle
- @subsubsection Handle
- @deftypefun {void *} starpu_handle_to_pointer (starpu_data_handle_t @var{handle}, unsigned @var{node})
- Return the pointer associated with @var{handle} on node @var{node} or
- @code{NULL} if @var{handle}'s interface does not support this
- operation or data for this handle is not allocated on that node.
- @end deftypefun
- @deftypefun {void *} starpu_handle_get_local_ptr (starpu_data_handle_t @var{handle})
- Return the local pointer associated with @var{handle} or @code{NULL}
- if @var{handle}'s interface does not have data allocated locally
- @end deftypefun
- @deftypefun {enum starpu_data_interface_id} starpu_handle_get_interface_id (starpu_data_handle_t @var{handle})
- Return the unique identifier of the interface associated with the given @var{handle}.
- @end deftypefun
- @deftypefun size_t starpu_handle_get_size (starpu_data_handle_t @var{handle})
- Return the size of the data associated with @var{handle}.
- @end deftypefun
- @deftypefun int starpu_handle_pack_data (starpu_data_handle_t @var{handle}, {void **}@var{ptr}, {size_t *}@var{count})
- Execute the packing operation of the interface of the data registered
- at @var{handle} (@pxref{struct starpu_data_interface_ops}). This
- packing operation must allocate a buffer large enough at @var{ptr} and
- copy into the newly allocated buffer the data associated to
- @var{handle}.
- The function also sets @var{count} to the size of the data handle by calling
- @code{starpu_handle_get_size()}.
- @end deftypefun
- @deftypefun int starpu_handle_unpack_data (starpu_data_handle_t @var{handle}, {void *}@var{ptr}, size_t @var{count})
- Unpack in @var{handle} the data located at @var{ptr} of size
- @var{count} as described by the interface of the data. The interface
- registered at @var{handle} must define an unpacking operation
- (@pxref{struct starpu_data_interface_ops}). The memory at the address @code{ptr}
- is freed after calling the data unpacking operation.
- @end deftypefun
- @node Accessing Variable Data Interfaces
- @subsubsection Variable Data Interfaces
- @deftypefun size_t starpu_variable_get_elemsize (starpu_data_handle_t @var{handle})
- Return the size of the variable designated by @var{handle}.
- @end deftypefun
- @deftypefun uintptr_t starpu_variable_get_local_ptr (starpu_data_handle_t @var{handle})
- Return a pointer to the variable designated by @var{handle}.
- @end deftypefun
- @defmac STARPU_VARIABLE_GET_PTR ({void *}@var{interface})
- Return a pointer to the variable designated by @var{interface}.
- @end defmac
- @defmac STARPU_VARIABLE_GET_ELEMSIZE ({void *}@var{interface})
- Return the size of the variable designated by @var{interface}.
- @end defmac
- @defmac STARPU_VARIABLE_GET_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the variable designated by @var{interface}, to be
- used on OpenCL. The offset documented below has to be used in addition to this.
- @end defmac
- @defmac STARPU_VARIABLE_GET_OFFSET ({void *}@var{interface})
- Return the offset in the variable designated by @var{interface}, to be used
- with the device handle.
- @end defmac
- @node Accessing Vector Data Interfaces
- @subsubsection Vector Data Interfaces
- @deftypefun uint32_t starpu_vector_get_nx (starpu_data_handle_t @var{handle})
- Return the number of elements registered into the array designated by @var{handle}.
- @end deftypefun
- @deftypefun size_t starpu_vector_get_elemsize (starpu_data_handle_t @var{handle})
- Return the size of each element of the array designated by @var{handle}.
- @end deftypefun
- @deftypefun uintptr_t starpu_vector_get_local_ptr (starpu_data_handle_t @var{handle})
- Return the local pointer associated with @var{handle}.
- @end deftypefun
- @defmac STARPU_VECTOR_GET_PTR ({void *}@var{interface})
- Return a pointer to the array designated by @var{interface}, valid on CPUs and
- CUDA only. For OpenCL, the device handle and offset need to be used instead.
- @end defmac
- @defmac STARPU_VECTOR_GET_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the array designated by @var{interface}, to be used on OpenCL. The offset
- documented below has to be used in addition to this.
- @end defmac
- @defmac STARPU_VECTOR_GET_OFFSET ({void *}@var{interface})
- Return the offset in the array designated by @var{interface}, to be used with the device handle.
- @end defmac
- @defmac STARPU_VECTOR_GET_NX ({void *}@var{interface})
- Return the number of elements registered into the array designated by @var{interface}.
- @end defmac
- @defmac STARPU_VECTOR_GET_ELEMSIZE ({void *}@var{interface})
- Return the size of each element of the array designated by @var{interface}.
- @end defmac
- @node Accessing Matrix Data Interfaces
- @subsubsection Matrix Data Interfaces
- @deftypefun uint32_t starpu_matrix_get_nx (starpu_data_handle_t @var{handle})
- Return the number of elements on the x-axis of the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_matrix_get_ny (starpu_data_handle_t @var{handle})
- Return the number of elements on the y-axis of the matrix designated by
- @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_matrix_get_local_ld (starpu_data_handle_t @var{handle})
- Return the number of elements between each row of the matrix designated by
- @var{handle}. May be equal to nx when there is no padding.
- @end deftypefun
- @deftypefun uintptr_t starpu_matrix_get_local_ptr (starpu_data_handle_t @var{handle})
- Return the local pointer associated with @var{handle}.
- @end deftypefun
- @deftypefun size_t starpu_matrix_get_elemsize (starpu_data_handle_t @var{handle})
- Return the size of the elements registered into the matrix designated by
- @var{handle}.
- @end deftypefun
- @defmac STARPU_MATRIX_GET_PTR ({void *}@var{interface})
- Return a pointer to the matrix designated by @var{interface}, valid on CPUs and
- CUDA devices only. For OpenCL devices, the device handle and offset need to be
- used instead.
- @end defmac
- @defmac STARPU_MATRIX_GET_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the matrix designated by @var{interface}, to be used
- on OpenCL. The offset documented below has to be used in addition to this.
- @end defmac
- @defmac STARPU_MATRIX_GET_OFFSET ({void *}@var{interface})
- Return the offset in the matrix designated by @var{interface}, to be used with
- the device handle.
- @end defmac
- @defmac STARPU_MATRIX_GET_NX ({void *}@var{interface})
- Return the number of elements on the x-axis of the matrix designated by
- @var{interface}.
- @end defmac
- @defmac STARPU_MATRIX_GET_NY ({void *}@var{interface})
- Return the number of elements on the y-axis of the matrix designated by
- @var{interface}.
- @end defmac
- @defmac STARPU_MATRIX_GET_LD ({void *}@var{interface})
- Return the number of elements between each row of the matrix designated by
- @var{interface}. May be equal to nx when there is no padding.
- @end defmac
- @defmac STARPU_MATRIX_GET_ELEMSIZE ({void *}@var{interface})
- Return the size of the elements registered into the matrix designated by
- @var{interface}.
- @end defmac
- @node Accessing Block Data Interfaces
- @subsubsection Block Data Interfaces
- @deftypefun uint32_t starpu_block_get_nx (starpu_data_handle_t @var{handle})
- Return the number of elements on the x-axis of the block designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_block_get_ny (starpu_data_handle_t @var{handle})
- Return the number of elements on the y-axis of the block designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_block_get_nz (starpu_data_handle_t @var{handle})
- Return the number of elements on the z-axis of the block designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_block_get_local_ldy (starpu_data_handle_t @var{handle})
- Return the number of elements between each row of the block designated by
- @var{handle}, in the format of the current memory node.
- @end deftypefun
- @deftypefun uint32_t starpu_block_get_local_ldz (starpu_data_handle_t @var{handle})
- Return the number of elements between each z plane of the block designated by
- @var{handle}, in the format of the current memory node.
- @end deftypefun
- @deftypefun uintptr_t starpu_block_get_local_ptr (starpu_data_handle_t @var{handle})
- Return the local pointer associated with @var{handle}.
- @end deftypefun
- @deftypefun size_t starpu_block_get_elemsize (starpu_data_handle_t @var{handle})
- Return the size of the elements of the block designated by @var{handle}.
- @end deftypefun
- @defmac STARPU_BLOCK_GET_PTR ({void *}@var{interface})
- Return a pointer to the block designated by @var{interface}.
- @end defmac
- @defmac STARPU_BLOCK_GET_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the block designated by @var{interface}, to be used
- on OpenCL. The offset documented below has to be used in addition to this.
- @end defmac
- @defmac STARPU_BLOCK_GET_OFFSET ({void *}@var{interface})
- Return the offset in the block designated by @var{interface}, to be used with
- the device handle.
- @end defmac
- @defmac STARPU_BLOCK_GET_NX ({void *}@var{interface})
- Return the number of elements on the x-axis of the block designated by @var{interface}.
- @end defmac
- @defmac STARPU_BLOCK_GET_NY ({void *}@var{interface})
- Return the number of elements on the y-axis of the block designated by @var{interface}.
- @end defmac
- @defmac STARPU_BLOCK_GET_NZ ({void *}@var{interface})
- Return the number of elements on the z-axis of the block designated by @var{interface}.
- @end defmac
- @defmac STARPU_BLOCK_GET_LDY ({void *}@var{interface})
- Return the number of elements between each row of the block designated by
- @var{interface}. May be equal to nx when there is no padding.
- @end defmac
- @defmac STARPU_BLOCK_GET_LDZ ({void *}@var{interface})
- Return the number of elements between each z plane of the block designated by
- @var{interface}. May be equal to nx*ny when there is no padding.
- @end defmac
- @defmac STARPU_BLOCK_GET_ELEMSIZE ({void *}@var{interface})
- Return the size of the elements of the block designated by @var{interface}.
- @end defmac
- @node Accessing BCSR Data Interfaces
- @subsubsection BCSR Data Interfaces
- @deftypefun uint32_t starpu_bcsr_get_nnz (starpu_data_handle_t @var{handle})
- Return the number of non-zero elements in the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_bcsr_get_nrow (starpu_data_handle_t @var{handle})
- Return the number of rows (in terms of blocks of size r*c) in the matrix
- designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_bcsr_get_firstentry (starpu_data_handle_t @var{handle})
- Return the index at which all arrays (the column indexes, the row pointers...)
- of the matrix designated by @var{handle} start.
- @end deftypefun
- @deftypefun uintptr_t starpu_bcsr_get_local_nzval (starpu_data_handle_t @var{handle})
- Return a pointer to the non-zero values of the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun {uint32_t *} starpu_bcsr_get_local_colind (starpu_data_handle_t @var{handle})
- Return a pointer to the column index, which holds the positions of the non-zero
- entries in the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun {uint32_t *} starpu_bcsr_get_local_rowptr (starpu_data_handle_t @var{handle})
- Return the row pointer array of the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_bcsr_get_r (starpu_data_handle_t @var{handle})
- Return the number of rows in a block.
- @end deftypefun
- @deftypefun uint32_t starpu_bcsr_get_c (starpu_data_handle_t @var{handle})
- Return the number of columns in a block.
- @end deftypefun
- @deftypefun size_t starpu_bcsr_get_elemsize (starpu_data_handle_t @var{handle})
- Return the size of the elements in the matrix designated by @var{handle}.
- @end deftypefun
- @defmac STARPU_BCSR_GET_NNZ ({void *}@var{interface})
- Return the number of non-zero values in the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_BCSR_GET_NZVAL ({void *}@var{interface})
- Return a pointer to the non-zero values of the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_BCSR_GET_NZVAL_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the array of non-zero values in the matrix designated
- by @var{interface}. The offset documented below has to be used in addition to
- this.
- @end defmac
- @defmac STARPU_BCSR_GET_COLIND ({void *}@var{interface})
- Return a pointer to the column index of the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_BCSR_GET_COLIND_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the column index of the matrix designated by
- @var{interface}. The offset documented below has to be used in addition to
- this.
- @end defmac
- @defmac STARPU_BCSR_GET_ROWPTR ({void *}@var{interface})
- Return a pointer to the row pointer array of the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_BCSR_GET_ROWPTR_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the row pointer array of the matrix designated by
- @var{interface}. The offset documented below has to be used in addition to
- this.
- @end defmac
- @defmac STARPU_BCSR_GET_OFFSET ({void *}@var{interface})
- Return the offset in the arrays (colind, rowptr, nzval) of the matrix
- designated by @var{interface}, to be used with the device handles.
- @end defmac
- @node Accessing CSR Data Interfaces
- @subsubsection CSR Data Interfaces
- @deftypefun uint32_t starpu_csr_get_nnz (starpu_data_handle_t @var{handle})
- Return the number of non-zero values in the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_csr_get_nrow (starpu_data_handle_t @var{handle})
- Return the size of the row pointer array of the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun uint32_t starpu_csr_get_firstentry (starpu_data_handle_t @var{handle})
- Return the index at which all arrays (the column indexes, the row pointers...)
- of the matrix designated by @var{handle} start.
- @end deftypefun
- @deftypefun uintptr_t starpu_csr_get_local_nzval (starpu_data_handle_t @var{handle})
- Return a local pointer to the non-zero values of the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun {uint32_t *} starpu_csr_get_local_colind (starpu_data_handle_t @var{handle})
- Return a local pointer to the column index of the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun {uint32_t *} starpu_csr_get_local_rowptr (starpu_data_handle_t @var{handle})
- Return a local pointer to the row pointer array of the matrix designated by @var{handle}.
- @end deftypefun
- @deftypefun size_t starpu_csr_get_elemsize (starpu_data_handle_t @var{handle})
- Return the size of the elements registered into the matrix designated by @var{handle}.
- @end deftypefun
- @defmac STARPU_CSR_GET_NNZ ({void *}@var{interface})
- Return the number of non-zero values in the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_CSR_GET_NROW ({void *}@var{interface})
- Return the size of the row pointer array of the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_CSR_GET_NZVAL ({void *}@var{interface})
- Return a pointer to the non-zero values of the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_CSR_GET_NZVAL_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the array of non-zero values in the matrix designated
- by @var{interface}. The offset documented below has to be used in addition to
- this.
- @end defmac
- @defmac STARPU_CSR_GET_COLIND ({void *}@var{interface})
- Return a pointer to the column index of the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_CSR_GET_COLIND_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the column index of the matrix designated by
- @var{interface}. The offset documented below has to be used in addition to
- this.
- @end defmac
- @defmac STARPU_CSR_GET_ROWPTR ({void *}@var{interface})
- Return a pointer to the row pointer array of the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_CSR_GET_ROWPTR_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the row pointer array of the matrix designated by
- @var{interface}. The offset documented below has to be used in addition to
- this.
- @end defmac
- @defmac STARPU_CSR_GET_OFFSET ({void *}@var{interface})
- Return the offset in the arrays (colind, rowptr, nzval) of the matrix
- designated by @var{interface}, to be used with the device handles.
- @end defmac
- @defmac STARPU_CSR_GET_FIRSTENTRY ({void *}@var{interface})
- Return the index at which all arrays (the column indexes, the row pointers...)
- of the @var{interface} start.
- @end defmac
- @defmac STARPU_CSR_GET_ELEMSIZE ({void *}@var{interface})
- Return the size of the elements registered into the matrix designated by @var{interface}.
- @end defmac
- @node Accessing COO Data Interfaces
- @subsubsection COO Data Interfaces
- @defmac STARPU_COO_GET_COLUMNS ({void *}@var{interface})
- Return a pointer to the column array of the matrix designated by
- @var{interface}.
- @end defmac
- @defmac STARPU_COO_GET_COLUMNS_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the column array of the matrix designated by
- @var{interface}, to be used on OpenCL. The offset documented below has to be
- used in addition to this.
- @end defmac
- @defmac STARPU_COO_GET_ROWS (interface)
- Return a pointer to the rows array of the matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_COO_GET_ROWS_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the row array of the matrix designated by
- @var{interface}, to be used on OpenCL. The offset documented below has to be
- used in addition to this.
- @end defmac
- @defmac STARPU_COO_GET_VALUES (interface)
- Return a pointer to the values array of the matrix designated by
- @var{interface}.
- @end defmac
- @defmac STARPU_COO_GET_VALUES_DEV_HANDLE ({void *}@var{interface})
- Return a device handle for the value array of the matrix designated by
- @var{interface}, to be used on OpenCL. The offset documented below has to be
- used in addition to this.
- @end defmac
- @defmac STARPU_COO_GET_OFFSET ({void *}@var{interface})
- Return the offset in the arrays of the COO matrix designated by @var{interface}.
- @end defmac
- @defmac STARPU_COO_GET_NX (interface)
- Return the number of elements on the x-axis of the matrix designated by
- @var{interface}.
- @end defmac
- @defmac STARPU_COO_GET_NY (interface)
- Return the number of elements on the y-axis of the matrix designated by
- @var{interface}.
- @end defmac
- @defmac STARPU_COO_GET_NVALUES (interface)
- Return the number of values registered in the matrix designated by
- @var{interface}.
- @end defmac
- @defmac STARPU_COO_GET_ELEMSIZE (interface)
- Return the size of the elements registered into the matrix designated by
- @var{interface}.
- @end defmac
- @node Defining Interface
- @subsection Defining Interface
- Applications can provide their own interface. An example is provided in
- @code{examples/interface}. A few helpers are provided.
- @deftypefun uintptr_t starpu_allocate_buffer_on_node (unsigned @var{dst_node}, size_t @var{size})
- Allocate @var{size} bytes on node @var{dst_node}. This returns 0 if the
- allocation failed; the allocation method should then return -ENOMEM as allocated size.
- @end deftypefun
- @deftypefun void starpu_free_buffer_on_node (unsigned @var{dst_node}, uintptr_t @var{addr}, size_t @var{size})
- Free @var{addr} of @var{size} bytes on node @var{dst_node}.
- @end deftypefun
- @node Data Partition
- @section Data Partition
- @menu
- * Basic API::
- * Predefined filter functions::
- @end menu
- @node Basic API
- @subsection Basic API
- @deftp {Data Type} {struct starpu_data_filter}
- The filter structure describes a data partitioning operation, to be given to the
- @code{starpu_data_partition} function, see @ref{starpu_data_partition}
- for an example. The different fields are:
- @table @asis
- @item @code{void (*filter_func)(void *father_interface, void* child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts)}
- This function fills the @code{child_interface} structure with interface
- information for the @code{id}-th child of the parent @code{father_interface} (among @code{nparts}).
- @item @code{unsigned nchildren}
- This is the number of parts to partition the data into.
- @item @code{unsigned (*get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle)}
- This returns the number of children. This can be used instead of @code{nchildren} when the number of
- children depends on the actual data (e.g. the number of blocks in a sparse
- matrix).
- @item @code{struct starpu_data_interface_ops *(*get_child_ops)(struct starpu_data_filter *, unsigned id)}
- In case the resulting children use a different data interface, this function
- returns which interface is used by child number @code{id}.
- @item @code{unsigned filter_arg}
- Allow to define an additional parameter for the filter function.
- @item @code{void *filter_arg_ptr}
- Allow to define an additional pointer parameter for the filter
- function, such as the sizes of the different parts.
- @end table
- @end deftp
- @deftypefun void starpu_data_partition (starpu_data_handle_t @var{initial_handle}, {struct starpu_data_filter *}@var{f})
- @anchor{starpu_data_partition}
- This requests partitioning one StarPU data @var{initial_handle} into several
- subdata according to the filter @var{f}, as shown in the following example:
- @cartouche
- @smallexample
- struct starpu_data_filter f = @{
- .filter_func = starpu_matrix_filter_block,
- .nchildren = nslicesx,
- .get_nchildren = NULL,
- .get_child_ops = NULL
- @};
- starpu_data_partition(A_handle, &f);
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun void starpu_data_unpartition (starpu_data_handle_t @var{root_data}, unsigned @var{gathering_node})
- This unapplies one filter, thus unpartitioning the data. The pieces of data are
- collected back into one big piece in the @var{gathering_node} (usually 0). Tasks
- working on the partitioned data must be already finished when calling @code{starpu_data_unpartition}.
- @cartouche
- @smallexample
- starpu_data_unpartition(A_handle, 0);
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun int starpu_data_get_nb_children (starpu_data_handle_t @var{handle})
- This function returns the number of children.
- @end deftypefun
- @deftypefun starpu_data_handle_t starpu_data_get_child (starpu_data_handle_t @var{handle}, unsigned @var{i})
- Return the @var{i}th child of the given @var{handle}, which must have been partitioned beforehand.
- @end deftypefun
- @deftypefun starpu_data_handle_t starpu_data_get_sub_data (starpu_data_handle_t @var{root_data}, unsigned @var{depth}, ... )
- After partitioning a StarPU data by applying a filter,
- @code{starpu_data_get_sub_data} can be used to get handles for each of
- the data portions. @var{root_data} is the parent data that was
- partitioned. @var{depth} is the number of filters to traverse (in
- case several filters have been applied, to e.g. partition in row
- blocks, and then in column blocks), and the subsequent
- parameters are the indexes. The function returns a handle to the
- subdata.
- @cartouche
- @smallexample
- h = starpu_data_get_sub_data(A_handle, 1, taskx);
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun starpu_data_handle_t starpu_data_vget_sub_data (starpu_data_handle_t @var{root_data}, unsigned @var{depth}, va_list @var{pa})
- This function is similar to @code{starpu_data_get_sub_data} but uses a
- va_list for the parameter list.
- @end deftypefun
- @deftypefun void starpu_data_map_filters (starpu_data_handle_t @var{root_data}, unsigned @var{nfilters}, ...)
- Applies @var{nfilters} filters to the handle designated by @var{root_data}
- recursively. @var{nfilters} pointers to variables of the type
- starpu_data_filter should be given.
- @end deftypefun
- @deftypefun void starpu_data_vmap_filters (starpu_data_handle_t @var{root_data}, unsigned @var{nfilters}, va_list @var{pa})
- Applies @var{nfilters} filters to the handle designated by @var{root_data}
- recursively. It uses a va_list of pointers to variables of the type
- starpu_data_filter.
- @end deftypefun
- @node Predefined filter functions
- @subsection Predefined filter functions
- @menu
- * Partitioning Vector Data::
- * Partitioning Matrix Data::
- * Partitioning 3D Matrix Data::
- * Partitioning BCSR Data::
- @end menu
- This section gives a partial list of the predefined partitioning functions.
- Examples on how to use them are shown in @ref{Partitioning Data}. The complete
- list can be found in @code{starpu_data_filters.h}.
- @node Partitioning Vector Data
- @subsubsection Partitioning Vector Data
- @deftypefun void starpu_vector_filter_block (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- Return in @code{*@var{child_interface}} the @var{id}th element of the
- vector represented by @var{father_interface} once partitioned in
- @var{nparts} chunks of equal size.
- @end deftypefun
- @deftypefun void starpu_vector_filter_block_shadow (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- Return in @code{*@var{child_interface}} the @var{id}th element of the
- vector represented by @var{father_interface} once partitioned in
- @var{nparts} chunks of equal size with a shadow border @code{filter_arg_ptr}, thus getting a vector of size (n-2*shadow)/nparts+2*shadow.
- The @code{filter_arg_ptr} field must be the shadow size cast into @code{void*}.
- IMPORTANT: This can only be used for read-only access, as no coherency is
- enforced for the shadowed parts.
- A usage example is available in examples/filters/shadow.c
- @end deftypefun
- @deftypefun void starpu_vector_filter_list (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- Return in @code{*@var{child_interface}} the @var{id}th element of the
- vector represented by @var{father_interface} once partitioned into
- @var{nparts} chunks according to the @code{filter_arg_ptr} field of
- @code{*@var{f}}.
- The @code{filter_arg_ptr} field must point to an array of @var{nparts}
- @code{uint32_t} elements, each of which specifies the number of elements
- in each chunk of the partition.
- @end deftypefun
- @deftypefun void starpu_vector_filter_divide_in_2 (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- Return in @code{*@var{child_interface}} the @var{id}th element of the
- vector represented by @var{father_interface} once partitioned in two
- chunks of equal size, ignoring @var{nparts}. Thus, @var{id} must be
- @code{0} or @code{1}.
- @end deftypefun
- @node Partitioning Matrix Data
- @subsubsection Partitioning Matrix Data
- @deftypefun void starpu_matrix_filter_block (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a dense Matrix along the x dimension, thus getting (x/nparts,y)
- matrices. If nparts does not divide x, the last submatrix contains the
- remainder.
- @end deftypefun
- @deftypefun void starpu_matrix_filter_block_shadow (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a dense Matrix along the x dimension, with a shadow border
- @code{filter_arg_ptr}, thus getting ((x-2*shadow)/nparts+2*shadow,y)
- matrices. If nparts does not divide x-2*shadow, the last submatrix contains the
- remainder.
- IMPORTANT: This can only be used for read-only access, as no coherency is
- enforced for the shadowed parts.
- A usage example is available in examples/filters/shadow2d.c
- @end deftypefun
- @deftypefun void starpu_matrix_filter_vertical_block (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a dense Matrix along the y dimension, thus getting (x,y/nparts)
- matrices. If nparts does not divide y, the last submatrix contains the
- remainder.
- @end deftypefun
- @deftypefun void starpu_matrix_filter_vertical_block_shadow (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a dense Matrix along the y dimension, with a shadow border
- @code{filter_arg_ptr}, thus getting (x,(y-2*shadow)/nparts+2*shadow)
- matrices. If nparts does not divide y-2*shadow, the last submatrix contains the
- remainder.
- IMPORTANT: This can only be used for read-only access, as no coherency is
- enforced for the shadowed parts.
- A usage example is available in examples/filters/shadow2d.c
- @end deftypefun
- @node Partitioning 3D Matrix Data
- @subsubsection Partitioning 3D Matrix Data
- A usage example is available in examples/filters/shadow3d.c
- @deftypefun void starpu_block_filter_block (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a 3D matrix along the X dimension, thus getting (x/nparts,y,z)
- 3D matrices. If nparts does not divide x, the last submatrix contains the
- remainder.
- @end deftypefun
- @deftypefun void starpu_block_filter_block_shadow (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a 3D matrix along the X dimension, with a shadow border
- @code{filter_arg_ptr}, thus getting ((x-2*shadow)/nparts+2*shadow,y,z) 3D
- matrices. If nparts does not divide x-2*shadow, the last submatrix contains the
- remainder.
- IMPORTANT: This can only be used for read-only access, as no coherency is
- enforced for the shadowed parts.
- @end deftypefun
- @deftypefun void starpu_block_filter_vertical_block (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a 3D matrix along the Y dimension, thus getting (x,y/nparts,z)
- 3D matrices. If nparts does not divide y, the last submatrix contains the
- remainder.
- @end deftypefun
- @deftypefun void starpu_block_filter_vertical_block_shadow (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a 3D matrix along the Y dimension, with a shadow border
- @code{filter_arg_ptr}, thus getting (x,(y-2*shadow)/nparts+2*shadow,z) 3D
- matrices. If nparts does not divide y-2*shadow, the last submatrix contains the
- remainder.
- IMPORTANT: This can only be used for read-only access, as no coherency is
- enforced for the shadowed parts.
- @end deftypefun
- @deftypefun void starpu_block_filter_depth_block (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a 3D matrix along the Z dimension, thus getting (x,y,z/nparts)
- 3D matrices. If nparts does not divide z, the last submatrix contains the
- remainder.
- @end deftypefun
- @deftypefun void starpu_block_filter_depth_block_shadow (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a 3D matrix along the Z dimension, with a shadow border
- @code{filter_arg_ptr}, thus getting (x,y,(z-2*shadow)/nparts+2*shadow)
- 3D matrices. If nparts does not divide z-2*shadow, the last submatrix contains the
- remainder.
- IMPORTANT: This can only be used for read-only access, as no coherency is
- enforced for the shadowed parts.
- @end deftypefun
- @node Partitioning BCSR Data
- @subsubsection Partitioning BCSR Data
- @deftypefun void starpu_bcsr_filter_canonical_block (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a block-sparse matrix into dense matrices.
- @end deftypefun
- @deftypefun void starpu_csr_filter_vertical_block (void *@var{father_interface}, void *@var{child_interface}, {struct starpu_data_filter} *@var{f}, unsigned @var{id}, unsigned @var{nparts})
- This partitions a block-sparse matrix into vertical block-sparse matrices.
- @end deftypefun
- @node Codelets and Tasks
- @section Codelets and Tasks
- This section describes the interface to manipulate codelets and tasks.
- @deftp {Data Type} {enum starpu_codelet_type}
- Describes the type of parallel task. The different values are:
- @table @asis
- @item @code{STARPU_SEQ} (default) for classical sequential tasks.
- @item @code{STARPU_SPMD} for a parallel task whose threads are handled by
- StarPU, the code has to use @code{starpu_combined_worker_get_size} and
- @code{starpu_combined_worker_get_rank} to distribute the work
- @item @code{STARPU_FORKJOIN} for a parallel task whose threads are started by
- the codelet function, which has to use @code{starpu_combined_worker_get_size} to
- determine how many threads should be started.
- @end table
- See @ref{Parallel Tasks} for details.
- @end deftp
- @defmac STARPU_CPU
- This macro is used when setting the field @code{where} of a @code{struct
- starpu_codelet} to specify the codelet may be executed on a CPU
- processing unit.
- @end defmac
- @defmac STARPU_CUDA
- This macro is used when setting the field @code{where} of a @code{struct
- starpu_codelet} to specify the codelet may be executed on a CUDA
- processing unit.
- @end defmac
- @defmac STARPU_OPENCL
- This macro is used when setting the field @code{where} of a @code{struct
- starpu_codelet} to specify the codelet may be executed on an OpenCL
- processing unit.
- @end defmac
- @defmac STARPU_MULTIPLE_CPU_IMPLEMENTATIONS
- Setting the field @code{cpu_func} of a @code{struct starpu_codelet}
- with this macro indicates the codelet will have several
- implementations. The use of this macro is deprecated. One should
- always only define the field @code{cpu_funcs}.
- @end defmac
- @defmac STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS
- Setting the field @code{cuda_func} of a @code{struct starpu_codelet}
- with this macro indicates the codelet will have several
- implementations. The use of this macro is deprecated. One should
- always only define the field @code{cuda_funcs}.
- @end defmac
- @defmac STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS
- Setting the field @code{opencl_func} of a @code{struct starpu_codelet}
- with this macro indicates the codelet will have several
- implementations. The use of this macro is deprecated. One should
- always only define the field @code{opencl_funcs}.
- @end defmac
- @deftp {Data Type} {struct starpu_codelet}
- The codelet structure describes a kernel that is possibly implemented on various
- targets. For compatibility, make sure to initialize the whole structure to zero,
- either by using explicit memset, or by letting the compiler implicitly do it in
- e.g. static storage case.
- @table @asis
- @item @code{uint32_t where} (optional)
- Indicates which types of processing units are able to execute the
- codelet. The different values
- @code{STARPU_CPU}, @code{STARPU_CUDA},
- @code{STARPU_OPENCL} can be combined to specify
- on which types of processing units the codelet can be executed.
- @code{STARPU_CPU|STARPU_CUDA} for instance indicates that the codelet is
- implemented for both CPU cores and CUDA devices while @code{STARPU_OPENCL}
- indicates that it is only available on OpenCL devices. If the field is
- unset, its value will be automatically set based on the availability
- of the @code{XXX_funcs} fields defined below.
- @item @code{int (*can_execute)(unsigned workerid, struct starpu_task *task, unsigned nimpl)} (optional)
- Defines a function which should return 1 if the worker designated by
- @var{workerid} can execute the @var{nimpl}th implementation of the
- given @var{task}, 0 otherwise.
- @item @code{enum starpu_codelet_type type} (optional)
- The default is @code{STARPU_SEQ}, i.e. usual sequential implementation. Other
- values (@code{STARPU_SPMD} or @code{STARPU_FORKJOIN}) declare that a parallel
- implementation is also available. See @ref{Parallel Tasks} for details.
- @item @code{int max_parallelism} (optional)
- If a parallel implementation is available, this denotes the maximum combined
- worker size that StarPU will use to execute parallel tasks for this codelet.
- @item @code{starpu_cpu_func_t cpu_func} (optional)
- This field has been made deprecated. One should use instead the
- @code{cpu_funcs} field.
- @item @code{starpu_cpu_func_t cpu_funcs[STARPU_MAXIMPLEMENTATIONS]} (optional)
- Is an array of function pointers to the CPU implementations of the codelet.
- It must be terminated by a NULL value.
- The function prototype must be: @code{void cpu_func(void *buffers[], void *cl_arg)}. The first
- argument is the array of data managed by the data management library, and
- the second argument is a pointer to the argument passed from the @code{cl_arg}
- field of the @code{starpu_task} structure.
- If the @code{where} field is set, then the @code{cpu_funcs} field is
- ignored if @code{STARPU_CPU} does not appear in the @code{where}
- field, it must be non-null otherwise.
- @item @code{starpu_cuda_func_t cuda_func} (optional)
- This field has been made deprecated. One should use instead the
- @code{cuda_funcs} field.
- @item @code{starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS]} (optional)
- Is an array of function pointers to the CUDA implementations of the codelet.
- It must be terminated by a NULL value.
- @emph{The functions must be host-functions written in the CUDA runtime
- API}. Their prototype must
- be: @code{void cuda_func(void *buffers[], void *cl_arg);}.
- If the @code{where} field is set, then the @code{cuda_funcs}
- field is ignored if @code{STARPU_CUDA} does not appear in the @code{where}
- field, it must be non-null otherwise.
- @item @code{starpu_opencl_func_t opencl_func} (optional)
- This field has been made deprecated. One should use instead the
- @code{opencl_funcs} field.
- @item @code{starpu_opencl_func_t opencl_funcs[STARPU_MAXIMPLEMENTATIONS]} (optional)
- Is an array of function pointers to the OpenCL implementations of the codelet.
- It must be terminated by a NULL value.
- The function prototype must be:
- @code{void opencl_func(void *buffers[], void *cl_arg);}.
- If the @code{where} field is set, then the @code{opencl_funcs} field
- is ignored if @code{STARPU_OPENCL} does not appear in the @code{where}
- field, it must be non-null otherwise.
- @item @code{unsigned nbuffers}
- Specifies the number of arguments taken by the codelet. These arguments are
- managed by the DSM and are accessed from the @code{void *buffers[]}
- array. The constant argument passed with the @code{cl_arg} field of the
- @code{starpu_task} structure is not counted in this number. This value should
- not be above @code{STARPU_NMAXBUFS}.
- @item @code{enum starpu_access_mode modes[STARPU_NMAXBUFS]}
- Is an array of @code{enum starpu_access_mode}. It describes the
- required access modes to the data needed by the codelet (e.g.
- @code{STARPU_RW}). The number of entries in this array must be
- specified in the @code{nbuffers} field (defined above), and should not
- exceed @code{STARPU_NMAXBUFS}.
- If insufficient, this value can be set with the @code{--enable-maxbuffers}
- option when configuring StarPU.
- @item @code{struct starpu_perfmodel *model} (optional)
- This is a pointer to the task duration performance model associated to this
- codelet. This optional field is ignored when set to @code{NULL} or
- when its @code{symbol} field is not set.
- @item @code{struct starpu_perfmodel *power_model} (optional)
- This is a pointer to the task power consumption performance model associated
- to this codelet. This optional field is ignored when set to
- @code{NULL} or when its @code{symbol} field is not set.
- In the case of parallel codelets, this has to account for all processing units
- involved in the parallel execution.
- @item @code{unsigned long per_worker_stats[STARPU_NMAXWORKERS]} (optional)
- Statistics collected at runtime: this is filled by StarPU and should not be
- accessed directly, but for example by calling the
- @code{starpu_display_codelet_stats} function (See
- @ref{starpu_display_codelet_stats} for details).
- @item @code{const char *name} (optional)
- Define the name of the codelet. This can be useful for debugging purposes.
- @end table
- @end deftp
- @deftypefun void starpu_codelet_init ({struct starpu_codelet} *@var{cl})
- Initialize @var{cl} with default values. Codelets should preferably be
- initialized statically as shown in @ref{Defining a Codelet}. However
- such an initialisation is not always possible, e.g. when using C++.
- @end deftypefun
- @deftp {Data Type} {enum starpu_task_status}
- State of a task, can be either of
- @table @asis
- @item @code{STARPU_TASK_INVALID} The task has just been initialized.
- @item @code{STARPU_TASK_BLOCKED} The task has just been submitted, and its dependencies have not been checked yet.
- @item @code{STARPU_TASK_READY} The task is ready for execution.
- @item @code{STARPU_TASK_RUNNING} The task is running on some worker.
- @item @code{STARPU_TASK_FINISHED} The task is finished executing.
- @item @code{STARPU_TASK_BLOCKED_ON_TAG} The task is waiting for a tag.
- @item @code{STARPU_TASK_BLOCKED_ON_TASK} The task is waiting for a task.
- @item @code{STARPU_TASK_BLOCKED_ON_DATA} The task is waiting for some data.
- @end table
- @end deftp
- @deftp {Data Type} {struct starpu_buffer_descr}
- This type is used to describe a data handle along with an
- access mode.
- @table @asis
- @item @code{starpu_data_handle_t handle} describes a data,
- @item @code{enum starpu_access_mode mode} describes its access mode
- @end table
- @end deftp
- @deftp {Data Type} {struct starpu_task}
- The @code{starpu_task} structure describes a task that can be offloaded on the various
- processing units managed by StarPU. It instantiates a codelet. It can either be
- allocated dynamically with the @code{starpu_task_create} method, or declared
- statically. In the latter case, the programmer has to zero the
- @code{starpu_task} structure and to fill the different fields properly. The
- indicated default values correspond to the configuration of a task allocated
- with @code{starpu_task_create}.
- @table @asis
- @item @code{struct starpu_codelet *cl}
- Is a pointer to the corresponding @code{struct starpu_codelet} data structure. This
- describes where the kernel should be executed, and supplies the appropriate
- implementations. When set to @code{NULL}, no code is executed during the tasks,
- such empty tasks can be useful for synchronization purposes.
- @item @code{struct starpu_buffer_descr buffers[STARPU_NMAXBUFS]}
- This field has been made deprecated. One should use instead the
- @code{handles} field to specify the handles to the data accessed by
- the task. The access modes are now defined in the @code{modes} field of
- the @code{struct starpu_codelet cl} field defined above.
- @item @code{starpu_data_handle_t handles[STARPU_NMAXBUFS]}
- Is an array of @code{starpu_data_handle_t}. It specifies the handles
- to the different pieces of data accessed by the task. The number
- of entries in this array must be specified in the @code{nbuffers} field of the
- @code{struct starpu_codelet} structure, and should not exceed
- @code{STARPU_NMAXBUFS}.
- If insufficient, this value can be set with the @code{--enable-maxbuffers}
- option when configuring StarPU.
- @item @code{void *interfaces[STARPU_NMAXBUFS]}
- The actual data pointers to the memory node where execution will happen, managed
- by the DSM.
- @item @code{void *cl_arg} (optional; default: @code{NULL})
- This pointer is passed to the codelet through the second argument
- of the codelet implementation (e.g. @code{cpu_func} or @code{cuda_func}).
- @item @code{size_t cl_arg_size} (optional)
- For some specific drivers, the @code{cl_arg} pointer cannot be directly
- given to the driver function. A buffer of size @code{cl_arg_size}
- needs to be allocated on the driver. This buffer is then filled with
- the @code{cl_arg_size} bytes starting at address @code{cl_arg}. In
- this case, the argument given to the codelet is therefore not the
- @code{cl_arg} pointer, but the address of the buffer in local store
- (LS) instead.
- This field is ignored for CPU, CUDA and OpenCL codelets, where the
- @code{cl_arg} pointer is given as such.
- @item @code{void (*callback_func)(void *)} (optional) (default: @code{NULL})
- This is a function pointer of prototype @code{void (*f)(void *)} which
- specifies a possible callback. If this pointer is non-null, the callback
- function is executed @emph{on the host} after the execution of the task. Tasks
- which depend on it might already be executing. The callback is passed the
- value contained in the @code{callback_arg} field. No callback is executed if the
- field is set to @code{NULL}.
- @item @code{void *callback_arg} (optional) (default: @code{NULL})
- This is the pointer passed to the callback function. This field is ignored if
- the @code{callback_func} is set to @code{NULL}.
- @item @code{unsigned use_tag} (optional) (default: @code{0})
- If set, this flag indicates that the task should be associated with the tag
- contained in the @code{tag_id} field. Tags allow the application to synchronize
- with the task and to express task dependencies easily.
- @item @code{starpu_tag_t tag_id}
- This field contains the tag associated to the task if the @code{use_tag} field
- was set, it is ignored otherwise.
- @item @code{unsigned synchronous}
- If this flag is set, the @code{starpu_task_submit} function is blocking and
- returns only when the task has been executed (or if no worker is able to
- process the task). Otherwise, @code{starpu_task_submit} returns immediately.
- @item @code{int priority} (optional) (default: @code{STARPU_DEFAULT_PRIO})
- This field indicates a level of priority for the task. This is an integer value
- that must be set between the return values of the
- @code{starpu_sched_get_min_priority} function for the least important tasks,
- and that of the @code{starpu_sched_get_max_priority} for the most important
- tasks (included). The @code{STARPU_MIN_PRIO} and @code{STARPU_MAX_PRIO} macros
- are provided for convenience and respectively return the values of
- @code{starpu_sched_get_min_priority} and @code{starpu_sched_get_max_priority}.
- Default priority is @code{STARPU_DEFAULT_PRIO}, which is always defined as 0 in
- order to allow static task initialization. Scheduling strategies that take
- priorities into account can use this parameter to take better scheduling
- decisions, but the scheduling policy may also ignore it.
- @item @code{unsigned execute_on_a_specific_worker} (default: @code{0})
- If this flag is set, StarPU will bypass the scheduler and directly assign this
- task to the worker specified by the @code{workerid} field.
- @item @code{unsigned workerid} (optional)
- If the @code{execute_on_a_specific_worker} field is set, this field indicates
- which is the identifier of the worker that should process this task (as
- returned by @code{starpu_worker_get_id}). This field is ignored if
- @code{execute_on_a_specific_worker} field is set to 0.
- @item @code{starpu_task_bundle_t bundle} (optional)
- The bundle that includes this task. If no bundle is used, this should be NULL.
- @item @code{int detach} (optional) (default: @code{1})
- If this flag is set, it is not possible to synchronize with the task
- by the means of @code{starpu_task_wait} later on. Internal data structures
- are only guaranteed to be freed once @code{starpu_task_wait} is called if the
- flag is not set.
- @item @code{int destroy} (optional) (default: @code{0} for starpu_task_init, @code{1} for starpu_task_create)
- If this flag is set, the task structure will automatically be freed, either
- after the execution of the callback if the task is detached, or during
- @code{starpu_task_wait} otherwise. If this flag is not set, dynamically
- allocated data structures will not be freed until @code{starpu_task_destroy} is
- called explicitly. Setting this flag for a statically allocated task structure
- will result in undefined behaviour. The flag is set to 1 when the task is
- created by calling @code{starpu_task_create()}. Note that
- @code{starpu_task_wait_for_all} will not free any task.
- @item @code{int regenerate} (optional)
- If this flag is set, the task will be re-submitted to StarPU once it has been
- executed. This flag must not be set if the destroy flag is set too.
- @item @code{enum starpu_task_status status} (optional)
- Current state of the task.
- @item @code{struct starpu_task_profiling_info *profiling_info} (optional)
- Profiling information for the task.
- @item @code{double predicted} (output field)
- Predicted duration of the task. This field is only set if the scheduling
- strategy uses performance models.
- @item @code{double predicted_transfer} (optional)
- Predicted data transfer duration for the task in microseconds. This field is
- only valid if the scheduling strategy uses performance models.
- @item @code{struct starpu_task *prev}
- A pointer to the previous task. This should only be used by StarPU.
- @item @code{struct starpu_task *next}
- A pointer to the next task. This should only be used by StarPU.
- @item @code{unsigned int mf_skip}
- This is only used for tasks that use multiformat handle. This should only be
- used by StarPU.
- @item @code{double flops}
- This can be set to the number of floating points operations that the task
- will have to achieve. This is useful for easily getting GFlops curves from
- @code{starpu_perfmodel_plot}, and for the hypervisor load balancing.
- @item @code{void *starpu_private}
- This is private to StarPU, do not modify. If the task is allocated by hand
- (without starpu_task_create), this field should be set to NULL.
- @item @code{int magic}
- This field is set when initializing a task. It prevents a task from being
- submitted if it has not been properly initialized.
- @end table
- @end deftp
- @deftypefun void starpu_task_init ({struct starpu_task} *@var{task})
- Initialize @var{task} with default values. This function is implicitly
- called by @code{starpu_task_create}. By default, tasks initialized with
- @code{starpu_task_init} must be deinitialized explicitly with
- @code{starpu_task_clean}. Tasks can also be initialized statically,
- using @code{STARPU_TASK_INITIALIZER} defined below.
- @end deftypefun
- @defmac STARPU_TASK_INITIALIZER
- It is possible to initialize statically allocated tasks with this
- value. This is equivalent to initializing a starpu_task structure with
- the @code{starpu_task_init} function defined above.
- @end defmac
- @deftypefun {struct starpu_task *} starpu_task_create (void)
- Allocate a task structure and initialize it with default values. Tasks
- allocated dynamically with @code{starpu_task_create} are automatically freed when the
- task is terminated. This means that the task pointer can not be used any more
- once the task is submitted, since it can be executed at any time (unless
- dependencies make it wait) and thus freed at any time.
- If the destroy flag is explicitly unset, the resources used
- by the task have to be freed by calling
- @code{starpu_task_destroy}.
- @end deftypefun
- @deftypefun void starpu_task_clean ({struct starpu_task} *@var{task})
- Release all the structures automatically allocated to execute @var{task}, but
- not the task structure itself and values set by the user remain unchanged.
- It is thus useful for statically allocated tasks for instance.
- It is also useful when the user wants to execute the same operation several
- times with as little overhead as possible.
- It is called automatically by @code{starpu_task_destroy}.
- It has to be called only after explicitly waiting for the task or after
- @code{starpu_shutdown} (waiting for the callback is not enough, since starpu
- still manipulates the task after calling the callback).
- @end deftypefun
- @deftypefun void starpu_task_destroy ({struct starpu_task} *@var{task})
- Free the resource allocated during @code{starpu_task_create} and
- associated with @var{task}. This function is already called automatically
- after the execution of a task when the @code{destroy} flag of the
- @code{starpu_task} structure is set, which is the default for tasks created by
- @code{starpu_task_create}. Calling this function on a statically allocated task
- results in an undefined behaviour.
- @end deftypefun
- @deftypefun int starpu_task_wait ({struct starpu_task} *@var{task})
- This function blocks until @var{task} has been executed. It is not possible to
- synchronize with a task more than once. It is not possible to wait for
- synchronous or detached tasks.
- Upon successful completion, this function returns 0. Otherwise, @code{-EINVAL}
- indicates that the specified task was either synchronous or detached.
- @end deftypefun
- @deftypefun int starpu_task_submit ({struct starpu_task} *@var{task})
- This function submits @var{task} to StarPU. Calling this function does
- not mean that the task will be executed immediately as there can be data or task
- (tag) dependencies that are not fulfilled yet: StarPU will take care of
- scheduling this task with respect to such dependencies.
- This function returns immediately if the @code{synchronous} field of the
- @code{starpu_task} structure was set to 0, and blocks until the termination of
- the task otherwise. It is also possible to synchronize the application with
- asynchronous tasks by the means of tags, using the @code{starpu_tag_wait}
- function for instance.
- In case of success, this function returns 0, a return value of @code{-ENODEV}
- means that there is no worker able to process this task (e.g. there is no GPU
- available and this task is only implemented for CUDA devices).
- starpu_task_submit() can be called from anywhere, including codelet
- functions and callbacks, provided that the @code{synchronous} field of the
- @code{starpu_task} structure is left to 0.
- @end deftypefun
- @deftypefun int starpu_task_wait_for_all (void)
- This function blocks until all the tasks that were submitted are terminated. It
- does not destroy these tasks.
- @end deftypefun
- @deftypefun int starpu_task_nready (void)
- @end deftypefun
- @deftypefun int starpu_task_nsubmitted (void)
- Return the number of submitted tasks which have not completed yet.
- @end deftypefun
- @deftypefun int starpu_task_nready (void)
- Return the number of submitted tasks which are ready for execution or are already
- executing. It thus does not include tasks waiting for dependencies.
- @end deftypefun
- @deftypefun {struct starpu_task *} starpu_task_get_current (void)
- This function returns the task currently executed by the worker, or
- NULL if it is called either from a thread that is not a task or simply
- because there is no task being executed at the moment.
- @end deftypefun
- @deftypefun void starpu_display_codelet_stats ({struct starpu_codelet} *@var{cl})
- @anchor{starpu_display_codelet_stats}
- Output on @code{stderr} some statistics on the codelet @var{cl}.
- @end deftypefun
- @deftypefun int starpu_task_wait_for_no_ready (void)
- This function waits until there is no more ready task.
- @end deftypefun
- @c Callbacks: what can we put in callbacks ?
- @node Explicit Dependencies
- @section Explicit Dependencies
- @deftypefun void starpu_task_declare_deps_array ({struct starpu_task} *@var{task}, unsigned @var{ndeps}, {struct starpu_task} *@var{task_array}[])
- Declare task dependencies between a @var{task} and an array of tasks of length
- @var{ndeps}. This function must be called prior to the submission of the task,
- but it may be called after the submission or the execution of the tasks in the
- array, provided the tasks are still valid (i.e. they were not automatically
- destroyed). Calling this function on a task that was already submitted or with
- an entry of @var{task_array} that is not a valid task anymore results in an
- undefined behaviour. If @var{ndeps} is null, no dependency is added. It is
- possible to call @code{starpu_task_declare_deps_array} multiple times on the
- same task, in this case, the dependencies are added. It is possible to have
- redundancy in the task dependencies.
- @end deftypefun
- @deftp {Data Type} {starpu_tag_t}
- This type defines a task logical identifier. It is possible to associate a task with a unique ``tag'' chosen by the application, and to express
- dependencies between tasks by the means of those tags. To do so, fill the
- @code{tag_id} field of the @code{starpu_task} structure with a tag number (can
- be arbitrary) and set the @code{use_tag} field to 1.
- If @code{starpu_tag_declare_deps} is called with this tag number, the task will
- not be started until the tasks which hold the declared dependency tags are
- completed.
- @end deftp
- @deftypefun void starpu_tag_declare_deps (starpu_tag_t @var{id}, unsigned @var{ndeps}, ...)
- Specify the dependencies of the task identified by tag @var{id}. The first
- argument specifies the tag which is configured, the second argument gives the
- number of tag(s) on which @var{id} depends. The following arguments are the
- tags which have to be terminated to unlock the task.
- This function must be called before the associated task is submitted to StarPU
- with @code{starpu_task_submit}.
- Because of the variable arity of @code{starpu_tag_declare_deps}, note that the
- last arguments @emph{must} be of type @code{starpu_tag_t}: constant values
- typically need to be explicitly cast. Using the
- @code{starpu_tag_declare_deps_array} function avoids this hazard.
- @cartouche
- @smallexample
- /* Tag 0x1 depends on tags 0x32 and 0x52 */
- starpu_tag_declare_deps((starpu_tag_t)0x1,
- 2, (starpu_tag_t)0x32, (starpu_tag_t)0x52);
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun void starpu_tag_declare_deps_array (starpu_tag_t @var{id}, unsigned @var{ndeps}, {starpu_tag_t *}@var{array})
- This function is similar to @code{starpu_tag_declare_deps}, except
- that it does not take a variable number of arguments but an array of
- tags of size @var{ndeps}.
- @cartouche
- @smallexample
- /* Tag 0x1 depends on tags 0x32 and 0x52 */
- starpu_tag_t tag_array[2] = @{0x32, 0x52@};
- starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array);
- @end smallexample
- @end cartouche
- @end deftypefun
- @deftypefun int starpu_tag_wait (starpu_tag_t @var{id})
- This function blocks until the task associated to tag @var{id} has been
- executed. This is a blocking call which must therefore not be called within
- tasks or callbacks, but only from the application directly. It is possible to
- synchronize with the same tag multiple times, as long as the
- @code{starpu_tag_remove} function is not called. Note that it is still
- possible to synchronize with a tag associated to a task whose @code{starpu_task}
- data structure was freed (e.g. if the @code{destroy} flag of the
- @code{starpu_task} was enabled).
- @end deftypefun
- @deftypefun int starpu_tag_wait_array (unsigned @var{ntags}, starpu_tag_t *@var{id})
- This function is similar to @code{starpu_tag_wait} except that it blocks until
- @emph{all} the @var{ntags} tags contained in the @var{id} array are
- terminated.
- @end deftypefun
- @deftypefun void starpu_tag_restart (starpu_tag_t @var{id})
- This function can be used to clear the "already notified" status
- of a tag which is not associated with a task. Before that, calling
- @code{starpu_tag_notify_from_apps} again will not notify the successors. After
- that, the next call to @code{starpu_tag_notify_from_apps} will notify the
- successors.
- @end deftypefun
- @deftypefun void starpu_tag_remove (starpu_tag_t @var{id})
- This function releases the resources associated to tag @var{id}. It can be
- called once the corresponding task has been executed and when there is
- no other tag that depends on this tag anymore.
- @end deftypefun
- @deftypefun void starpu_tag_notify_from_apps (starpu_tag_t @var{id})
- This function explicitly unlocks tag @var{id}. It may be useful in the
- case of applications which execute part of their computation outside StarPU
- tasks (e.g. third-party libraries). It is also provided as a
- convenient tool for the programmer, for instance to entirely construct the task
- DAG before actually giving StarPU the opportunity to execute the tasks. When
- called several times on the same tag, notification will be done only on first
- call, thus implementing "OR" dependencies, until the tag is restarted using
- @code{starpu_tag_restart}.
- @end deftypefun
- @node Implicit Data Dependencies
- @section Implicit Data Dependencies
- In this section, we describe how StarPU makes it possible to insert implicit
- task dependencies in order to enforce sequential data consistency. When this
- data consistency is enabled on a specific data handle, any data access will
- appear as sequentially consistent from the application. For instance, if the
- application submits two tasks that access the same piece of data in read-only
- mode, and then a third task that accesses it in write mode, dependencies will be
- added between the two first tasks and the third one. Implicit data dependencies
- are also inserted in the case of data accesses from the application.
- @deftypefun void starpu_data_set_default_sequential_consistency_flag (unsigned @var{flag})
- Set the default sequential consistency flag. If a non-zero value is passed, a
- sequential data consistency will be enforced for all handles registered after
- this function call, otherwise it is disabled. By default, StarPU enables
- sequential data consistency. It is also possible to select the data consistency
- mode of a specific data handle with the
- @code{starpu_data_set_sequential_consistency_flag} function.
- @end deftypefun
- @deftypefun unsigned starpu_data_get_default_sequential_consistency_flag (void)
- Return the default sequential consistency flag
- @end deftypefun
- @deftypefun void starpu_data_set_sequential_consistency_flag (starpu_data_handle_t @var{handle}, unsigned @var{flag})
- Sets the data consistency mode associated to a data handle. The consistency
- mode set using this function has the priority over the default mode which can
- be set with @code{starpu_data_set_default_sequential_consistency_flag}.
- @end deftypefun
- @node Performance Model API
- @section Performance Model API
- @deftp {Data Type} {enum starpu_perf_archtype}
- Enumerates the various types of architectures.
- CPU types range within STARPU_CPU_DEFAULT (1 CPU), STARPU_CPU_DEFAULT+1 (2 CPUs), ... STARPU_CPU_DEFAULT + STARPU_MAXCPUS - 1 (STARPU_MAXCPUS CPUs).
- CUDA types range within STARPU_CUDA_DEFAULT (GPU number 0), STARPU_CUDA_DEFAULT + 1 (GPU number 1), ..., STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS - 1 (GPU number STARPU_MAXCUDADEVS - 1).
- OpenCL types range within STARPU_OPENCL_DEFAULT (GPU number 0), STARPU_OPENCL_DEFAULT + 1 (GPU number 1), ..., STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS - 1 (GPU number STARPU_MAXOPENCLDEVS - 1).
- @table @asis
- @item @code{STARPU_CPU_DEFAULT}
- @item @code{STARPU_CUDA_DEFAULT}
- @item @code{STARPU_OPENCL_DEFAULT}
- @end table
- @end deftp
- @deftp {Data Type} {enum starpu_perfmodel_type}
- The possible values are:
- @table @asis
- @item @code{STARPU_PER_ARCH} for application-provided per-arch cost model functions.
- @item @code{STARPU_COMMON} for application-provided common cost model function, with per-arch factor.
- @item @code{STARPU_HISTORY_BASED} for automatic history-based cost model.
- @item @code{STARPU_REGRESSION_BASED} for automatic linear regression-based cost model (alpha * size ^ beta).
- @item @code{STARPU_NL_REGRESSION_BASED} for automatic non-linear regression-based cost model (a * size ^ b + c).
- @end table
- @end deftp
- @deftp {Data Type} {struct starpu_perfmodel}
- @anchor{struct starpu_perfmodel}
- contains all information about a performance model. At least the
- @code{type} and @code{symbol} fields have to be filled when defining a
- performance model for a codelet. For compatibility, make sure to initialize the
- whole structure to zero, either by using explicit memset, or by letting the
- compiler implicitly do it in e.g. static storage case.
- If not provided, other fields have to be zero.
- @table @asis
- @item @code{type}
- is the type of performance model @code{enum starpu_perfmodel_type}:
- @code{STARPU_HISTORY_BASED},
- @code{STARPU_REGRESSION_BASED}, @code{STARPU_NL_REGRESSION_BASED}: No
- other fields need to be provided, this is purely history-based. @code{STARPU_PER_ARCH}:
- @code{per_arch} has to be filled with functions which return the cost in
- micro-seconds. @code{STARPU_COMMON}: @code{cost_function} has to be filled with
- a function that returns the cost in micro-seconds on a CPU, timing on other
- archs will be determined by multiplying by an arch-specific factor.
- @item @code{const char *symbol}
- is the symbol name for the performance model, which will be used as
- file name to store the model. It must be set otherwise the model will
- be ignored.
- @item @code{double (*cost_model)(struct starpu_buffer_descr *)}
- This field is deprecated. Use instead the @code{cost_function} field.
- @item @code{double (*cost_function)(struct starpu_task *, unsigned nimpl)}
- Used by @code{STARPU_COMMON}: takes a task and
- implementation number, and must return a task duration estimation in micro-seconds.
- @item @code{size_t (*size_base)(struct starpu_task *, unsigned nimpl)}
- Used by @code{STARPU_HISTORY_BASED} and
- @code{STARPU_*REGRESSION_BASED}. If not NULL, takes a task and
- implementation number, and returns the size to be used as index for
- history and regression.
- @item @code{struct starpu_perfmodel_per_arch per_arch[STARPU_NARCH_VARIATIONS][STARPU_MAXIMPLEMENTATIONS]}
- Used by @code{STARPU_PER_ARCH}: array of @code{struct
- starpu_perfmodel_per_arch} structures.
- @item @code{unsigned is_loaded}
- Whether the performance model is already loaded from the disk.
- @item @code{unsigned benchmarking}
- Whether the performance model is still being calibrated.
- @item @code{pthread_rwlock_t model_rwlock}
- Lock to protect concurrency between loading from disk (W), updating the values
- (W), and making a performance estimation (R).
- @end table
- @end deftp
- @deftp {Data Type} {struct starpu_perfmodel_regression_model}
- @table @asis
- @item @code{double sumlny} sum of ln(measured)
- @item @code{double sumlnx} sum of ln(size)
- @item @code{double sumlnx2} sum of ln(size)^2
- @item @code{unsigned long minx} minimum size
- @item @code{unsigned long maxx} maximum size
- @item @code{double sumlnxlny} sum of ln(size)*ln(measured)
- @item @code{double alpha} estimated = alpha * size ^ beta
- @item @code{double beta}
- @item @code{unsigned valid} whether the linear regression model is valid (i.e. enough measures)
- @item @code{double a, b, c} estimated = a * size ^ b + c
- @item @code{unsigned nl_valid} whether the non-linear regression model is valid (i.e. enough measures)
- @item @code{unsigned nsample} number of sample values for non-linear regression
- @end table
- @end deftp
- @deftp {Data Type} {struct starpu_perfmodel_per_arch}
- contains information about the performance model of a given arch.
- @table @asis
- @item @code{double (*cost_model)(struct starpu_buffer_descr *t)}
- This field is deprecated. Use instead the @code{cost_function} field.
- @item @code{double (*cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl)}
- Used by @code{STARPU_PER_ARCH}, must point to functions which take a task, the
- target arch and implementation number (as a mere convenience, since the array
- is already indexed by these), and must return a task duration estimation in
- micro-seconds.
- @item @code{size_t (*size_base)(struct starpu_task *, enum
- starpu_perf_archtype arch, unsigned nimpl)}
- Same as in @ref{struct starpu_perfmodel}, but per-arch, in
- case it depends on the architecture-specific implementation.
- @item @code{struct starpu_htbl32_node *history}
- The history of performance measurements.
- @item @code{struct starpu_perfmodel_history_list *list}
- Used by @code{STARPU_HISTORY_BASED} and @code{STARPU_NL_REGRESSION_BASED},
- records all execution history measures.
- @item @code{struct starpu_perfmodel_regression_model regression}
- Used by @code{STARPU_REGRESSION_BASED} and
- @code{STARPU_NL_REGRESSION_BASED}, contains the estimated factors of the
- regression.
- @end table
- @end deftp
- @deftypefun int starpu_perfmodel_load_symbol ({const char} *@var{symbol}, {struct starpu_perfmodel} *@var{model})
- loads a given performance model. The @var{model} structure has to be completely zero, and will be filled with the information saved in @code{$STARPU_HOME/.starpu}.
- @end deftypefun
- @deftypefun void starpu_perfmodel_debugfilepath ({struct starpu_perfmodel} *@var{model}, {enum starpu_perf_archtype} @var{arch}, char *@var{path}, size_t @var{maxlen}, unsigned @var{nimpl})
- returns the path to the debugging information for the performance model.
- @end deftypefun
- @deftypefun void starpu_perfmodel_get_arch_name ({enum starpu_perf_archtype} @var{arch}, char *@var{archname}, size_t @var{maxlen}, unsigned @var{nimpl})
- returns the architecture name for @var{arch}.
- @end deftypefun
- @deftypefun {enum starpu_perf_archtype} starpu_worker_get_perf_archtype (int @var{workerid})
- returns the architecture type of a given worker.
- @end deftypefun
- @deftypefun int starpu_perfmodel_list ({FILE *}@var{output})
- prints a list of all performance models on @var{output}.
- @end deftypefun
- @deftypefun void starpu_perfmodel_print ({struct starpu_perfmodel *}@var{model}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{nimpl}, {char *}@var{parameter}, {uint32_t *}@var{footprint}, {FILE *}@var{output})
- todo
- @end deftypefun
- @deftypefun int starpu_perfmodel_print_all ({struct starpu_perfmodel *}@var{model}, {char *}@var{arch}, {char *}@var{parameter}, {uint32_t *}@var{footprint}, {FILE *}@var{output})
- todo
- @end deftypefun
- @deftypefun void starpu_bus_print_bandwidth ({FILE *}@var{f})
- prints a matrix of bus bandwidths on @var{f}.
- @end deftypefun
- @deftypefun void starpu_bus_print_affinity ({FILE *}@var{f})
- prints the affinity devices on @var{f}.
- @end deftypefun
- @deftypefun void starpu_topology_print ({FILE *}@var{f})
- prints a description of the topology on @var{f}.
- @end deftypefun
- @deftypefun void starpu_perfmodel_update_history ({struct starpu_perfmodel *}@var{model}, {struct starpu_task *}@var{task}, {enum starpu_perf_archtype} @var{arch}, unsigned @var{cpuid}, unsigned @var{nimpl}, double @var{measured})
- This feeds the performance model @var{model} with an explicit measurement
- @var{measured}, in addition to measurements done by StarPU itself. This can be
- useful when the application already has an existing set of measurements done
- in good conditions, that StarPU could benefit from instead of doing on-line
- measurements. An example of use can be seen in @ref{Performance model example}.
- @end deftypefun
- @node Profiling API
- @section Profiling API
- @deftypefun int starpu_profiling_status_set (int @var{status})
- This function sets the profiling status. Profiling is activated by passing
- @code{STARPU_PROFILING_ENABLE} in @var{status}. Passing
- @code{STARPU_PROFILING_DISABLE} disables profiling. Calling this function
- resets all profiling measurements. When profiling is enabled, the
- @code{profiling_info} field of the @code{struct starpu_task} structure points
- to a valid @code{struct starpu_task_profiling_info} structure containing
- information about the execution of the task.
- Negative return values indicate an error, otherwise the previous status is
- returned.
- @end deftypefun
- @deftypefun int starpu_profiling_status_get (void)
- Return the current profiling status or a negative value in case there was an error.
- @end deftypefun
- @deftypefun void starpu_set_profiling_id (int @var{new_id})
- This function sets the ID used for profiling trace filename. It needs to be
- called before starpu_init.
- @end deftypefun
- @deftp {Data Type} {struct starpu_task_profiling_info}
- This structure contains information about the execution of a task. It is
- accessible from the @code{.profiling_info} field of the @code{starpu_task}
- structure if profiling was enabled. The different fields are:
- @table @asis
- @item @code{struct timespec submit_time}
- Date of task submission (relative to the initialization of StarPU).
- @item @code{struct timespec push_start_time}
- Time when the task was submitted to the scheduler.
- @item @code{struct timespec push_end_time}
- Time when the scheduler finished with the task submission.
- @item @code{struct timespec pop_start_time}
- Time when the scheduler started to be requested for a task, and eventually gave
- that task.
- @item @code{struct timespec pop_end_time}
- Time when the scheduler finished providing the task for execution.
- @item @code{struct timespec acquire_data_start_time}
- Time when the worker started fetching input data.
- @item @code{struct timespec acquire_data_end_time}
- Time when the worker finished fetching input data.
- @item @code{struct timespec start_time}
- Date of task execution beginning (relative to the initialization of StarPU).
- @item @code{struct timespec end_time}
- Date of task execution termination (relative to the initialization of StarPU).
- @item @code{struct timespec release_data_start_time}
- Time when the worker started releasing data.
- @item @code{struct timespec release_data_end_time}
- Time when the worker finished releasing data.
- @item @code{struct timespec callback_start_time}
- Time when the worker started the application callback for the task.
- @item @code{struct timespec callback_end_time}
- Time when the worker finished the application callback for the task.
- @item @code{workerid}
- Identifier of the worker which has executed the task.
- @item @code{uint64_t used_cycles}
- Number of cycles used by the task, only available in the MoviSim
- @item @code{uint64_t stall_cycles}
- Number of cycles stalled within the task, only available in the MoviSim
- @item @code{double power_consumed}
- Power consumed by the task, only available in the MoviSim
- @end table
- @end deftp
- @deftp {Data Type} {struct starpu_worker_profiling_info}
- This structure contains the profiling information associated to a
- worker. The different fields are:
- @table @asis
- @item @code{struct timespec start_time}
- Starting date for the reported profiling measurements.
- @item @code{struct timespec total_time}
- Duration of the profiling measurement interval.
- @item @code{struct timespec executing_time}
- Time spent by the worker to execute tasks during the profiling measurement interval.
- @item @code{struct timespec sleeping_time}
- Time spent idling by the worker during the profiling measurement interval.
- @item @code{int executed_tasks}
- Number of tasks executed by the worker during the profiling measurement interval.
- @item @code{uint64_t used_cycles}
- Number of cycles used by the worker, only available in the MoviSim
- @item @code{uint64_t stall_cycles}
- Number of cycles stalled within the worker, only available in the MoviSim
- @item @code{double power_consumed}
- Power consumed by the worker, only available in the MoviSim
- @end table
- @end deftp
- @deftypefun int starpu_worker_get_profiling_info (int @var{workerid}, {struct starpu_worker_profiling_info *}@var{worker_info})
- Get the profiling info associated to the worker identified by @var{workerid},
- and reset the profiling measurements. If the @var{worker_info} argument is
- NULL, only reset the counters associated to worker @var{workerid}.
- Upon successful completion, this function returns 0. Otherwise, a negative
- value is returned.
- @end deftypefun
- @deftp {Data Type} {struct starpu_bus_profiling_info}
- The different fields are:
- @table @asis
- @item @code{struct timespec start_time}
- Time of bus profiling startup.
- @item @code{struct timespec total_time}
- Total time of bus profiling.
- @item @code{int long long transferred_bytes}
- Number of bytes transferred during profiling.
- @item @code{int transfer_count}
- Number of transfers during profiling.
- @end table
- @end deftp
- @deftypefun int starpu_bus_get_profiling_info (int @var{busid}, {struct starpu_bus_profiling_info *}@var{bus_info})
- Get the profiling info associated to the bus designated by @var{busid},
- and reset the profiling measurements. If @var{bus_info} is NULL, only reset the
- counters.
- @end deftypefun
- @deftypefun int starpu_bus_get_count (void)
- Return the number of buses in the machine.
- @end deftypefun
- @deftypefun int starpu_bus_get_id (int @var{src}, int @var{dst})
- Return the identifier of the bus between @var{src} and @var{dst}
- @end deftypefun
- @deftypefun int starpu_bus_get_src (int @var{busid})
- Return the source point of bus @var{busid}
- @end deftypefun
- @deftypefun int starpu_bus_get_dst (int @var{busid})
- Return the destination point of bus @var{busid}
- @end deftypefun
- @deftypefun double starpu_timing_timespec_delay_us ({struct timespec} *@var{start}, {struct timespec} *@var{end})
- Returns the time elapsed between @var{start} and @var{end} in microseconds.
- @end deftypefun
- @deftypefun double starpu_timing_timespec_to_us ({struct timespec} *@var{ts})
- Converts the given timespec @var{ts} into microseconds.
- @end deftypefun
- @deftypefun void starpu_bus_profiling_helper_display_summary (void)
- Displays statistics about the bus on stderr if the environment
- variable @code{STARPU_BUS_STATS} is defined. The function is called
- automatically by @code{starpu_shutdown()}.
- @end deftypefun
- @deftypefun void starpu_worker_profiling_helper_display_summary (void)
- Displays statistics about the workers on stderr if the environment
- variable @code{STARPU_WORKER_STATS} is defined. The function is called
- automatically by @code{starpu_shutdown()}.
- @end deftypefun
- @deftypefun void starpu_memory_display_stats ()
- Display statistics about the current data handles registered within
- StarPU. StarPU must have been configured with the option
- @code{----enable-memory-stats} (@pxref{Memory feedback}).
- @end deftypefun
- @node CUDA extensions
- @section CUDA extensions
- @defmac STARPU_USE_CUDA
- This macro is defined when StarPU has been installed with CUDA
- support. It should be used in your code to detect the availability of
- CUDA as shown in @ref{Full source code for the 'Scaling a Vector' example}.
- @end defmac
- @deftypefun cudaStream_t starpu_cuda_get_local_stream (void)
- This function gets the current worker's CUDA stream.
- StarPU provides a stream for every CUDA device controlled by StarPU. This
- function is only provided for convenience so that programmers can easily use
- asynchronous operations within codelets without having to create a stream by
- hand. Note that the application is not forced to use the stream provided by
- @code{starpu_cuda_get_local_stream} and may also create its own streams.
- Synchronizing with @code{cudaThreadSynchronize()} is allowed, but will reduce
- the likelihood of having all transfers overlapped.
- @end deftypefun
- @deftypefun {const struct cudaDeviceProp *} starpu_cuda_get_device_properties (unsigned @var{workerid})
- This function returns a pointer to device properties for worker @var{workerid}
- (assumed to be a CUDA worker).
- @end deftypefun
- @deftypefun void starpu_cuda_report_error ({const char *}@var{func}, {const char *}@var{file}, int @var{line}, cudaError_t @var{status})
- Report a CUDA error.
- @end deftypefun
- @defmac STARPU_CUDA_REPORT_ERROR (cudaError_t @var{status})
- Calls starpu_cuda_report_error, passing the current function, file and line
- position.
- @end defmac
- @deftypefun int starpu_cuda_copy_async_sync ({void *}@var{src_ptr}, unsigned @var{src_node}, {void *}@var{dst_ptr}, unsigned @var{dst_node}, size_t @var{ssize}, cudaStream_t @var{stream}, {enum cudaMemcpyKind} @var{kind})
- Copy @var{ssize} bytes from the pointer @var{src_ptr} on
- @var{src_node} to the pointer @var{dst_ptr} on @var{dst_node}.
- The function first tries to copy the data asynchronously (unless
- @var{stream} is @code{NULL}). If the asynchronous copy fails or if
- @var{stream} is @code{NULL}, it copies the data synchronously.
- The function returns @code{-EAGAIN} if the asynchronous launch was
- successful. It returns 0 if the synchronous copy was successful, or
- fails otherwise.
- @end deftypefun
- @deftypefun void starpu_cuda_set_device (unsigned @var{devid})
- Calls @code{cudaSetDevice(devid)} or @code{cudaGLSetGLDevice(devid)}, according to
- whether @code{devid} is among the @code{cuda_opengl_interoperability} field of
- the @code{starpu_conf} structure.
- @end deftypefun
- @deftypefun void starpu_helper_cublas_init (void)
- This function initializes CUBLAS on every CUDA device.
- The CUBLAS library must be initialized prior to any CUBLAS call. Calling
- @code{starpu_helper_cublas_init} will initialize CUBLAS on every CUDA device
- controlled by StarPU. This call blocks until CUBLAS has been properly
- initialized on every device.
- @end deftypefun
- @deftypefun void starpu_helper_cublas_shutdown (void)
- This function synchronously deinitializes the CUBLAS library on every CUDA device.
- @end deftypefun
- @deftypefun void starpu_cublas_report_error ({const char *}@var{func}, {const char *}@var{file}, int @var{line}, cublasStatus @var{status})
- Report a CUBLAS error.
- @end deftypefun
- @defmac STARPU_CUBLAS_REPORT_ERROR (cublasStatus @var{status})
- Calls @code{starpu_cublas_report_error}, passing the current function, file and line
- position.
- @end defmac
- @node OpenCL extensions
- @section OpenCL extensions
- @menu
- * Writing OpenCL kernels:: Writing OpenCL kernels
- * Compiling OpenCL kernels:: Compiling OpenCL kernels
- * Loading OpenCL kernels:: Loading OpenCL kernels
- * OpenCL statistics:: Collecting statistics from OpenCL
- * OpenCL utilities:: Utilities for OpenCL
- @end menu
- @defmac STARPU_USE_OPENCL
- This macro is defined when StarPU has been installed with OpenCL
- support. It should be used in your code to detect the availability of
- OpenCL as shown in @ref{Full source code for the 'Scaling a Vector' example}.
- @end defmac
- @node Writing OpenCL kernels
- @subsection Writing OpenCL kernels
- @deftypefun void starpu_opencl_get_context (int @var{devid}, {cl_context *}@var{context})
- Places the OpenCL context of the device designated by @var{devid} into @var{context}.
- @end deftypefun
- @deftypefun void starpu_opencl_get_device (int @var{devid}, {cl_device_id *}@var{device})
- Places the cl_device_id corresponding to @var{devid} in @var{device}.
- @end deftypefun
- @deftypefun void starpu_opencl_get_queue (int @var{devid}, {cl_command_queue *}@var{queue})
- Places the command queue of the device designated by @var{devid} into @var{queue}.
- @end deftypefun
- @deftypefun void starpu_opencl_get_current_context ({cl_context *}@var{context})
- Return the context of the current worker.
- @end deftypefun
- @deftypefun void starpu_opencl_get_current_queue ({cl_command_queue *}@var{queue})
- Return the computation kernel command queue of the current worker.
- @end deftypefun
- @deftypefun int starpu_opencl_set_kernel_args ({cl_int *}@var{err}, {cl_kernel *}@var{kernel}, ...)
- Sets the arguments of a given kernel. The list of arguments must be given as
- (size_t @var{size_of_the_argument}, cl_mem * @var{pointer_to_the_argument}).
- The last argument must be 0. On success, returns the number of arguments
- that were set. In case of failure, returns the id of the argument
- that could not be set and @var{err} is set to the error returned by
- OpenCL.
- @cartouche
- @smallexample
- int n;
- cl_int err;
- cl_kernel kernel;
- n = starpu_opencl_set_kernel_args(&err, &kernel,
- sizeof(foo), &foo,
- sizeof(bar), &bar,
- 0);
- if (n != 2)
- fprintf(stderr, "Error : %d\n", err);
- @end smallexample
- @end cartouche
- @end deftypefun
- @node Compiling OpenCL kernels
- @subsection Compiling OpenCL kernels
- Source code for OpenCL kernels can be stored in a file or in a
- string. StarPU provides functions to build the program executable for
- each available OpenCL device as a @code{cl_program} object. This
- program executable can then be loaded within a specific queue as
- explained in the next section. These are only helpers; applications
- can also fill a @code{starpu_opencl_program} array by hand for more advanced
- use (e.g. different programs on the different OpenCL devices, for
- relocation purpose for instance).
- @deftp {Data Type} {struct starpu_opencl_program}
- Stores the OpenCL programs as compiled for the different OpenCL
- devices. The different fields are:
- @table @asis
- @item @code{cl_program programs[STARPU_MAXOPENCLDEVS]}
- Stores each program for each OpenCL device.
- @end table
- @end deftp
- @deftypefun int starpu_opencl_load_opencl_from_file ({const char} *@var{source_file_name}, {struct starpu_opencl_program} *@var{opencl_programs}, {const char}* @var{build_options})
- @anchor{starpu_opencl_load_opencl_from_file}
- This function compiles an OpenCL source code stored in a file.
- @end deftypefun
- @deftypefun int starpu_opencl_load_opencl_from_string ({const char} *@var{opencl_program_source}, {struct starpu_opencl_program} *@var{opencl_programs}, {const char}* @var{build_options})
- This function compiles an OpenCL source code stored in a string.
- @end deftypefun
- @deftypefun int starpu_opencl_unload_opencl ({struct starpu_opencl_program} *@var{opencl_programs})
- This function unloads an OpenCL compiled code.
- @end deftypefun
- @deftypefun void starpu_opencl_load_program_source ({const char *}@var{source_file_name}, char *@var{located_file_name}, char *@var{located_dir_name}, char *@var{opencl_program_source})
- @anchor{starpu_opencl_load_program_source}
- Store the contents of the file @var{source_file_name} in the buffer
- @var{opencl_program_source}. The file @var{source_file_name} can be
- located in the current directory, or in the directory specified by the
- environment variable @code{STARPU_OPENCL_PROGRAM_DIR} (@pxref{STARPU_OPENCL_PROGRAM_DIR}), or in the
- directory @code{share/starpu/opencl} of the installation directory of
- StarPU, or in the source directory of StarPU.
- When the file is found, @code{located_file_name} is the full name of
- the file as it has been located on the system, @code{located_dir_name}
- the directory where it has been located. Otherwise, they are both set
- to the empty string.
- @end deftypefun
- @deftypefun int starpu_opencl_compile_opencl_from_file ({const char *}@var{source_file_name}, {const char *} @var{build_options})
- Compile the OpenCL kernel stored in the file @code{source_file_name}
- with the given options @code{build_options} and stores the result in
- the directory @code{$STARPU_HOME/.starpu/opencl} with the same
- filename as @code{source_file_name}. The compilation is done for every
- OpenCL device, and the filename is suffixed with the vendor id and the
- device id of the OpenCL device.
- @end deftypefun
- @deftypefun int starpu_opencl_compile_opencl_from_string ({const char *}@var{opencl_program_source}, {const char *}@var{file_name}, {const char* }@var{build_options})
- Compile the OpenCL kernel in the string @code{opencl_program_source}
- with the given options @code{build_options} and stores the result in
- the directory @code{$STARPU_HOME/.starpu/opencl}
- with the filename
- @code{file_name}. The compilation is done for every
- OpenCL device, and the filename is suffixed with the vendor id and the
- device id of the OpenCL device.
- @end deftypefun
- @deftypefun int starpu_opencl_load_binary_opencl ({const char *}@var{kernel_id}, {struct starpu_opencl_program *}@var{opencl_programs})
- Compile the binary OpenCL kernel identified with @var{kernel_id}. For every
- OpenCL device, the binary OpenCL kernel will be loaded from the file
- @code{$STARPU_HOME/.starpu/opencl/<kernel_id>.<device_type>.vendor_id_<vendor_id>_device_id_<device_id>}.
- @end deftypefun
- @node Loading OpenCL kernels
- @subsection Loading OpenCL kernels
- @deftypefun int starpu_opencl_load_kernel (cl_kernel *@var{kernel}, cl_command_queue *@var{queue}, {struct starpu_opencl_program} *@var{opencl_programs}, {const char} *@var{kernel_name}, int @var{devid})
- Create a kernel @var{kernel} for device @var{devid}, on its computation command
- queue returned in @var{queue}, using program @var{opencl_programs} and name
- @var{kernel_name}.
- @end deftypefun
- @deftypefun int starpu_opencl_release_kernel (cl_kernel @var{kernel})
- Release the given @var{kernel}, to be called after kernel execution.
- @end deftypefun
- @node OpenCL statistics
- @subsection OpenCL statistics
- @deftypefun int starpu_opencl_collect_stats (cl_event @var{event})
- This function collects statistics on a kernel execution.
- After termination of the kernels, the OpenCL codelet should call this function
- to pass it the event returned by @code{clEnqueueNDRangeKernel}, to let StarPU
- collect statistics about the kernel execution (used cycles, consumed power).
- @end deftypefun
- @node OpenCL utilities
- @subsection OpenCL utilities
- @deftypefun {const char *} starpu_opencl_error_string (cl_int @var{status})
- Return the error message in English corresponding to @var{status}, an
- OpenCL error code.
- @end deftypefun
- @deftypefun void starpu_opencl_display_error ({const char *}@var{func}, {const char *}@var{file}, int @var{line}, {const char *}@var{msg}, cl_int @var{status})
- Given a valid error @var{status}, prints the corresponding error message on
- stdout, along with the given function name @var{func}, the given filename
- @var{file}, the given line number @var{line} and the given message @var{msg}.
- @end deftypefun
- @defmac STARPU_OPENCL_DISPLAY_ERROR (cl_int @var{status})
- Call the function @code{starpu_opencl_display_error} with the given
- error @var{status}, the current function name, current file and line
- number, and an empty message.
- @end defmac
- @deftypefun void starpu_opencl_report_error ({const char *}@var{func}, {const char *}@var{file}, int @var{line}, {const char *}@var{msg}, cl_int @var{status})
- Call the function @code{starpu_opencl_display_error} and abort.
- @end deftypefun
- @defmac STARPU_OPENCL_REPORT_ERROR (cl_int @var{status})
- Call the function @code{starpu_opencl_report_error} with the given
- error @var{status}, with the current function name, current file and
- line number, and an empty message.
- @end defmac
- @defmac STARPU_OPENCL_REPORT_ERROR_WITH_MSG ({const char *}@var{msg}, cl_int @var{status})
- Call the function @code{starpu_opencl_report_error} with the given
- message and the given error @var{status}, with the current function
- name, current file and line number.
- @end defmac
- @deftypefun cl_int starpu_opencl_allocate_memory ({cl_mem *}@var{addr}, size_t @var{size}, cl_mem_flags @var{flags})
- Allocate @var{size} bytes of memory, stored in @var{addr}. @var{flags} must be a
- valid combination of cl_mem_flags values.
- @end deftypefun
- @deftypefun cl_int starpu_opencl_copy_ram_to_opencl ({void *}@var{ptr}, unsigned @var{src_node}, cl_mem @var{buffer}, unsigned @var{dst_node}, size_t @var{size}, size_t @var{offset}, {cl_event *}@var{event}, {int *}@var{ret})
- Copy @var{size} bytes from the given @var{ptr} on
- RAM @var{src_node} to the given @var{buffer} on OpenCL @var{dst_node}.
- @var{offset} is the offset, in bytes, in @var{buffer}.
- If @var{event} is NULL, the copy is synchronous, i.e., the queue is
- synchronised before returning. If non NULL, @var{event} can be used
- after the call to wait for this particular copy to complete.
- This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code
- otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous launch
- was successful, or to 0 if @var{event} was NULL.
- @end deftypefun
- @deftypefun cl_int starpu_opencl_copy_opencl_to_ram (cl_mem @var{buffer}, unsigned @var{src_node}, void *@var{ptr}, unsigned @var{dst_node}, size_t @var{size}, size_t @var{offset}, {cl_event *}@var{event}, {int *}@var{ret})
- Copy @var{size} bytes asynchronously from the given @var{buffer} on
- OpenCL @var{src_node} to the given @var{ptr} on RAM @var{dst_node}.
- @var{offset} is the offset, in bytes, in @var{buffer}.
- If @var{event} is NULL, the copy is synchronous, i.e., the queue is
- synchronised before returning. If non NULL, @var{event} can be used
- after the call to wait for this particular copy to complete.
- This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code
- otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous launch
- was successful, or to 0 if @var{event} was NULL.
- @end deftypefun
- @deftypefun cl_int starpu_opencl_copy_opencl_to_opencl (cl_mem @var{src}, unsigned @var{src_node}, size_t @var{src_offset}, cl_mem @var{dst}, unsigned @var{dst_node}, size_t @var{dst_offset}, size_t @var{size}, {cl_event *}@var{event}, {int *}@var{ret})
- Copy @var{size} bytes asynchronously from byte offset @var{src_offset} of
- @var{src} on OpenCL @var{src_node} to byte offset @var{dst_offset} of @var{dst} on
- OpenCL @var{dst_node}.
- If @var{event} is NULL, the copy is synchronous, i.e., the queue is
- synchronised before returning. If non NULL, @var{event} can be used
- after the call to wait for this particular copy to complete.
- This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code
- otherwise. The integer pointed to by @var{ret} is set to -EAGAIN if the asynchronous launch
- was successful, or to 0 if @var{event} was NULL.
- @end deftypefun
- @deftypefun cl_int starpu_opencl_copy_async_sync (uintptr_t @var{src}, size_t @var{src_offset}, unsigned @var{src_node}, uintptr_t @var{dst}, size_t @var{dst_offset}, unsigned @var{dst_node}, size_t @var{size}, {cl_event *}@var{event})
- Copy @var{size} bytes from byte offset @var{src_offset} of
- @var{src} on @var{src_node} to byte offset @var{dst_offset} of @var{dst} on
- @var{dst_node}. If @var{event} is NULL, the copy is synchronous, i.e., the queue is
- synchronised before returning. If non NULL, @var{event} can be used
- after the call to wait for this particular copy to complete.
- The function returns @code{-EAGAIN} if the asynchronous launch was
- successful. It returns 0 if the synchronous copy was successful, or
- fails otherwise.
- @end deftypefun
- @node Miscellaneous helpers
- @section Miscellaneous helpers
- @deftypefun int starpu_data_cpy (starpu_data_handle_t @var{dst_handle}, starpu_data_handle_t @var{src_handle}, int @var{asynchronous}, void (*@var{callback_func})(void*), void *@var{callback_arg})
- Copy the content of the @var{src_handle} into the @var{dst_handle} handle.
- The @var{asynchronous} parameter indicates whether the function should
- block or not. In the case of an asynchronous call, it is possible to
- synchronize with the termination of this operation either by the means of
- implicit dependencies (if enabled) or by calling
- @code{starpu_task_wait_for_all()}. If @var{callback_func} is not @code{NULL},
- this callback function is executed after the handle has been copied, and it is
- given the @var{callback_arg} pointer as argument.
- @end deftypefun
- @deftypefun void starpu_execute_on_each_worker (void (*@var{func})(void *), void *@var{arg}, uint32_t @var{where})
- This function executes the given function on a subset of workers.
- When calling this method, the offloaded function specified by the first argument is
- executed by every StarPU worker that may execute the function.
- The second argument is passed to the offloaded function.
- The last argument specifies on which types of processing units the function
- should be executed. Similarly to the @var{where} field of the
- @code{struct starpu_codelet} structure, it is possible to specify that the function
- should be executed on every CUDA device and every CPU by passing
- @code{STARPU_CPU|STARPU_CUDA}.
- This function blocks until the function has been executed on every appropriate
- processing unit, so that it may not be called from a callback function for
- instance.
- @end deftypefun
|