Browse Source

Merge branch 'fpga' of gitlab.inria.fr:starpu/starpu into fpga

Samuel Thibault 4 years ago
parent
commit
8b4e256881
100 changed files with 639 additions and 240 deletions
  1. 1 0
      AUTHORS
  2. 1 0
      ChangeLog
  3. 59 35
      doc/doxygen/chapters/380_offline_performance_tools.doxy
  4. 4 3
      doc/doxygen/chapters/410_mpi_support.doxy
  5. 1 1
      doc/doxygen/chapters/470_simgrid.doxy
  6. BIN
      doc/doxygen/chapters/images/starvz_visu.eps
  7. BIN
      doc/doxygen/chapters/images/starvz_visu.pdf
  8. BIN
      doc/doxygen/chapters/images/starvz_visu.png
  9. BIN
      doc/doxygen/chapters/images/starvz_visu_r.pdf
  10. BIN
      doc/doxygen/chapters/images/starvz_visu_r.png
  11. 27 0
      examples/native_fortran/nf_vector.f90
  12. 1 6
      examples/spmv/dw_block_spmv.c
  13. 7 7
      examples/worker_collections/worker_list_example.c
  14. 7 7
      examples/worker_collections/worker_tree_example.c
  15. 29 0
      include/fstarpu_mod.f90
  16. 5 0
      include/starpu.h
  17. 8 0
      include/starpu_data_filters.h
  18. 8 1
      include/starpu_task.h
  19. 5 6
      include/starpu_task_util.h
  20. 21 3
      include/starpu_util.h
  21. 14 2
      include/starpu_worker.h
  22. 1 0
      mpi/src/starpu_mpi_task_insert.c
  23. 1 0
      src/Makefile.am
  24. 4 0
      src/common/barrier.h
  25. 5 0
      src/common/barrier_counter.h
  26. 19 12
      src/common/fxt.h
  27. 4 4
      src/common/graph.c
  28. 4 0
      src/common/graph.h
  29. 1 1
      src/common/knobs.c
  30. 4 0
      src/common/knobs.h
  31. 4 0
      src/common/rwlock.h
  32. 5 4
      src/common/thread.c
  33. 6 2
      src/common/thread.h
  34. 5 1
      src/common/timing.h
  35. 4 0
      src/common/uthash.h
  36. 1 1
      src/common/utils.c
  37. 7 3
      src/common/utils.h
  38. 4 0
      src/core/combined_workers.h
  39. 1 1
      src/core/debug.c
  40. 4 0
      src/core/debug.h
  41. 4 0
      src/core/dependencies/cg.h
  42. 4 0
      src/core/dependencies/data_concurrency.h
  43. 6 1
      src/core/dependencies/implicit_data_deps.h
  44. 4 0
      src/core/dependencies/tags.h
  45. 5 0
      src/core/detect_combined_workers.h
  46. 4 0
      src/core/disk.h
  47. 1 1
      src/core/disk_ops/disk_unistd.c
  48. 1 1
      src/core/disk_ops/disk_unistd_o_direct.c
  49. 2 2
      src/core/disk_ops/unistd/disk_unistd_global.c
  50. 6 1
      src/core/disk_ops/unistd/disk_unistd_global.h
  51. 4 0
      src/core/drivers.h
  52. 4 0
      src/core/errorcheck.h
  53. 4 0
      src/core/idle_hook.h
  54. 4 0
      src/core/jobs.h
  55. 4 0
      src/core/perfmodel/multiple_regression.h
  56. 1 1
      src/core/perfmodel/perfmodel.c
  57. 7 3
      src/core/perfmodel/perfmodel.h
  58. 4 0
      src/core/perfmodel/regression.h
  59. 4 0
      src/core/progress_hook.h
  60. 17 17
      src/core/sched_ctx.c
  61. 5 0
      src/core/sched_ctx.h
  62. 25 21
      src/core/sched_ctx_list.h
  63. 6 2
      src/core/sched_policy.h
  64. 42 33
      src/core/simgrid.c
  65. 14 0
      src/core/simgrid.h
  66. 3 0
      src/core/task.h
  67. 1 1
      src/core/task_bundle.c
  68. 6 2
      src/core/task_bundle.h
  69. 1 1
      src/core/topology.c
  70. 6 2
      src/core/topology.h
  71. 16 6
      src/core/workers.c
  72. 10 6
      src/core/workers.h
  73. 1 1
      src/datawizard/coherency.c
  74. 5 2
      src/datawizard/coherency.h
  75. 4 0
      src/datawizard/copy_driver.h
  76. 5 0
      src/datawizard/data_request.h
  77. 4 0
      src/datawizard/datastats.h
  78. 3 0
      src/datawizard/datawizard.h
  79. 5 0
      src/datawizard/filters.h
  80. 4 0
      src/datawizard/footprint.h
  81. 5 0
      src/datawizard/interfaces/bcsr_filters.c
  82. 12 11
      src/datawizard/interfaces/data_interface.h
  83. 5 0
      src/datawizard/malloc.h
  84. 4 3
      src/datawizard/memalloc.c
  85. 4 0
      src/datawizard/memalloc.h
  86. 4 0
      src/datawizard/memory_manager.h
  87. 4 0
      src/datawizard/memory_nodes.h
  88. 4 0
      src/datawizard/memstats.h
  89. 4 0
      src/datawizard/node_ops.h
  90. 4 0
      src/datawizard/sort_data_handles.h
  91. 4 0
      src/datawizard/write_back.h
  92. 6 2
      src/debug/starpu_debug_helpers.h
  93. 14 14
      src/debug/traces/starpu_fxt.c
  94. 4 0
      src/debug/traces/starpu_fxt.h
  95. 6 6
      src/debug/traces/starpu_fxt_mpi.c
  96. 4 0
      src/drivers/cpu/driver_cpu.h
  97. 4 0
      src/drivers/cuda/driver_cuda.h
  98. 4 0
      src/drivers/disk/driver_disk.h
  99. 5 0
      src/drivers/driver_common/driver_common.h
  100. 0 0
      src/drivers/max/driver_fpga_init.c

+ 1 - 0
AUTHORS

@@ -31,6 +31,7 @@ Namyst Raymond, Université de Bordeaux, <raymond.namyst@labri.fr>
 Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), <llnesi@inf.ufrgs.br>
 Pablo Joris, Inria, <joris.pablo@orange.fr>
 Pasqualinotto Damien, Université de Bordeaux, <dam.pasqualinotto@wanadoo.fr>
+Pinto Vinicius Garcia, <vgpinto@inf.ufrgs.br>
 Pitoiset Samuel, Inria, <samuel.pitoiset@inria.fr>
 Quôc-Dinh Nguyen, IT Sud-Paris, <nguyen.quocdinh@gmail.com>
 Roelandt Cyril, Inria, <cyril.roelandt@inria.fr>

+ 1 - 0
ChangeLog

@@ -86,6 +86,7 @@ Small features:
   * Add STARPU_SCHED_SORTED_ABOVE and STARPU_SCHED_SORTED_BELOW environment
     variables.
   * Add STARPU_SCHED_SIMPLE_PRE_DECISION.
+  * Add starpu_bcsr_filter_canonical_block_get_nchildren.
 
 StarPU 1.3.7
 ====================================================================

+ 59 - 35
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -753,66 +753,90 @@ to a less optimal solution. This increases even more computation time.
 
 \section starvz Trace visualization with StarVZ
 
-Creating views with StarVZ (see: https://github.com/schnorr/starvz) is made up of two steps. The initial
-stage consists of a pre-processing of the traces generated by the application.
-The second step consists of the analysis itself and is carried out with the
-aid of R packages. To download and install StarVZ, it is necessary to have R,
-pajeng and the following packages:
+Creating views with StarVZ (see: https://github.com/schnorr/starvz) is
+made up of two steps. The initial stage consists of a pre-processing
+of the traces generated by the application, while the second one
+consists of the analysis itself and is carried out with the aid of
+R packages. StarVZ is available at CRAN
+(https://cran.r-project.org/package=starvz) and depends on pj_dump
+(from pajeng) and rec2csv (from recutils).
+
+To download and install StarVZ, it is necessary to have R,
+pajeng, and recutils:
 
 \verbatim
-# For pajeng
-apt install -y git cmake build-essential libboost-dev asciidoc flex bison
-git clone git://github.com/schnorr/pajeng.git
-mkdir -p pajeng/b ; cd pajeng/b
-cmake ..
-make
+# For pj_dump and rec2csv
+apt install -y pajeng recutils
 
-# For R tidyverse
+# For R
 apt install -y r-base libxml2-dev libssl-dev libcurl4-openssl-dev libgit2-dev libboost-dev
 \endverbatim
 
-To install the StarVZ the following commands can be used:
+To install StarVZ, the following command can be used:
 
 \verbatim
-git clone https://github.com/schnorr/starvz.git
-echo "install.packages(c('tidyverse', 'devtools'), repos = 'https://cloud.r-project.org')" | R --vanilla
-echo "library(devtools); devtools::install_local(path='./starvz/R_package')" | R --vanilla
+echo "install.packages('starvz', repos = 'https://cloud.r-project.org')" | R --vanilla
 \endverbatim
 
-To generate traces from an application, it is necessary to set \ref STARPU_GENERATE_TRACE.
-and build StarPU with FxT. Then, Step 1 of StarVZ can be used on a folder with
-StarPU FxT traces:
+To generate traces from an application, it is necessary to set \ref STARPU_GENERATE_TRACE
+and build StarPU with FxT. Then, StarVZ can be used on a folder with
+StarPU FxT traces to produce a default view:
+
+\verbatim
+export PATH=$(Rscript -e 'cat(system.file("tools/", package = "starvz"), sep="\n")'):$PATH
+
+starvz /foo/path-to-fxt-files
+\endverbatim
+
+An example of default view:
+
+\image html starvz_visu.png
+\image latex starvz_visu.pdf "" width=\textwidth
 
+One can also use existing trace files (paje.trace, tasks.rec,
+data.rec, papi.rec and dag.dot) skipping the StarVZ internal call to
+starpu_fxt_tool with:
 \verbatim
-export PATH=starvz/:$PATH
-export PATH=pajeng/b:$PATH
-export PATH=$STARPU_HOME/bin:$PATH
+starvz --use-paje-trace /foo/path-to-trace-files
+\endverbatim
 
-./starvz/src/phase1-workflow.sh /tmp/ ""
+Alternatively, each StarVZ step can be executed separately. Step 1 can
+be used on a folder with:
+\verbatim
+starvz -1 /foo/path-to-fxt-files
 \endverbatim
 
-Then the second step can be executed directly in R, StarVZ enables a set of
-different plots that can be configured on a .yaml file. A default file is provided
-<c>full_config.yaml</c>; also the options can be changed directly in R.
+Then the second step can be
+executed directly in R. StarVZ enables a set of different plots that
+can be configured in a .yaml file. A default file is provided
+(<c>default.yaml</c>); also, the options can be changed directly in
+R.
 
 \verbatim
 library(starvz)
-dtrace <- the_fast_reader_function("./")
+library(dplyr)
+
+dtrace <- starvz_read("./", selective = FALSE)
 
-pajer <- config::get(file = "starvz/full_config.yaml")
+# show idleness ratio
+dtrace$config$st$idleness = TRUE
 
-pajer$starpu$active = TRUE
-pajer$submitted$active = TRUE
-pajer$st$abe$active = TRUE
+# show ABE bound
+dtrace$config$st$abe$active = TRUE
 
-plot <- the_master_function(dtrace)
+# find the last task with dplyr
+dtrace$config$st$tasks$list = dtrace$Application %>% filter(End == max(End)) %>% .$JobId
+# show last task dependencies
+dtrace$config$st$tasks$active = TRUE
+dtrace$config$st$tasks$levels = 50
+
+plot <- starvz_plot(dtrace)
 \endverbatim
 
 An example of visualization follows:
 
-\image html starvz_visu.png
-\image latex starvz_visu.eps "" width=\textwidth
-
+\image html starvz_visu_r.png
+\image latex starvz_visu_r.pdf "" width=\textwidth
 
 \section MemoryFeedback Memory Feedback
 

+ 4 - 3
doc/doxygen/chapters/410_mpi_support.doxy

@@ -470,7 +470,7 @@ starpu_mpi_barrier(MPI_COMM_WORLD);
 \section MPIInsertTaskUtility MPI Insert Task Utility
 
 To save the programmer from having to make all communications explicit, StarPU
-provides an "MPI Insert Task Utility". The principe is that the application
+provides an "MPI Insert Task Utility". The principle is that the application
 decides a distribution of the data over the MPI nodes by allocating it and
 notifying StarPU of this decision, i.e. tell StarPU which MPI node "owns"
 which data. It also decides, for each handle, an MPI tag which will be used to
@@ -571,7 +571,7 @@ to provide a dynamic policy.
 
 A function starpu_mpi_task_build() is also provided with the aim to
 only construct the task structure. All MPI nodes need to call the
-function, which posts the required send/recv on the various nodes which have to.
+function, which posts the required send/recv on the various nodes as needed.
 Only the node which is to execute the task will then return a
 valid task structure, others will return <c>NULL</c>. This node must submit the task.
 All nodes then need to call the function starpu_mpi_task_post_build() -- with the same
@@ -637,7 +637,7 @@ saves, a quick and easy way is to measure the submission time of just one of the
 MPI nodes. This can be achieved by running the application on just one MPI node
 with the following environment variables:
 
-\code
+\code{.sh}
 export STARPU_DISABLE_KERNELS=1
 export STARPU_MPI_FAKE_RANK=2
 export STARPU_MPI_FAKE_SIZE=1024
@@ -1095,6 +1095,7 @@ disabled in NewMadeleine by compiling it with the profile
 
 To build NewMadeleine, download the latest version from the website (or,
 better, use the Git version to use the most recent version), then:
+
 \code{.sh}
 cd pm2/scripts
 ./pm2-build-packages ./<the profile you chose> --prefix=<installation prefix>

+ 1 - 1
doc/doxygen/chapters/470_simgrid.doxy

@@ -23,7 +23,7 @@
 
 StarPU can use Simgrid in order to simulate execution on an arbitrary
 platform. This was tested with SimGrid from 3.11 to 3.16, and 3.18 to
-3.26. SimGrid version 3.25 needs to be configured with -Denable_msg=ON .
+3.27. SimGrid version 3.25 needs to be configured with -Denable_msg=ON .
 Other versions may have compatibility issues. 3.17 notably does not build at
 all. MPI simulation does not work with version 3.22.
 

BIN
doc/doxygen/chapters/images/starvz_visu.eps


BIN
doc/doxygen/chapters/images/starvz_visu.pdf


BIN
doc/doxygen/chapters/images/starvz_visu.png


BIN
doc/doxygen/chapters/images/starvz_visu_r.pdf


BIN
doc/doxygen/chapters/images/starvz_visu_r.png


+ 27 - 0
examples/native_fortran/nf_vector.f90

@@ -29,6 +29,7 @@ program nf_vector
         type(c_ptr) :: dh_vb    ! a pointer for the 'vb' vector data handle
         integer(c_int) :: err   ! return status for fstarpu_init
         integer(c_int) :: ncpu  ! number of cpus workers
+        integer(c_int) :: bool_ret
 
         allocate(va(5))
         va = (/ (i,i=1,5) /)
@@ -49,6 +50,26 @@ program nf_vector
                 stop 77
         end if
 
+        ! illustrate use of pause/resume/is_paused
+        bool_ret = fstarpu_is_paused()
+        if (bool_ret /= 0) then
+                stop 1
+        end if
+
+        call fstarpu_pause
+
+        bool_ret = fstarpu_is_paused()
+        if (bool_ret == 0) then
+                stop 1
+        end if
+
+        call fstarpu_resume
+
+        bool_ret = fstarpu_is_paused()
+        if (bool_ret /= 0) then
+                stop 1
+        end if
+
         ! allocate an empty perfmodel structure
         perfmodel_vec = fstarpu_perfmodel_allocate()
 
@@ -73,6 +94,12 @@ program nf_vector
         ! optionally set 'where' field to CPU only
         call fstarpu_codelet_set_where(cl_vec, FSTARPU_CPU)
 
+        ! set 'type' field to SEQ (for demonstration purpose)
+        call fstarpu_codelet_set_type(cl_vec, FSTARPU_SEQ)
+
+        ! set 'max_parallelism' field to 1 (for demonstration purpose)
+        call fstarpu_codelet_set_max_parallelism(cl_vec, 1)
+
         ! add a Read-only mode data buffer to the codelet
         call fstarpu_codelet_add_buffer(cl_vec, FSTARPU_R)
 

+ 1 - 6
examples/spmv/dw_block_spmv.c

@@ -119,11 +119,6 @@ void init_problem_callback(void *arg)
 	}
 }
 
-unsigned get_bcsr_nchildren(struct starpu_data_filter *f, starpu_data_handle_t handle)
-{
-  return (unsigned)starpu_bcsr_get_nnz(handle);
-}
-
 void call_filters(void)
 {
 
@@ -131,7 +126,7 @@ void call_filters(void)
 	struct starpu_data_filter vector_in_f, vector_out_f;
 
 	bcsr_f.filter_func    = starpu_bcsr_filter_canonical_block;
-	bcsr_f.get_nchildren = get_bcsr_nchildren;
+	bcsr_f.get_nchildren = starpu_bcsr_filter_canonical_block_get_nchildren;
 	/* the children use a matrix interface ! */
 	bcsr_f.get_child_ops = starpu_bcsr_filter_canonical_block_child_ops;
 

+ 7 - 7
examples/worker_collections/worker_list_example.c

@@ -35,13 +35,13 @@ int main()
         starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, ncpus);
 
 	struct starpu_worker_collection *co = (struct starpu_worker_collection*)malloc(sizeof(struct starpu_worker_collection));
-	co->has_next = worker_list.has_next;
-	co->get_next = worker_list.get_next;
-	co->add = worker_list.add;
-	co->remove = worker_list.remove;
-	co->init = worker_list.init;
-	co->deinit = worker_list.deinit;
-	co->init_iterator = worker_list.init_iterator;
+	co->has_next = starpu_worker_list.has_next;
+	co->get_next = starpu_worker_list.get_next;
+	co->add = starpu_worker_list.add;
+	co->remove = starpu_worker_list.remove;
+	co->init = starpu_worker_list.init;
+	co->deinit = starpu_worker_list.deinit;
+	co->init_iterator = starpu_worker_list.init_iterator;
 	co->type = STARPU_WORKER_LIST;
 
 	FPRINTF(stderr, "ncpus %u\n", ncpus);

+ 7 - 7
examples/worker_collections/worker_tree_example.c

@@ -44,13 +44,13 @@ int main()
         starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, ncpus);
 
 	struct starpu_worker_collection *co = (struct starpu_worker_collection*)calloc(1, sizeof(struct starpu_worker_collection));
-	co->has_next = worker_tree.has_next;
-	co->get_next = worker_tree.get_next;
-	co->add = worker_tree.add;
-	co->remove = worker_tree.remove;
-	co->init = worker_tree.init;
-	co->deinit = worker_tree.deinit;
-	co->init_iterator = worker_tree.init_iterator;
+	co->has_next = starpu_worker_tree.has_next;
+	co->get_next = starpu_worker_tree.get_next;
+	co->add = starpu_worker_tree.add;
+	co->remove = starpu_worker_tree.remove;
+	co->init = starpu_worker_tree.init;
+	co->deinit = starpu_worker_tree.deinit;
+	co->init_iterator = starpu_worker_tree.init_iterator;
 	co->type = STARPU_WORKER_TREE;
 
 	FPRINTF(stderr, "ncpus %u \n", ncpus);

+ 29 - 0
include/fstarpu_mod.f90

@@ -103,6 +103,10 @@ module fstarpu_mod
         type(c_ptr), bind(C) :: FSTARPU_NL_REGRESSION_BASED
         type(c_ptr), bind(C) :: FSTARPU_MULTIPLE_REGRESSION_BASED
 
+        type(c_ptr), bind(C) :: FSTARPU_SEQ
+        type(c_ptr), bind(C) :: FSTARPU_SPMD
+        type(c_ptr), bind(C) :: FSTARPU_FORKJOIN
+
         ! (some) portable iso_c_binding types
         type(c_ptr), bind(C) :: FSTARPU_SZ_C_DOUBLE
         type(c_ptr), bind(C) :: FSTARPU_SZ_C_FLOAT
@@ -199,6 +203,12 @@ module fstarpu_mod
                 subroutine fstarpu_resume() bind(C,name="starpu_resume")
                 end subroutine fstarpu_resume
 
+                ! int starpu_is_paused(void);
+                function fstarpu_is_paused() bind(C,name="starpu_is_paused")
+                        use iso_c_binding, only: c_int
+                        integer(c_int) :: fstarpu_is_paused
+                end function fstarpu_is_paused
+
                 ! void starpu_shutdown(void);
                 subroutine fstarpu_shutdown () bind(C,name="starpu_shutdown")
                 end subroutine fstarpu_shutdown
@@ -713,6 +723,18 @@ module fstarpu_mod
                         type(c_ptr), value, intent(in) :: where ! C function expects an intptr_t
                 end subroutine fstarpu_codelet_set_where
 
+                subroutine fstarpu_codelet_set_type (cl, type_constant) bind(C)
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), value, intent(in) :: cl
+                        type(c_ptr), value, intent(in) :: type_constant ! C function expects an intptr_t
+                end subroutine fstarpu_codelet_set_type
+
+                subroutine fstarpu_codelet_set_max_parallelism (cl, max_parallelism) bind(C)
+                        use iso_c_binding, only: c_ptr,c_int
+                        type(c_ptr), value, intent(in) :: cl
+                        integer(c_int), value, intent(in) :: max_parallelism
+                end subroutine fstarpu_codelet_set_max_parallelism
+
                 function fstarpu_perfmodel_allocate () bind(C)
                         use iso_c_binding, only: c_ptr
                         type(c_ptr) :: fstarpu_perfmodel_allocate
@@ -2475,6 +2497,13 @@ module fstarpu_mod
                         FSTARPU_MULTIPLE_REGRESSION_BASED = &
                                 fstarpu_get_constant(C_CHAR_"FSTARPU_MULTIPLE_REGRESSION_BASED"//C_NULL_CHAR)
 
+                        FSTARPU_SEQ = &
+                                fstarpu_get_constant(C_CHAR_"FSTARPU_SEQ"//C_NULL_CHAR)
+                        FSTARPU_SPMD = &
+                                fstarpu_get_constant(C_CHAR_"FSTARPU_SPMD"//C_NULL_CHAR)
+                        FSTARPU_FORKJOIN = &
+                                fstarpu_get_constant(C_CHAR_"FSTARPU_FORKJOIN"//C_NULL_CHAR)
+
                         ! Initialize size constants as 'c_ptr'
                         FSTARPU_SZ_C_DOUBLE        = sz_to_p(c_sizeof(FSTARPU_SZ_C_DOUBLE_dummy))
                         FSTARPU_SZ_C_FLOAT        = sz_to_p(c_sizeof(FSTARPU_SZ_C_FLOAT_dummy))

+ 5 - 0
include/starpu.h

@@ -593,6 +593,11 @@ void starpu_pause(void);
 void starpu_resume(void);
 
 /**
+   Return a non-zero value if task processing by workers is currently paused, 0 otherwise.
+ */
+int starpu_is_paused(void);
+
+/**
    Value to be passed to starpu_get_next_bindid() and
    starpu_bind_thread_on() when binding a thread which will
    significantly eat CPU time, and should thus have its own dedicated

+ 8 - 0
include/starpu_data_filters.h

@@ -314,8 +314,16 @@ void starpu_data_partition_not_automatic(starpu_data_handle_t handle);
    Partition a block-sparse matrix into dense matrices.
    starpu_data_filter::get_child_ops needs to be set to
    starpu_bcsr_filter_canonical_block_child_ops()
+   and starpu_data_filter::get_nchildren set to
+   starpu_bcsr_filter_canonical_block_get_nchildren().
 */
 void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Return the number of children obtained with starpu_bcsr_filter_canonical_block().
+*/
+unsigned starpu_bcsr_filter_canonical_block_get_nchildren(struct starpu_data_filter *f, starpu_data_handle_t handle)
+;
 /**
    Return the child_ops of the partition obtained with starpu_bcsr_filter_canonical_block().
 */

+ 8 - 1
include/starpu_task.h

@@ -715,6 +715,7 @@ struct starpu_task
 	   when using ::STARPU_R and alike.
 	*/
 	starpu_data_handle_t handles[STARPU_NMAXBUFS];
+
 	/**
 	   Array of Data pointers to the memory node where execution
 	   will happen, managed by the DSM.
@@ -722,6 +723,7 @@ struct starpu_task
 	   This is filled by StarPU.
 	*/
 	void *interfaces[STARPU_NMAXBUFS];
+
 	/**
 	   Used only when starpu_codelet::nbuffers is \ref
 	   STARPU_VARIABLE_NBUFFERS.
@@ -809,6 +811,9 @@ struct starpu_task
 	   already executing. The callback is passed
 	   the value contained in the starpu_task::epilogue_callback_arg field.
 	   No callback is executed if the field is set to <c>NULL</c>.
+
+	   With starpu_task_insert() and alike this can be specified thanks to
+	   ::STARPU_EPILOGUE_CALLBACK followed by the function pointer.
 	*/
 	void (*epilogue_callback_func)(void *);
 
@@ -880,7 +885,8 @@ struct starpu_task
 	*/
 	void *prologue_callback_arg;
 
-	/** Optional field, the default value is <c>NULL</c>. This is a
+	/**
+	   Optional field, the default value is <c>NULL</c>. This is a
 	   function pointer of prototype <c>void (*f)(void*)</c>
 	   which specifies a possible callback. If this pointer is
 	   non-<c>NULL</c>, the callback function is executed on the host
@@ -893,6 +899,7 @@ struct starpu_task
 	   ::STARPU_PROLOGUE_CALLBACK_POP followed by the function pointer.
 	*/
 	void (*prologue_callback_pop_func)(void *);
+
 	/**
 	   Optional field, the default value is <c>NULL</c>. This is
 	   the pointer passed to the prologue_callback_pop function. This

+ 5 - 6
include/starpu_task_util.h

@@ -88,14 +88,15 @@ extern "C"
 #define STARPU_EXECUTE_ON_DATA	 (7<<STARPU_MODE_SHIFT)
 
 /**
-   Used when calling starpu_task_in_sert(), must be followed by an array of
+   Used when calling starpu_task_insert(), must be followed by an array of
    handles and the number of elements in the array (as int). This is equivalent
-   to passing the handles as separate parameters with STARPU_R/W/RW.
+   to passing the handles as separate parameters with ::STARPU_R,
+   ::STARPU_W or ::STARPU_RW.
 */
 #define STARPU_DATA_ARRAY        (8<<STARPU_MODE_SHIFT)
 
 /**
-   Used when calling starpu_task_in_sert(), must be followed by an array of
+   Used when calling starpu_task_insert(), must be followed by an array of
    struct starpu_data_descr and the number of elements in the array (as int).
    This is equivalent to passing the handles with the corresponding modes.
 */
@@ -322,8 +323,7 @@ extern "C"
 
 /**
    Used when calling starpu_task_insert() and alike, must be followed
-   by a void* specifying the value to be set in the sched_data field of the
-   task.
+   by a void* specifying the value to be set in starpu_task::sched_data.
  */
 #define STARPU_TASK_SCHED_DATA (41<<STARPU_MODE_SHIFT)
 
@@ -375,7 +375,6 @@ int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...);
 	starpu_task_set((task), (cl), STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__)
 #endif
 
-
 /**
    Create a task corresponding to \p cl with the following arguments.
    The argument list must be zero-terminated. The arguments

+ 21 - 3
include/starpu_util.h

@@ -95,12 +95,30 @@ extern "C"
 #endif
 
 /**
-   When building with a GNU C Compiler, defined to __attribute__((visibility ("internal")))
+   When building with a GNU C Compiler, defined to __attribute__((visibility ("default")))
 */
 #ifdef __GNUC__
-#  define STARPU_ATTRIBUTE_INTERNAL      __attribute__ ((visibility ("internal")))
+#  define STARPU_ATTRIBUTE_VISIBILITY_DEFAULT      __attribute__ ((visibility ("default")))
 #else
-#  define STARPU_ATTRIBUTE_INTERNAL
+#  define STARPU_ATTRIBUTE_VISIBILITY_DEFAULT
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to the _Pragma operator
+   equivalent of #pragma GCC visibility push(hidden). A \#pragma directive
+   cannot be produced by macro expansion, so the operator form is required
+   for the macro to be usable. Defined to nothing with other compilers.
+*/
+#ifdef __GNUC__
+#  define STARPU_VISIBILITY_PUSH_HIDDEN      _Pragma("GCC visibility push(hidden)")
+#else
+#  define STARPU_VISIBILITY_PUSH_HIDDEN
+#endif
+
+/**
+   When building with a GNU C Compiler, defined to the _Pragma operator
+   equivalent of #pragma GCC visibility pop. A \#pragma directive cannot
+   be produced by macro expansion, so the operator form is required for
+   the macro to be usable. Defined to nothing with other compilers.
+*/
+#ifdef __GNUC__
+#  define STARPU_VISIBILITY_POP      _Pragma("GCC visibility pop")
+#else
+#  define STARPU_VISIBILITY_POP
 #endif
 
 /**

+ 14 - 2
include/starpu_worker.h

@@ -156,8 +156,8 @@ struct starpu_worker_collection
 	void (*init_iterator_for_parallel_tasks)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task);
 };
 
-extern struct starpu_worker_collection worker_list;
-extern struct starpu_worker_collection worker_tree;
+extern struct starpu_worker_collection starpu_worker_list;
+extern struct starpu_worker_collection starpu_worker_tree;
 
 /**
    Return the number of workers (i.e. processing units executing
@@ -346,8 +346,20 @@ unsigned starpu_worker_get_local_memory_node(void);
 */
 unsigned starpu_worker_get_memory_node(unsigned workerid);
 
+/**
+   Return the number of memory nodes
+*/
 unsigned starpu_memory_nodes_get_count(void);
+
+/**
+   Return in \p name the name of a memory node (NUMA 0, CUDA 0, etc.).
+   \p size is the size of the \p name array.
+*/
 int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
+
+/**
+   Return the number of NUMA nodes used by StarPU
+*/
 int starpu_memory_nodes_get_numa_count(void);
 
 /**

+ 1 - 0
mpi/src/starpu_mpi_task_insert.c

@@ -781,6 +781,7 @@ static struct starpu_perfmodel dumb_model =
 	.cost_function	= cost_function
 };
 
+/* FIXME: we can probably use STARPU_NOWHERE for these? */
 static
 struct starpu_codelet _starpu_mpi_redux_data_read_cl =
 {

+ 1 - 0
src/Makefile.am

@@ -416,3 +416,4 @@ dist-hook:
 		nm $$j | $(GREP) -e "U \($$look\)$$" && { echo $$j ; failed=1 ; } ; \
 	done ; \
 	[ $$failed == 0 ]
+	nm -n .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.so | grep -v " [Ua-z] " | grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|_fini\|_edata\|__bss_start\|_end\)" | (! grep .)

+ 4 - 0
src/common/barrier.h

@@ -19,6 +19,8 @@
 
 #include <starpu_thread.h>
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 struct _starpu_barrier
@@ -38,4 +40,6 @@ int _starpu_barrier_destroy(struct _starpu_barrier *barrier);
 
 int _starpu_barrier_wait(struct _starpu_barrier *barrier);
 
+#pragma GCC visibility pop
+
 #endif // __COMMON_BARRIER_H__

+ 5 - 0
src/common/barrier_counter.h

@@ -23,6 +23,8 @@
 #include <common/utils.h>
 #include <common/barrier.h>
 
+#pragma GCC visibility push(hidden)
+
 struct _starpu_barrier_counter
 {
 	struct _starpu_barrier barrier;
@@ -53,4 +55,7 @@ int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c);
 int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c);
 
 double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c);
+
+#pragma GCC visibility pop
+
 #endif

+ 19 - 12
src/common/fxt.h

@@ -36,6 +36,13 @@
 #include <common/utils.h>
 #include <starpu.h>
 
+#ifdef STARPU_USE_FXT
+#include <fxt/fxt.h>
+#include <fxt/fut.h>
+#endif
+
+#pragma GCC visibility push(hidden)
+
 /* some key to identify the worker kind */
 #define _STARPU_FUT_WORKER_KEY(kind) (kind + 0x100)
 #define _STARPU_FUT_KEY_WORKER(key) (key - 0x100)
@@ -268,8 +275,6 @@ static inline unsigned long _starpu_fxt_get_job_id(void)
 }
 
 #ifdef STARPU_USE_FXT
-#include <fxt/fxt.h>
-#include <fxt/fut.h>
 
 /* Some versions of FxT do not include the declaration of the function */
 #ifdef HAVE_ENABLE_FUT_FLUSH
@@ -283,10 +288,10 @@ void fut_set_filename(char *filename);
 #endif
 #endif
 
-extern int _starpu_fxt_started;
-extern int _starpu_fxt_willstart;
-extern starpu_pthread_mutex_t _starpu_fxt_started_mutex;
-extern starpu_pthread_cond_t _starpu_fxt_started_cond;
+extern int _starpu_fxt_started STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+extern int _starpu_fxt_willstart STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+extern starpu_pthread_mutex_t _starpu_fxt_started_mutex STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+extern starpu_pthread_cond_t _starpu_fxt_started_cond STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 /** Wait until FXT is started (or not). Returns if FXT was started */
 static inline int _starpu_fxt_wait_initialisation()
@@ -308,7 +313,7 @@ static inline unsigned long _starpu_fxt_get_submit_order(void)
 	return ret;
 }
 
-long _starpu_gettid(void);
+long _starpu_gettid(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 /** Initialize the FxT library. */
 void _starpu_fxt_init_profiling(uint64_t trace_buffer_size);
@@ -867,21 +872,21 @@ do {									\
 	const char *name = _starpu_job_get_task_name((job));			\
 	if (name)					                        \
 	{									\
-		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, name);\
+		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), name); \
 	}									\
 	else {									\
-		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, "unknown");\
+		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), "unknown");\
 	}									\
 	if (model_name)					\
-		_STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, model_name); \
+		_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, _starpu_gettid(), model_name); \
 } while(0)
 
 #define _STARPU_TRACE_TASK_COLOR(job)						\
 do { \
 	if ((job)->task->color != 0) \
-		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color, _starpu_gettid()); \
+		FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color); \
 	else if ((job)->task->cl && (job)->task->cl->color != 0) \
-		FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color, _starpu_gettid()); \
+		FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color); \
 } while(0)
 
 #define _STARPU_TRACE_TASK_DONE(job)						\
@@ -1456,4 +1461,6 @@ do {										\
 
 #endif // STARPU_USE_FXT
 
+#pragma GCC visibility pop
+
 #endif // __FXT_H__

+ 4 - 4
src/common/graph.c

@@ -37,16 +37,16 @@ static starpu_pthread_rwlock_t graph_lock;
 int _starpu_graph_record;
 
 /* This list contains all nodes without incoming dependency */
-struct _starpu_graph_node_multilist_top top;
+static struct _starpu_graph_node_multilist_top top;
 /* This list contains all nodes without outgoing dependency */
-struct _starpu_graph_node_multilist_bottom bottom;
+static struct _starpu_graph_node_multilist_bottom bottom;
 /* This list contains all nodes */
-struct _starpu_graph_node_multilist_all all;
+static struct _starpu_graph_node_multilist_all all;
 
 /* Protects the dropped list, always taken before graph lock */
 static starpu_pthread_mutex_t dropped_lock;
 /* This list contains all dropped nodes, i.e. the job terminated by the corresponding node is still in the graph */
-struct _starpu_graph_node_multilist_dropped dropped;
+static struct _starpu_graph_node_multilist_dropped dropped;
 
 void _starpu_graph_init(void)
 {

+ 4 - 0
src/common/graph.h

@@ -19,6 +19,8 @@
 
 #include <common/list.h>
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 MULTILIST_CREATE_TYPE(_starpu_graph_node, all)
@@ -118,4 +120,6 @@ void _starpu_graph_compute_descendants(void);
 */
 void _starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data);
 
+#pragma GCC visibility pop
+
 #endif /* __GRAPH_H__ */

+ 1 - 1
src/common/knobs.c

@@ -426,7 +426,7 @@ void starpu_perf_counter_set_per_codelet_listener(struct starpu_codelet *cl, str
 
 /* - */
 
-void unset_listener(struct starpu_perf_counter_sample *sample)
+static void unset_listener(struct starpu_perf_counter_sample *sample)
 {
 	_starpu_spin_lock(&sample->lock);
 	STARPU_ASSERT(sample->listener != NULL);

+ 4 - 0
src/common/knobs.h

@@ -25,6 +25,8 @@
 #include <starpu.h>
 #include <common/config.h>
 
+#pragma GCC visibility push(hidden)
+
 /** Performance Monitoring */
 #define STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(t) STARPU_ASSERT( \
 		(t == starpu_perf_counter_scope_global ) \
@@ -358,4 +360,6 @@ void _starpu__workers_c__unregister_knobs(void);	/* module: workers.c */
 void _starpu__task_c__unregister_knobs(void); /* module: task.c */
 void _starpu__dmda_c__unregister_knobs(void); /* module: dmda.c */
 
+#pragma GCC visibility pop
+
 #endif // __KNOBS_H__

+ 4 - 0
src/common/rwlock.h

@@ -20,6 +20,8 @@
 #include <stdint.h>
 #include <starpu.h>
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 /** Dummy implementation of a RW-lock using a spinlock. */
@@ -50,4 +52,6 @@ int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock);
 /** Unlock the RW-lock. */
 void _starpu_release_rw_lock(struct _starpu_rw_lock *lock);
 
+#pragma GCC visibility pop
+
 #endif

+ 5 - 4
src/common/thread.c

@@ -57,8 +57,6 @@ static int _starpu_futex_wake = FUTEX_WAKE;
 
 #ifdef STARPU_SIMGRID
 
-extern int _starpu_simgrid_thread_start(int argc, char *argv[]);
-
 int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2)
 {
 	return t1 == t2;
@@ -76,9 +74,12 @@ starpu_pthread_t starpu_pthread_self(void)
 int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host)
 {
 	char **_args;
+	int ret;
 	_STARPU_MALLOC(_args, 3*sizeof(char*));
-	asprintf(&_args[0], "%p", start_routine);
-	asprintf(&_args[1], "%p", arg);
+	ret = asprintf(&_args[0], "%p", start_routine);
+	STARPU_ASSERT(ret);
+	ret = asprintf(&_args[1], "%p", arg);
+	STARPU_ASSERT(ret);
 	_args[2] = NULL;
 	if (!host)
 		host = _starpu_simgrid_get_host_by_name("MAIN");

+ 6 - 2
src/common/thread.h

@@ -21,8 +21,10 @@
 
 #include <common/utils.h>
 
+#pragma GCC visibility push(hidden)
+
 #if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
-int _starpu_pthread_spin_do_lock(starpu_pthread_spinlock_t *lock);
+int _starpu_pthread_spin_do_lock(starpu_pthread_spinlock_t *lock) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 #endif
 
 #if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)
@@ -107,7 +109,7 @@ static inline int _starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock)
 #define starpu_pthread_spin_trylock _starpu_pthread_spin_trylock
 
 #if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
-void _starpu_pthread_spin_do_unlock(starpu_pthread_spinlock_t *lock);
+void _starpu_pthread_spin_do_unlock(starpu_pthread_spinlock_t *lock) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 #endif
 
 static inline int _starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock)
@@ -139,6 +141,8 @@ static inline void _starpu_pthread_spin_checklocked(starpu_pthread_spinlock_t *l
 #endif /* defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) */
 
 
+#pragma GCC visibility pop
+
 #endif /* __COMMON_THREAD_H__ */
 
 

+ 5 - 1
src/common/timing.h

@@ -27,12 +27,16 @@
 #include <starpu.h>
 #include <starpu_util.h>
 
+#pragma GCC visibility push(hidden)
+
 /**
  * _starpu_timing_init must be called prior to using any of these timing
  * functions.
  */
 void _starpu_timing_init(void);
-void _starpu_clock_gettime(struct timespec *ts);
+void _starpu_clock_gettime(struct timespec *ts) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+
+#pragma GCC visibility pop
 
 #endif /* TIMING_H */
 

+ 4 - 0
src/common/uthash.h

@@ -64,6 +64,8 @@ typedef unsigned int uint32_t;
 #include <inttypes.h>   /* uint32_t */
 #endif
 
+#pragma GCC visibility push(hidden)
+
 #define UTHASH_VERSION 1.9.3
 
 #define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc) */
@@ -1023,4 +1025,6 @@ typedef struct UT_hash_handle {
    unsigned hashv;                   /* result of hash-fcn(key)        */
 } UT_hash_handle;
 
+#pragma GCC visibility pop
+
 #endif /* UTHASH_H */

+ 1 - 1
src/common/utils.c

@@ -623,7 +623,7 @@ char *starpu_getenv(const char *str)
 	return getenv(str);
 }
 
-int _strings_ncmp(const char *strings[], const char *str)
+static int _strings_ncmp(const char *strings[], const char *str)
 {
 	int pos = 0;
 	while (strings[pos])

+ 7 - 3
src/common/utils.h

@@ -33,6 +33,8 @@
 #include <valgrind/helgrind.h>
 #endif
 
+#pragma GCC visibility push(hidden)
+
 #ifndef DO_CREQ_v_WW
 #define DO_CREQ_v_WW(_creqF, _ty1F, _arg1F, _ty2F, _arg2F) ((void)0)
 #endif
@@ -143,8 +145,8 @@
 #define _STARPU_IS_ZERO(a) (fpclassify(a) == FP_ZERO)
 #endif
 
-char *_starpu_mkdtemp_internal(char *tmpl);
-char *_starpu_mkdtemp(char *tmpl);
+char *_starpu_mkdtemp_internal(char *tmpl) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+char *_starpu_mkdtemp(char *tmpl) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 int _starpu_mkpath(const char *s, mode_t mode);
 void _starpu_mkpath_and_check(const char *s, mode_t mode);
 char *_starpu_mktemp(const char *directory, int flags, int *fd);
@@ -160,7 +162,7 @@ int _starpu_frdunlock(FILE *file);
 int _starpu_fwrlock(FILE *file);
 int _starpu_fwrunlock(FILE *file);
 char *_starpu_get_home_path(void);
-void _starpu_gethostname(char *hostname, size_t size);
+void _starpu_gethostname(char *hostname, size_t size) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 /** If FILE is currently on a comment line, eat it.  */
 void _starpu_drop_comments(FILE *f);
@@ -184,4 +186,6 @@ void _starpu_util_init(void);
 
 enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED };
 
+#pragma GCC visibility pop
+
 #endif // __COMMON_UTILS_H__

+ 4 - 0
src/core/combined_workers.h

@@ -22,4 +22,8 @@
 #include <starpu.h>
 #include <common/config.h>
 
+#pragma GCC visibility push(hidden)
+
+#pragma GCC visibility pop
+
 #endif // __COMBINED_WORKERS_H__

+ 1 - 1
src/core/debug.c

@@ -81,7 +81,7 @@ void _starpu_print_to_logfile(const char *format STARPU_ATTRIBUTE_UNUSED, ...)
 
 /* Record codelet to give ayudame nice function ids starting from 0. */
 #if defined(STARPU_USE_AYUDAME1)
-struct ayudame_codelet
+static struct ayudame_codelet
 {
 	char *name;
 	struct starpu_codelet *cl;

+ 4 - 0
src/core/debug.h

@@ -290,6 +290,8 @@
 
 #endif
 
+#pragma GCC visibility push(hidden)
+
 /** Create a file that will contain StarPU's log */
 void _starpu_open_debug_logfile(void);
 
@@ -310,4 +312,6 @@ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl);
 void _starpu_watchdog_init(void);
 void _starpu_watchdog_shutdown(void);
 
+#pragma GCC visibility pop
+
 #endif // __DEBUG_H__

+ 4 - 0
src/core/dependencies/cg.h

@@ -22,6 +22,8 @@
 #include <starpu.h>
 #include <common/config.h>
 
+#pragma GCC visibility push(hidden)
+
 /** we do not necessarily want to allocate room for 256 dependencies, but we
     want to handle the few situations where there are a lot of dependencies as
    well */
@@ -132,4 +134,6 @@ void _starpu_notify_job_start_cg_list(void *pred, struct _starpu_cg_list *succes
 void _starpu_notify_task_dependencies(struct _starpu_job *j);
 void _starpu_notify_job_start_tasks(struct _starpu_job *j, _starpu_notify_job_start_data *data);
 
+#pragma GCC visibility pop
+
 #endif // __CG_H__

+ 4 - 0
src/core/dependencies/data_concurrency.h

@@ -21,6 +21,8 @@
 
 #include <core/jobs.h>
 
+#pragma GCC visibility push(hidden)
+
 void _starpu_job_set_ordered_buffers(struct _starpu_job *j);
 
 unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j);
@@ -40,5 +42,7 @@ unsigned _starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_
 						       void (*callback)(void *), void *argcb,
 						       struct _starpu_job *j, unsigned buffer_index);
 
+#pragma GCC visibility pop
+
 #endif // __DATA_CONCURRENCY_H__
 

+ 6 - 1
src/core/dependencies/implicit_data_deps.h

@@ -22,6 +22,8 @@
 #include <starpu.h>
 #include <common/config.h>
 
+#pragma GCC visibility push(hidden)
+
 struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot,
 								  starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency);
 int _starpu_test_implicit_data_deps_with_handle(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
@@ -33,11 +35,14 @@ void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data
 void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
 
 /** Register a hook to be called when a write is submitted */
-void _starpu_implicit_data_deps_write_hook(void (*func)(starpu_data_handle_t));
+void _starpu_implicit_data_deps_write_hook(void (*func)(starpu_data_handle_t)) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 /** This function blocks until the handle is available in the requested mode */
 int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_data_access_mode mode, const char *sync_name);
 
 void _starpu_data_clear_implicit(starpu_data_handle_t handle);
+
+#pragma GCC visibility pop
+
 #endif // __IMPLICIT_DATA_DEPS_H__
 

+ 4 - 0
src/core/dependencies/tags.h

@@ -24,6 +24,8 @@
 #include <common/starpu_spinlock.h>
 #include <core/dependencies/cg.h>
 
+#pragma GCC visibility push(hidden)
+
 #define _STARPU_TAG_SIZE        (sizeof(starpu_tag_t)*8)
 
 enum _starpu_tag_state
@@ -78,4 +80,6 @@ unsigned _starpu_submit_job_enforce_task_deps(struct _starpu_job *j);
 
 void _starpu_tag_clear(void);
 
+#pragma GCC visibility pop
+
 #endif // __TAGS_H__

+ 5 - 0
src/core/detect_combined_workers.h

@@ -16,9 +16,14 @@
 
 #include <starpu.h>
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 /** Initialize combined workers */
 void _starpu_sched_find_worker_combinations(int *workerids, int nworkers);
 
 extern int _starpu_initialized_combined_workers;
+
+#pragma GCC visibility pop
+

+ 4 - 0
src/core/disk.h

@@ -31,6 +31,8 @@ extern "C"
 #include <datawizard/copy_driver.h>
 #include <datawizard/malloc.h>
 
+#pragma GCC visibility push(hidden)
+
 /** interface to manipulate memory disk */
 void * _starpu_disk_alloc (unsigned node, size_t size) STARPU_ATTRIBUTE_MALLOC;
 
@@ -67,4 +69,6 @@ void _starpu_swap_init(void);
 }
 #endif
 
+#pragma GCC visibility pop
+
 #endif /* __DISK_H__ */

+ 1 - 1
src/core/disk_ops/disk_unistd.c

@@ -66,7 +66,7 @@ struct starpu_disk_ops starpu_disk_unistd_ops =
 #else
 	.copy = NULL,
 #endif
-	.bandwidth = get_unistd_global_bandwidth_between_disk_and_main_ram,
+	.bandwidth = _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram,
 #ifdef HAVE_AIO_H
 	.async_read = starpu_unistd_global_async_read,
 	.async_write = starpu_unistd_global_async_write,

+ 1 - 1
src/core/disk_ops/disk_unistd_o_direct.c

@@ -138,7 +138,7 @@ struct starpu_disk_ops starpu_disk_unistd_o_direct_ops =
 #else
 	.copy = NULL,
 #endif
-	.bandwidth = get_unistd_global_bandwidth_between_disk_and_main_ram,
+	.bandwidth = _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram,
 #if defined(HAVE_AIO_H) || defined(HAVE_LIBAIO_H)
         .async_read = starpu_unistd_o_direct_global_async_read,
         .async_write = starpu_unistd_o_direct_global_async_write,

+ 2 - 2
src/core/disk_ops/unistd/disk_unistd_global.c

@@ -87,7 +87,7 @@ struct starpu_unistd_copy_thread
 	struct starpu_unistd_work_copy_list list;
 };
 
-struct starpu_unistd_copy_thread copy_thread[STARPU_MAXNODES][STARPU_MAXNODES];
+static struct starpu_unistd_copy_thread copy_thread[STARPU_MAXNODES][STARPU_MAXNODES];
 static unsigned starpu_unistd_nb_disk_opened = 0;
 /* copy_file_range syscall can return ENOSYS. Use global var to catch
  * and prevent StarPU using direct disk to disk copy */
@@ -735,7 +735,7 @@ void starpu_unistd_global_unplug(void *base)
 	free(fileBase);
 }
 
-int get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base)
+int _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base)
 {
 	int res;
 	unsigned iter;

+ 6 - 1
src/core/disk_ops/unistd/disk_unistd_global.h

@@ -25,6 +25,8 @@
 #include <sys/syscall.h>
 #endif
 
+#pragma GCC visibility push(hidden)
+
 #ifndef O_BINARY
 #define O_BINARY 0
 #endif
@@ -51,7 +53,7 @@ int starpu_unistd_global_read (void *base, void *obj, void *buf, off_t offset, s
 int starpu_unistd_global_write (void *base, void *obj, const void *buf, off_t offset, size_t size);
 void * starpu_unistd_global_plug (void *parameter, starpu_ssize_t size);
 void starpu_unistd_global_unplug (void *base);
-int get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base);
+int _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base);
 void* starpu_unistd_global_async_read (void *base, void *obj, void *buf, off_t offset, size_t size);
 void* starpu_unistd_global_async_write (void *base, void *obj, void *buf, off_t offset, size_t size);
 void * starpu_unistd_global_async_full_write (void * base, void * obj, void * ptr, size_t size);
@@ -64,4 +66,7 @@ int starpu_unistd_global_full_write (void * base, void * obj, void * ptr, size_t
 #ifdef STARPU_UNISTD_USE_COPY
 void *  starpu_unistd_global_copy(void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size);
 #endif
+
+#pragma GCC visibility pop
+
 #endif

+ 4 - 0
src/core/drivers.h

@@ -18,6 +18,8 @@
 #ifndef __DRIVERS_H__
 #define __DRIVERS_H__
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 struct _starpu_driver_ops
@@ -28,4 +30,6 @@ struct _starpu_driver_ops
 	int (*deinit)(struct _starpu_worker *worker);
 };
 
+#pragma GCC visibility pop
+
 #endif // __DRIVERS_H__

+ 4 - 0
src/core/errorcheck.h

@@ -21,6 +21,8 @@
 
 #include <starpu.h>
 
+#pragma GCC visibility push(hidden)
+
 /** This type describes in which state a worker may be. */
 enum _starpu_worker_status
 {
@@ -60,4 +62,6 @@ enum _starpu_worker_status _starpu_get_local_worker_status(void);
  * legal to call a blocking operation in the current context. */
 unsigned _starpu_worker_may_perform_blocking_calls(void);
 
+#pragma GCC visibility pop
+
 #endif // __ERRORCHECK_H__

+ 4 - 0
src/core/idle_hook.h

@@ -17,10 +17,14 @@
 #ifndef __IDLE_HOOK_H__
 #define __IDLE_HOOK_H__
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 void _starpu_init_idle_hooks(void);
 
 unsigned _starpu_execute_registered_idle_hooks(void);
 
+#pragma GCC visibility pop
+
 #endif /* !__IDLE_HOOK_H__ */

+ 4 - 0
src/core/jobs.h

@@ -47,6 +47,8 @@
 #include <cuda.h>
 #endif
 
+#pragma GCC visibility push(hidden)
+
 struct _starpu_worker;
 
 /** codelet function */
@@ -287,4 +289,6 @@ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *t
 
 #define _STARPU_JOB_GET_DEP_SLOTS(job) (((job)->dyn_dep_slots) ? (job)->dyn_dep_slots : (job)->dep_slots)
 
+#pragma GCC visibility pop
+
 #endif // __JOBS_H__

+ 4 - 0
src/core/perfmodel/multiple_regression.h

@@ -25,6 +25,10 @@
 #include <core/perfmodel/perfmodel.h>
 #include <starpu.h>
 
+#pragma GCC visibility push(hidden)
+
 int _starpu_multiple_regression(struct starpu_perfmodel_history_list *ptr, double *coeff, unsigned ncoeff, unsigned nparameters, const char **parameters_names, unsigned **combinations, const char *codelet_name);
 
+#pragma GCC visibility pop
+
 #endif // __MULTIPLE_REGRESSION_H__

+ 1 - 1
src/core/perfmodel/perfmodel.c

@@ -481,7 +481,7 @@ double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundl
 
 				/* Insert the handle in the sorted list in case
 				 * it's not already in that list. */
-				_insertion_handle_sorted(&handles, handle, mode);
+				_starpu_insertion_handle_sorted(&handles, handle, mode);
 			}
 		}
 

+ 7 - 3
src/core/perfmodel/perfmodel.h

@@ -26,6 +26,8 @@
 #include <core/task_bundle.h>
 #include <stdio.h>
 
+#pragma GCC visibility push(hidden)
+
 #ifdef __cplusplus
 extern "C"
 {
@@ -63,7 +65,7 @@ struct starpu_perfmodel_arch;
 
 extern unsigned _starpu_calibration_minimum;
 
-char *_starpu_get_perf_model_dir_codelet();
+char *_starpu_get_perf_model_dir_codelet() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 char *_starpu_get_perf_model_dir_bus();
 char *_starpu_get_perf_model_dir_debug();
 
@@ -100,8 +102,8 @@ unsigned *_starpu_get_fpga_affinity_vector(unsigned fpgaid);
 
 void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name);
 
-void _starpu_write_double(FILE *f, const char *format, double val);
-int _starpu_read_double(FILE *f, char *format, double *val);
+void _starpu_write_double(FILE *f, const char *format, double val) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+int _starpu_read_double(FILE *f, char *format, double *val) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 void _starpu_simgrid_get_platform_path(int version, char *path, size_t maxlen);
 
 void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb);
@@ -116,4 +118,6 @@ hwloc_topology_t _starpu_perfmodel_get_hwtopology();
 }
 #endif
 
+#pragma GCC visibility pop
+
 #endif // __PERFMODEL_H__

+ 4 - 0
src/core/perfmodel/regression.h

@@ -25,6 +25,10 @@
 #include <core/perfmodel/perfmodel.h>
 #include <starpu.h>
 
+#pragma GCC visibility push(hidden)
+
 int _starpu_regression_non_linear_power(struct starpu_perfmodel_history_list *ptr, double *a, double *b, double *c);
 
+#pragma GCC visibility pop
+
 #endif // __REGRESSION_H__

+ 4 - 0
src/core/progress_hook.h

@@ -17,10 +17,14 @@
 #ifndef __PROGRESS_HOOK_H__
 #define __PROGRESS_HOOK_H__
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 void _starpu_init_progression_hooks(void);
 
 unsigned _starpu_execute_registered_progression_hooks(void);
 
+#pragma GCC visibility pop
+
 #endif /* !__PROGRESS_HOOK_H__ */

+ 17 - 17
src/core/sched_ctx.c

@@ -32,7 +32,7 @@ enum _starpu_ctx_change_op
 static starpu_pthread_mutex_t sched_ctx_manag = STARPU_PTHREAD_MUTEX_INITIALIZER;
 static starpu_pthread_mutex_t finished_submit_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
 static struct starpu_task stop_submission_task = STARPU_TASK_INITIALIZER;
-starpu_pthread_key_t sched_ctx_key;
+static starpu_pthread_key_t sched_ctx_key;
 static unsigned with_hypervisor = 0;
 static double hyp_start_sample[STARPU_NMAX_SCHED_CTXS];
 static double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS];
@@ -1773,27 +1773,27 @@ struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsig
 	{
 #ifdef STARPU_HAVE_HWLOC
 	case STARPU_WORKER_TREE:
-		sched_ctx->workers->has_next = worker_tree.has_next;
-		sched_ctx->workers->get_next = worker_tree.get_next;
-		sched_ctx->workers->add = worker_tree.add;
-		sched_ctx->workers->remove = worker_tree.remove;
-		sched_ctx->workers->init = worker_tree.init;
-		sched_ctx->workers->deinit = worker_tree.deinit;
-		sched_ctx->workers->init_iterator = worker_tree.init_iterator;
-		sched_ctx->workers->init_iterator_for_parallel_tasks = worker_tree.init_iterator_for_parallel_tasks;
+		sched_ctx->workers->has_next = starpu_worker_tree.has_next;
+		sched_ctx->workers->get_next = starpu_worker_tree.get_next;
+		sched_ctx->workers->add = starpu_worker_tree.add;
+		sched_ctx->workers->remove = starpu_worker_tree.remove;
+		sched_ctx->workers->init = starpu_worker_tree.init;
+		sched_ctx->workers->deinit = starpu_worker_tree.deinit;
+		sched_ctx->workers->init_iterator = starpu_worker_tree.init_iterator;
+		sched_ctx->workers->init_iterator_for_parallel_tasks = starpu_worker_tree.init_iterator_for_parallel_tasks;
 		sched_ctx->workers->type = STARPU_WORKER_TREE;
 		break;
 #endif
 //	case STARPU_WORKER_LIST:
 	default:
-		sched_ctx->workers->has_next = worker_list.has_next;
-		sched_ctx->workers->get_next = worker_list.get_next;
-		sched_ctx->workers->add = worker_list.add;
-		sched_ctx->workers->remove = worker_list.remove;
-		sched_ctx->workers->init = worker_list.init;
-		sched_ctx->workers->deinit = worker_list.deinit;
-		sched_ctx->workers->init_iterator = worker_list.init_iterator;
-		sched_ctx->workers->init_iterator_for_parallel_tasks = worker_list.init_iterator_for_parallel_tasks;
+		sched_ctx->workers->has_next = starpu_worker_list.has_next;
+		sched_ctx->workers->get_next = starpu_worker_list.get_next;
+		sched_ctx->workers->add = starpu_worker_list.add;
+		sched_ctx->workers->remove = starpu_worker_list.remove;
+		sched_ctx->workers->init = starpu_worker_list.init;
+		sched_ctx->workers->deinit = starpu_worker_list.deinit;
+		sched_ctx->workers->init_iterator = starpu_worker_list.init_iterator;
+		sched_ctx->workers->init_iterator_for_parallel_tasks = starpu_worker_list.init_iterator_for_parallel_tasks;
 		sched_ctx->workers->type = STARPU_WORKER_LIST;
 		break;
 

+ 5 - 0
src/core/sched_ctx.h

@@ -36,6 +36,8 @@
 #include <hwloc.h>
 #endif
 
+#pragma GCC visibility push(hidden)
+
 #define NO_RESIZE -1
 #define REQ_RESIZE 0
 #define DO_RESIZE 1
@@ -317,4 +319,7 @@ static inline unsigned _starpu_sched_ctx_worker_is_master_for_child_ctx(unsigned
 /** Go through the list of deferred ctx changes of the current worker and apply
  * any ctx change operation found until the list is empty */
 void _starpu_worker_apply_deferred_ctx_changes(void);
+
+#pragma GCC visibility pop
+
 #endif // __SCHED_CONTEXT_H__

+ 25 - 21
src/core/sched_ctx_list.h

@@ -17,6 +17,8 @@
 #ifndef __SCHED_CONTEXT_LIST_H__
 #define __SCHED_CONTEXT_LIST_H__
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 /** Represents a non circular list of priorities and contains a list of sched context */
@@ -47,27 +49,27 @@ struct _starpu_sched_ctx_list_iterator
 };
 
 /** Element (sched_ctx) level operations */
-struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_find(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
-void _starpu_sched_ctx_elt_ensure_consistency(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
-void _starpu_sched_ctx_elt_init(struct _starpu_sched_ctx_elt *elt, unsigned sched_ctx);
-struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_after(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
-struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_before(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
-struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
-void _starpu_sched_ctx_elt_remove(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_elt *elt);
-int _starpu_sched_ctx_elt_exists(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
-int _starpu_sched_ctx_elt_get_priority(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
+struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_find(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+void _starpu_sched_ctx_elt_ensure_consistency(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+void _starpu_sched_ctx_elt_init(struct _starpu_sched_ctx_elt *elt, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_after(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_before(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+void _starpu_sched_ctx_elt_remove(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_elt *elt) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+int _starpu_sched_ctx_elt_exists(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+int _starpu_sched_ctx_elt_get_priority(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 
 /** List (priority) level operations */
-struct _starpu_sched_ctx_list* _starpu_sched_ctx_list_find(struct _starpu_sched_ctx_list *list, unsigned prio);
-struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_add_prio(struct _starpu_sched_ctx_list **list, unsigned prio, unsigned sched_ctx);
-int _starpu_sched_ctx_list_add(struct _starpu_sched_ctx_list **list, unsigned sched_ctx);
-void _starpu_sched_ctx_list_remove_elt(struct _starpu_sched_ctx_list **list, struct _starpu_sched_ctx_elt *rm);
-int _starpu_sched_ctx_list_remove(struct _starpu_sched_ctx_list **list, unsigned sched_ctx);
-int _starpu_sched_ctx_list_move(struct _starpu_sched_ctx_list **list, unsigned sched_ctx, unsigned prio_to);
-int _starpu_sched_ctx_list_exists(struct _starpu_sched_ctx_list *list, unsigned prio);
-void _starpu_sched_ctx_list_remove_all(struct _starpu_sched_ctx_list *list);
-void _starpu_sched_ctx_list_delete(struct _starpu_sched_ctx_list **list);
+struct _starpu_sched_ctx_list* _starpu_sched_ctx_list_find(struct _starpu_sched_ctx_list *list, unsigned prio) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_add_prio(struct _starpu_sched_ctx_list **list, unsigned prio, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+int _starpu_sched_ctx_list_add(struct _starpu_sched_ctx_list **list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+void _starpu_sched_ctx_list_remove_elt(struct _starpu_sched_ctx_list **list, struct _starpu_sched_ctx_elt *rm) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+int _starpu_sched_ctx_list_remove(struct _starpu_sched_ctx_list **list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+int _starpu_sched_ctx_list_move(struct _starpu_sched_ctx_list **list, unsigned sched_ctx, unsigned prio_to) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+int _starpu_sched_ctx_list_exists(struct _starpu_sched_ctx_list *list, unsigned prio) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+void _starpu_sched_ctx_list_remove_all(struct _starpu_sched_ctx_list *list) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+void _starpu_sched_ctx_list_delete(struct _starpu_sched_ctx_list **list) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 /** Task number management */
 int _starpu_sched_ctx_list_push_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
@@ -75,8 +77,10 @@ int _starpu_sched_ctx_list_pop_event(struct _starpu_sched_ctx_list *list, unsign
 int _starpu_sched_ctx_list_pop_all_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx);
 
 /** Iterator operations */
-int _starpu_sched_ctx_list_iterator_init(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_list_iterator *it);
-int _starpu_sched_ctx_list_iterator_has_next(struct _starpu_sched_ctx_list_iterator *it);
-struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_iterator_get_next(struct _starpu_sched_ctx_list_iterator *it);
+int _starpu_sched_ctx_list_iterator_init(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+int _starpu_sched_ctx_list_iterator_has_next(struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_iterator_get_next(struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+
+#pragma GCC visibility pop
 
 #endif // __SCHED_CONTEXT_LIST_H__

+ 6 - 2
src/core/sched_policy.h

@@ -28,6 +28,8 @@
 
 #include <core/simgrid.h>
 
+#pragma GCC visibility push(hidden)
+
 #define _STARPU_SCHED_BEGIN \
 	_STARPU_TRACE_WORKER_SCHEDULING_PUSH;	\
 	_SIMGRID_TIMER_BEGIN(_starpu_simgrid_sched_cost())
@@ -80,13 +82,13 @@ extern struct starpu_sched_policy _starpu_sched_ws_policy;
 extern struct starpu_sched_policy _starpu_sched_prio_policy;
 extern struct starpu_sched_policy _starpu_sched_random_policy;
 extern struct starpu_sched_policy _starpu_sched_dm_policy;
-extern struct starpu_sched_policy _starpu_sched_dmda_policy;
+extern struct starpu_sched_policy _starpu_sched_dmda_policy STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 extern struct starpu_sched_policy _starpu_sched_dmda_prio_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_sorted_decision_policy;
 extern struct starpu_sched_policy _starpu_sched_eager_policy;
-extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy;
+extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 extern struct starpu_sched_policy _starpu_sched_peager_policy;
 extern struct starpu_sched_policy _starpu_sched_heteroprio_policy;
 extern struct starpu_sched_policy _starpu_sched_modular_eager_policy;
@@ -125,4 +127,6 @@ extern long _starpu_task_break_on_exec;
 #define _STARPU_TASK_BREAK_ON(task, what) ((void) 0)
 #endif
 
+#pragma GCC visibility pop
+
 #endif // __SCHED_POLICY_H__

+ 42 - 33
src/core/simgrid.c

@@ -376,9 +376,8 @@ void _starpu_start_simgrid(int *argc, char **argv)
 	simgrid_transfer_cost = starpu_get_env_number_default("STARPU_SIMGRID_TRANSFER_COST", 1);
 }
 
-static int main_ret;
-
-int do_starpu_main(int argc, char *argv[])
+static int
+run_starpu_main(int argc, char *argv[])
 {
 	/* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */
 	starpu_sleep(0.000001);
@@ -389,8 +388,16 @@ int do_starpu_main(int argc, char *argv[])
 		_STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n");
 	}
 
-	main_ret = starpu_main(argc, argv);
-	return main_ret;
+	return starpu_main(argc, argv);
+}
+
+static int main_ret;
+
+static _starpu_simgrid_main_ret
+do_starpu_main(int argc, char *argv[])
+{
+	main_ret = run_starpu_main(argc, argv);
+	_STARPU_SIMGRID_MAIN_RETURN;
 }
 
 /* We need it only when using smpi */
@@ -427,7 +434,7 @@ int main(int argc, char **argv)
          * constructor and MSG_process_attach, directly jump to real main */
 	if (simgrid_started == 3)
 	{
-		return do_starpu_main(argc, argv);
+		return run_starpu_main(argc, argv);
 	}
 
 	/* Managed to catch application's main, initialize simgrid first */
@@ -719,7 +726,7 @@ void _starpu_simgrid_wait_tasks(int workerid)
 }
 
 /* Task execution submitted by StarPU */
-void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, double length, double energy, unsigned *finished)
+void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, double length, double energy, unsigned *finished)
 {
 	struct starpu_task *starpu_task = j->task;
 	double flops;
@@ -1205,7 +1212,7 @@ void _starpu_simgrid_sync_gpus(void)
 	_starpu_simgrid_wait_transfers();
 }
 
-int
+_starpu_simgrid_main_ret
 _starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[])
 {
 	void *(*f)(void*) = (void*) (uintptr_t) strtol(argv[0], NULL, 16);
@@ -1217,7 +1224,7 @@ _starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[])
 
 	/* _args is freed with process context */
 	f(arg);
-	return 0;
+	_STARPU_SIMGRID_MAIN_RETURN;
 }
 
 starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[])
@@ -1286,12 +1293,8 @@ void _starpu_simgrid_count_ngpus(void)
 		{
 			int busid;
 			starpu_sg_host_t srchost, dsthost;
-#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 			xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(SD_link_t), NULL);
-			SD_link_t *route;
-#else
-			const SD_link_t *route;
-#endif
+			SD_link_t link;
 			int i, routesize;
 			int through;
 			unsigned src2;
@@ -1313,23 +1316,28 @@ void _starpu_simgrid_count_ngpus(void)
 			sg_host_route(srchost, dsthost, route_dynar);
 #endif
 			routesize = xbt_dynar_length(route_dynar);
-			route = xbt_dynar_to_array(route_dynar);
 #else
+			const SD_link_t *route = SD_route_get_list(srchost, dsthost);
 			routesize = SD_route_get_size(srchost, dsthost);
-			route = SD_route_get_list(srchost, dsthost);
+			for (i = 0; i < routesize; i++)
+				xbt_dynar_push(route_dynar, &route[i]);
+			free(route);
 #endif
 
 			/* If it goes through "Host", do not care, there is no
 			 * direct transfer support */
 			for (i = 0; i < routesize; i++)
+			{
+				xbt_dynar_get_cpy(route_dynar, i, &link);
 				if (
 #ifdef HAVE_SG_LINK_GET_NAME
-					!strcmp(sg_link_get_name(route[i]), "Host")
+					!strcmp(sg_link_get_name(link), "Host")
 #else
-					!strcmp(sg_link_name(route[i]), "Host")
+					!strcmp(sg_link_name(link), "Host")
 #endif
 					)
 					break;
+			}
 			if (i < routesize)
 				continue;
 
@@ -1337,10 +1345,11 @@ void _starpu_simgrid_count_ngpus(void)
 			through = -1;
 			for (i = 0; i < routesize; i++)
 			{
+				xbt_dynar_get_cpy(route_dynar, i, &link);
 #ifdef HAVE_SG_LINK_GET_NAME
-				name = sg_link_get_name(route[i]);
+				name = sg_link_get_name(link);
 #else
-				name = sg_link_name(route[i]);
+				name = sg_link_name(link);
 #endif
 				size_t len = strlen(name);
 				if (!strcmp(" through", name+len-8))
@@ -1354,10 +1363,12 @@ void _starpu_simgrid_count_ngpus(void)
 				_STARPU_DEBUG("Didn't find through-link for %d->%d\n", src, dst);
 				continue;
 			}
+
+			xbt_dynar_get_cpy(route_dynar, through, &link);
 #ifdef HAVE_SG_LINK_GET_NAME
-			name = sg_link_get_name(route[through]);
+			name = sg_link_get_name(link);
 #else
-			name = sg_link_name(route[through]);
+			name = sg_link_name(link);
 #endif
 
 			/*
@@ -1382,27 +1393,30 @@ void _starpu_simgrid_count_ngpus(void)
 
 				starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2);
 				int routesize2;
-#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 				xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(SD_link_t), NULL);
-				SD_link_t *route2;
+#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
 #ifdef HAVE_SG_HOST_GET_ROUTE
 				sg_host_get_route(srchost2, ramhost, route_dynar2);
 #else
 				sg_host_route(srchost2, ramhost, route_dynar2);
 #endif
 				routesize2 = xbt_dynar_length(route_dynar2);
-				route2 = xbt_dynar_to_array(route_dynar2);
 #else
 				const SD_link_t *route2 = SD_route_get_list(srchost2, ramhost);
 				routesize2 = SD_route_get_size(srchost2, ramhost);
+				for (i = 0; i < routesize2; i++)
+					xbt_dynar_push(route_dynar2, &route2[i]);
+				free(route2);
 #endif
 
 				for (i = 0; i < routesize2; i++)
+				{
+					xbt_dynar_get_cpy(route_dynar2, i, &link); /* read from the src2->RAM route, which routesize2 measures */
 					if (
 #ifdef HAVE_SG_LINK_GET_NAME
-						!strcmp(name, sg_link_get_name(route2[i]))
+						!strcmp(name, sg_link_get_name(link))
 #else
-						!strcmp(name, sg_link_name(route2[i]))
+						!strcmp(name, sg_link_name(link))
 #endif
 						)
 					{
@@ -1410,15 +1424,10 @@ void _starpu_simgrid_count_ngpus(void)
 						ngpus++;
 						break;
 					}
-#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
-				free(route2);
-#endif
+				}
 			}
 			_STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus);
 			starpu_bus_set_ngpus(busid, ngpus);
-#if defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route)
-			free(route);
-#endif
 		}
 #endif
 }

+ 14 - 0
src/core/simgrid.h

@@ -49,12 +49,24 @@ extern "C"
 
 #include <xbt/xbt_os_time.h>
 
+#pragma GCC visibility push(hidden)
+
 struct _starpu_pthread_args
 {
 	void *(*f)(void*);
 	void *arg;
 };
 
+#if (SIMGRID_VERSION >= 32600)
+typedef void _starpu_simgrid_main_ret;
+#define _STARPU_SIMGRID_MAIN_RETURN do { } while (0)
+#else
+typedef int _starpu_simgrid_main_ret;
+#define _STARPU_SIMGRID_MAIN_RETURN return 0
+#endif
+_starpu_simgrid_main_ret
+_starpu_simgrid_thread_start(int argc, char *argv[]);
+
 #define MAX_TSD 16
 
 #define STARPU_MPI_AS_PREFIX "StarPU-MPI"
@@ -127,6 +139,8 @@ void _starpu_simgrid_xbt_thread_create(const char *name, starpu_pthread_attr_t *
 		}	\
 	}
 
+#pragma GCC visibility pop
+
 #else // !STARPU_SIMGRID
 #define _SIMGRID_TIMER_BEGIN(cond) {
 #define _SIMGRID_TIMER_END }

+ 3 - 0
src/core/task.h

@@ -23,6 +23,8 @@
 #include <common/config.h>
 #include <core/jobs.h>
 
+#pragma GCC visibility push(hidden)
+
 /** Internal version of starpu_task_destroy: don't check task->destroy flag */
 void _starpu_task_destroy(struct starpu_task *task);
 
@@ -126,6 +128,7 @@ void _starpu_watchdog_shutdown(void);
 int _starpu_task_wait_for_all_and_return_nb_waited_tasks(void);
 int _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(unsigned sched_ctx);
 
+#pragma GCC visibility pop
 
 #ifdef BUILDING_STARPU
 LIST_CREATE_TYPE_NOSTRUCT(starpu_task, prev, next);

+ 1 - 1
src/core/task_bundle.c

@@ -185,7 +185,7 @@ void _starpu_task_bundle_destroy(starpu_task_bundle_t bundle)
 	free(bundle);
 }
 
-void _insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_data_access_mode mode)
+void _starpu_insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_data_access_mode mode)
 {
 	STARPU_ASSERT(listp);
 

+ 6 - 2
src/core/task_bundle.h

@@ -21,6 +21,8 @@
 
 #include <starpu_thread.h>
 
+#pragma GCC visibility push(hidden)
+
 /** struct _starpu_task_bundle_entry
  * ================================
  * Purpose
@@ -108,7 +110,7 @@ struct _starpu_handle_list
  */
 void _starpu_task_bundle_destroy(starpu_task_bundle_t bundle);
 
-/** _insertion_handle_sorted
+/** _starpu_insertion_handle_sorted
  * ========================
  * Purpose
  * =======
@@ -131,6 +133,8 @@ void _starpu_task_bundle_destroy(starpu_task_bundle_t bundle);
  * mode			(input)
  * 			Access mode of the handle.
  */
-void _insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_data_access_mode mode);
+void _starpu_insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_data_access_mode mode);
+
+#pragma GCC visibility pop
 
 #endif // __CORE_TASK_BUNDLE_H__

+ 1 - 1
src/core/topology.c

@@ -95,7 +95,7 @@ struct handle_entry
 static struct handle_entry *devices_using_cuda;
 #  endif
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_FPGA) || defined(STARPU_SIMGRID)
 static unsigned may_bind_automatically[STARPU_NARCH] = { 0 };
 #endif
 

+ 6 - 2
src/core/topology.h

@@ -24,6 +24,8 @@
 #include <common/list.h>
 #include <common/fxt.h>
 
+#pragma GCC visibility push(hidden)
+
 struct _starpu_machine_config;
 
 #ifndef STARPU_SIMGRID
@@ -60,7 +62,7 @@ unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config);
 unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config);
 
 /** returns the number of hyperthreads per core */
-unsigned _starpu_get_nhyperthreads();
+unsigned _starpu_get_nhyperthreads() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 #ifdef STARPU_HAVE_HWLOC
 /** Small convenient function to filter hwloc topology depending on HWLOC API version */
@@ -81,11 +83,13 @@ void _starpu_bind_thread_on_cpus(struct _starpu_combined_worker *combined_worker
 
 struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d);
 
-int starpu_memory_nodes_get_numa_count(void);
+int starpu_memory_nodes_get_numa_count(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 int starpu_memory_nodes_numa_id_to_hwloclogid(unsigned id);
 
 /** Get the memory node for data number i when task is to be executed on memory node target_node */
 int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index, unsigned target_node);
 int _starpu_task_data_get_node_on_worker(struct starpu_task *task, unsigned index, unsigned worker);
 
+#pragma GCC visibility pop
+
 #endif // __TOPOLOGY_H__

+ 16 - 6
src/core/workers.c

@@ -110,7 +110,7 @@ static void global_knobs__get(const struct starpu_perf_knob * const knob, void *
 	}
 }
 
-void worker_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value)
+static void worker_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value)
 {
 	const unsigned workerid = *(unsigned *)context;
 	struct _starpu_worker * const worker = _starpu_get_worker_struct(workerid);
@@ -129,7 +129,7 @@ void worker_knobs__set(const struct starpu_perf_knob * const knob, void *context
 		abort();
 	}
 }
-void worker_knobs__get(const struct starpu_perf_knob * const knob, void *context,       struct starpu_perf_knob_value * const value)
+static void worker_knobs__get(const struct starpu_perf_knob * const knob, void *context,       struct starpu_perf_knob_value * const value)
 {
 	const unsigned workerid = *(unsigned *)context;
 	struct _starpu_worker * const worker = _starpu_get_worker_struct(workerid);
@@ -186,11 +186,11 @@ static starpu_pthread_cond_t init_cond = STARPU_PTHREAD_COND_INITIALIZER;
 static int init_count = 0;
 static enum initialization initialized = UNINITIALIZED;
 
-int _starpu_keys_initialized STARPU_ATTRIBUTE_INTERNAL;
-starpu_pthread_key_t _starpu_worker_key STARPU_ATTRIBUTE_INTERNAL;
-starpu_pthread_key_t _starpu_worker_set_key STARPU_ATTRIBUTE_INTERNAL;
+int _starpu_keys_initialized;
+starpu_pthread_key_t _starpu_worker_key;
+starpu_pthread_key_t _starpu_worker_set_key;
 
-struct _starpu_machine_config _starpu_config STARPU_ATTRIBUTE_INTERNAL;
+struct _starpu_machine_config _starpu_config;
 
 static int check_entire_platform;
 
@@ -920,6 +920,9 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
 #if defined(STARPU_USE_FPGA)
 			case STARPU_FPGA_WORKER:
+			{
+				struct starpu_driver driver;
+				driver.type = workerarg->arch;
 				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
 				{
 					workerarg->run_by_starpu = 0;
@@ -939,6 +942,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
 #endif
 				break;
+			}
 #endif
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
@@ -1836,6 +1840,12 @@ void starpu_resume()
 	starpu_fxt_trace_user_event_string("starpu_resume");
 }
 
+int starpu_is_paused()
+{
+	STARPU_RMB();
+	return _starpu_config.pause_depth > 0;
+}
+
 unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED)
 {
 #ifdef STARPU_NON_BLOCKING_DRIVERS

+ 10 - 6
src/core/workers.h

@@ -55,6 +55,8 @@
 
 #include <datawizard/datawizard.h>
 
+#pragma GCC visibility push(hidden)
+
 #define STARPU_MAX_PIPELINE 4
 
 struct _starpu_ctx_change_list;
@@ -438,10 +440,10 @@ void _starpu_memory_driver_info_register(enum starpu_node_kind kind, const struc
 
 extern int _starpu_worker_parallel_blocks;
 
-extern struct _starpu_machine_config _starpu_config STARPU_ATTRIBUTE_INTERNAL;
-extern int _starpu_keys_initialized STARPU_ATTRIBUTE_INTERNAL;
-extern starpu_pthread_key_t _starpu_worker_key STARPU_ATTRIBUTE_INTERNAL;
-extern starpu_pthread_key_t _starpu_worker_set_key STARPU_ATTRIBUTE_INTERNAL;
+extern struct _starpu_machine_config _starpu_config;
+extern int _starpu_keys_initialized;
+extern starpu_pthread_key_t _starpu_worker_key;
+extern starpu_pthread_key_t _starpu_worker_set_key;
 
 /** Three functions to manage argv, argc */
 void _starpu_set_argc_argv(int *argc, char ***argv);
@@ -472,7 +474,7 @@ static inline unsigned _starpu_machine_is_running(void)
 void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machine_config *pconfig);
 
 /** Check if there is a worker that may execute the task. */
-uint32_t _starpu_worker_exists(struct starpu_task *);
+uint32_t _starpu_worker_exists(struct starpu_task *) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 /** Is there a worker that can execute CUDA code ? */
 uint32_t _starpu_can_submit_cuda_task(void);
@@ -1163,7 +1165,7 @@ static inline int _starpu_wake_worker_relax(int workerid)
 }
 #define starpu_wake_worker_relax _starpu_wake_worker_relax
 
-int starpu_wake_worker_relax_light(int workerid);
+int starpu_wake_worker_relax_light(int workerid) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 /**
  * Allow a worker pulling a task it cannot execute to properly refuse it and
@@ -1183,4 +1185,6 @@ static inline int _starpu_perf_counter_paused(void)
 
 /* @}*/
 
+#pragma GCC visibility pop
+
 #endif // __WORKERS_H__

+ 1 - 1
src/datawizard/coherency.c

@@ -1033,7 +1033,7 @@ int starpu_idle_prefetch_task_input_for(struct starpu_task *task, unsigned worke
 	return starpu_idle_prefetch_task_input_for_prio(task, worker, prio);
 }
 
-struct _starpu_data_replicate *get_replicate(starpu_data_handle_t handle, enum starpu_data_access_mode mode, int workerid, unsigned node)
+static struct _starpu_data_replicate *get_replicate(starpu_data_handle_t handle, enum starpu_data_access_mode mode, int workerid, unsigned node)
 {
 	if (mode & (STARPU_SCRATCH|STARPU_REDUX))
 	{

+ 5 - 2
src/datawizard/coherency.h

@@ -33,6 +33,8 @@
 #include <datawizard/memstats.h>
 #include <datawizard/data_request.h>
 
+#pragma GCC visibility push(hidden)
+
 enum _starpu_cache_state
 {
 	STARPU_OWNER,
@@ -378,7 +380,8 @@ void _starpu_data_start_reduction_mode(starpu_data_handle_t handle);
 void _starpu_data_end_reduction_mode(starpu_data_handle_t handle);
 void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle);
 
-void _starpu_data_set_unregister_hook(starpu_data_handle_t handle, _starpu_data_handle_unregister_hook func);
-struct _starpu_data_replicate *get_replicate(starpu_data_handle_t handle, enum starpu_data_access_mode mode, int workerid, unsigned node);
+void _starpu_data_set_unregister_hook(starpu_data_handle_t handle, _starpu_data_handle_unregister_hook func) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
+
+#pragma GCC visibility pop
 
 #endif // __COHERENCY__H__

+ 4 - 0
src/datawizard/copy_driver.h

@@ -39,6 +39,8 @@
 #include <mpi.h>
 #endif
 
+#pragma GCC visibility push(hidden)
+
 #ifdef __cplusplus
 extern "C"
 {
@@ -133,4 +135,6 @@ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_
 }
 #endif
 
+#pragma GCC visibility pop
+
 #endif // __COPY_DRIVER_H__

+ 5 - 0
src/datawizard/data_request.h

@@ -28,6 +28,8 @@
 #include <common/prio_list.h>
 #include <common/starpu_spinlock.h>
 
+#pragma GCC visibility push(hidden)
+
 /* TODO: This should be tuned according to driver capabilities
  * Data interfaces should also have to declare how many asynchronous requests
  * they have actually started (think of e.g. csr).
@@ -184,4 +186,7 @@ void _starpu_data_request_append_callback(struct _starpu_data_request *r,
 					  void *callback_arg);
 
 void _starpu_update_prefetch_status(struct _starpu_data_request *r, enum starpu_is_prefetch prefetch);
+
+#pragma GCC visibility pop
+
 #endif // __DATA_REQUEST_H__

+ 4 - 0
src/datawizard/datastats.h

@@ -24,6 +24,8 @@
 #include <stdint.h>
 #include <stdlib.h>
 
+#pragma GCC visibility push(hidden)
+
 extern int _starpu_enable_stats;
 
 void _starpu_datastats_init();
@@ -63,4 +65,6 @@ void __starpu_data_allocation_inc_stats(unsigned node STARPU_ATTRIBUTE_UNUSED);
 
 void _starpu_display_alloc_cache_stats(FILE *stream);
 
+#pragma GCC visibility pop
+
 #endif // __DATASTATS_H__

+ 3 - 0
src/datawizard/datawizard.h

@@ -34,6 +34,7 @@
 
 #include <core/dependencies/implicit_data_deps.h>
 
+#pragma GCC visibility push(hidden)
 
 /** Make data transfers progress on all memory nodes driven by the current worker.
  *
@@ -49,4 +50,6 @@ void _starpu_datawizard_progress(enum _starpu_may_alloc may_alloc);
 /** Check for all pending data request progress on node \p memory_node */
 void _starpu_datawizard_handle_all_pending_node_data_requests(unsigned memnode);
 
+#pragma GCC visibility pop
+
 #endif // __DATAWIZARD_H__

+ 5 - 0
src/datawizard/filters.h

@@ -26,6 +26,11 @@
 #include <starpu.h>
 #include <common/config.h>
 
+#pragma GCC visibility push(hidden)
+
 /** submit asynchronous unpartitioning / partitioning to make target active read-only or read-write */
 void _starpu_data_partition_access_submit(starpu_data_handle_t target, int write);
+
+#pragma GCC visibility pop
+
 #endif

+ 4 - 0
src/datawizard/footprint.h

@@ -24,6 +24,8 @@
 #include <common/config.h>
 #include <core/jobs.h>
 
+#pragma GCC visibility push(hidden)
+
 /** Compute the footprint that characterizes the job and cache it into the job
  * structure. */
 uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned nimpl, struct _starpu_job *j);
@@ -34,4 +36,6 @@ uint32_t _starpu_compute_data_footprint(starpu_data_handle_t handle);
 /** Compute the footprint that characterizes the allocation of the data handle. */
 uint32_t _starpu_compute_data_alloc_footprint(starpu_data_handle_t handle);
 
+#pragma GCC visibility pop
+
 #endif // __FOOTPRINT_H__

+ 5 - 0
src/datawizard/interfaces/bcsr_filters.c

@@ -91,6 +91,11 @@ void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_inte
 	}
 }
 
+unsigned starpu_bcsr_filter_canonical_block_get_nchildren(struct starpu_data_filter *f, starpu_data_handle_t handle)
+{
+  return (unsigned)starpu_bcsr_get_nnz(handle);
+}
+
 struct starpu_data_interface_ops *starpu_bcsr_filter_canonical_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child)
 {
 	return &starpu_interface_matrix_ops;

+ 12 - 11
src/datawizard/interfaces/data_interface.h

@@ -26,6 +26,8 @@
 #include <util/openmp_runtime_support.h>
 #endif
 
+#pragma GCC visibility push(hidden)
+
 /** Generic type representing an interface, for now it's only used before
  * execution on message-passing devices but it can be useful in other cases.
  */
@@ -44,43 +46,42 @@ union _starpu_interface
 /** Some data interfaces or filters use this interface internally */
 extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
 extern struct starpu_data_interface_ops starpu_interface_block_ops;
-extern struct starpu_data_interface_ops starpu_interface_vector_ops;
+extern struct starpu_data_interface_ops starpu_interface_vector_ops STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 extern struct starpu_data_interface_ops starpu_interface_csr_ops;
 extern struct starpu_data_interface_ops starpu_interface_bcsr_ops;
 extern struct starpu_data_interface_ops starpu_interface_variable_ops;
 extern struct starpu_data_interface_ops starpu_interface_void_ops;
 extern struct starpu_data_interface_ops starpu_interface_multiformat_ops;
 
-void _starpu_data_free_interfaces(starpu_data_handle_t handle)
-	STARPU_ATTRIBUTE_INTERNAL;
+void _starpu_data_free_interfaces(starpu_data_handle_t handle);
 
 extern
 int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_interface_ops *interface_ops, unsigned int mf_node);
 void _starpu_data_initialize_per_worker(starpu_data_handle_t handle);
 
 extern struct starpu_arbiter *_starpu_global_arbiter;
-extern void _starpu_data_interface_init(void) STARPU_ATTRIBUTE_INTERNAL;
-extern int __starpu_data_check_not_busy(starpu_data_handle_t handle) STARPU_ATTRIBUTE_INTERNAL STARPU_ATTRIBUTE_WARN_UNUSED_RESULT;
+extern void _starpu_data_interface_init(void);
+extern int __starpu_data_check_not_busy(starpu_data_handle_t handle) STARPU_ATTRIBUTE_WARN_UNUSED_RESULT;
 #define _starpu_data_check_not_busy(handle) \
 	(STARPU_UNLIKELY(!handle->busy_count && \
 			 (handle->busy_waiting || handle->lazy_unregister)) ? \
 		__starpu_data_check_not_busy(handle) : 0)
-extern void _starpu_data_interface_shutdown(void) STARPU_ATTRIBUTE_INTERNAL;
+extern void _starpu_data_interface_shutdown(void);
 
 #ifdef STARPU_OPENMP
 void _starpu_omp_unregister_region_handles(struct starpu_omp_region *region);
 void _starpu_omp_unregister_task_handles(struct starpu_omp_task *task);
 #endif
 
-struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id);
+struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 extern void _starpu_data_register_ram_pointer(starpu_data_handle_t handle,
-						void *ptr)
-	STARPU_ATTRIBUTE_INTERNAL;
+						void *ptr);
 
-extern void _starpu_data_unregister_ram_pointer(starpu_data_handle_t handle, unsigned node)
-	STARPU_ATTRIBUTE_INTERNAL;
+extern void _starpu_data_unregister_ram_pointer(starpu_data_handle_t handle, unsigned node);
 
 #define _starpu_data_is_multiformat_handle(handle) handle->ops->is_multiformat
 
+#pragma GCC visibility pop
+
 #endif // __DATA_INTERFACE_H__

+ 5 - 0
src/datawizard/malloc.h

@@ -17,6 +17,8 @@
 #ifndef __ALLOC_H__
 #define __ALLOC_H__
 
+#pragma GCC visibility push(hidden)
+
 /** @file */
 
 void _starpu_malloc_init(unsigned dst_node);
@@ -33,4 +35,7 @@ int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags
    critical path
   */
 int _starpu_malloc_willpin_on_node(unsigned dst_node);
+
+#pragma GCC visibility pop
+
 #endif

+ 4 - 3
src/datawizard/memalloc.c

@@ -1400,12 +1400,13 @@ void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _star
 	/*
 	 * This is particularly important when
 	 * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
-	 * wouldn't even re-use these allocations!
+	 * wouldn't even ever re-use these allocations!
 	 */
 	if (handle->ops->dontcache
-			|| !_starpu_memory_manager_get_global_memory_size(node)
+			|| (starpu_node_get_kind(node) == STARPU_CPU_RAM &&
+			    !_starpu_malloc_willpin_on_node(node))
 #ifndef STARPU_USE_ALLOCATION_CACHE
-			|| starpu_node_get_kind(node) == STARPU_CPU_RAM
+			|| !_starpu_memory_manager_get_global_memory_size(node)
 #endif
 			)
 	{

+ 4 - 0
src/datawizard/memalloc.h

@@ -28,6 +28,8 @@
 #include <datawizard/copy_driver.h>
 #include <datawizard/data_request.h>
 
+#pragma GCC visibility push(hidden)
+
 struct _starpu_data_replicate;
 
 /** While associated with a handle, the content is protected by the handle lock, except a few fields
@@ -95,4 +97,6 @@ int _starpu_is_reclaiming(unsigned node);
 
 void _starpu_mem_chunk_disk_register(unsigned disk_memnode);
 
+#pragma GCC visibility pop
+
 #endif

+ 4 - 0
src/datawizard/memory_manager.h

@@ -21,6 +21,8 @@
 
 #include <starpu.h>
 
+#pragma GCC visibility push(hidden)
+
 #ifdef __cplusplus
 extern "C"
 {
@@ -49,4 +51,6 @@ int _starpu_memory_manager_test_allocate_size(unsigned node, size_t size);
 }
 #endif
 
+#pragma GCC visibility pop
+
 #endif /* __MEMORY_MANAGER_H__ */

+ 4 - 0
src/datawizard/memory_nodes.h

@@ -31,6 +31,8 @@
 #include <core/simgrid.h>
 #endif
 
+#pragma GCC visibility push(hidden)
+
 extern char _starpu_worker_drives_memory[STARPU_NMAXWORKERS][STARPU_MAXNODES];
 
 struct _starpu_cond_and_worker
@@ -162,4 +164,6 @@ static inline unsigned _starpu_worker_get_local_memory_node(void)
 #endif
 #define starpu_worker_get_local_memory_node _starpu_worker_get_local_memory_node
 
+#pragma GCC visibility pop
+
 #endif // __MEMORY_NODES_H__

+ 4 - 0
src/datawizard/memstats.h

@@ -22,6 +22,8 @@
 #include <starpu.h>
 #include <common/config.h>
 
+#pragma GCC visibility push(hidden)
+
 #ifdef STARPU_MEMORY_STATS
 struct _starpu_memory_stats
 {
@@ -51,4 +53,6 @@ void _starpu_memory_handle_stats_loaded_owner(starpu_data_handle_t handle, unsig
 void _starpu_memory_handle_stats_shared_to_owner(starpu_data_handle_t handle, unsigned node);
 void _starpu_memory_handle_stats_invalidated(starpu_data_handle_t handle, unsigned node);
 
+#pragma GCC visibility pop
+
 #endif /* __MEMSTATS_H__ */

+ 4 - 0
src/datawizard/node_ops.h

@@ -23,6 +23,8 @@
 #include <common/config.h>
 #include <datawizard/copy_driver.h>
 
+#pragma GCC visibility push(hidden)
+
 typedef int (*copy_interface_func_t)(starpu_data_handle_t handle, void *src_interface, unsigned src_node,
 				void *dst_interface, unsigned dst_node,
 				struct _starpu_data_request *req);
@@ -60,4 +62,6 @@ struct _starpu_node_ops
 
 const char* _starpu_node_get_prefix(enum starpu_node_kind kind);
 
+#pragma GCC visibility pop
+
 #endif // __NODE_OPS_H__

+ 4 - 0
src/datawizard/sort_data_handles.h

@@ -28,9 +28,13 @@
 #include <datawizard/coherency.h>
 #include <datawizard/memalloc.h>
 
+#pragma GCC visibility push(hidden)
+
 /** To avoid deadlocks, we reorder the different buffers accessed by the task
  * so that we always grab the rw-lock associated to the handles in the same
  * order. */
 void _starpu_sort_task_handles(struct _starpu_data_descr descr[], unsigned nbuffers);
 
+#pragma GCC visibility pop
+
 #endif // SORT_DATA_HANDLES

+ 4 - 0
src/datawizard/write_back.h

@@ -22,10 +22,14 @@
 #include <starpu.h>
 #include <datawizard/coherency.h>
 
+#pragma GCC visibility push(hidden)
+
 /** If a write-through mask is associated to that data handle, this propagates
  * the current value of the data onto the different memory nodes in the
  * write_through_mask. */
 void _starpu_write_through_data(starpu_data_handle_t handle, unsigned requesting_node,
 					   uint32_t write_through_mask);
 
+#pragma GCC visibility pop
+
 #endif // __DW_WRITE_BACK_H__

+ 6 - 2
src/debug/starpu_debug_helpers.h

@@ -23,19 +23,23 @@
 #include <starpu_config.h>
 #include <starpu_util.h>
 
+#pragma GCC visibility push(hidden)
+
 #ifdef __cplusplus
 extern "C"
 {
 #endif
 
 /** Perform a ping pong between the two memory nodes */
-void _starpu_benchmark_ping_pong(starpu_data_handle_t handle, unsigned node0, unsigned node1, unsigned niter);
+void _starpu_benchmark_ping_pong(starpu_data_handle_t handle, unsigned node0, unsigned node1, unsigned niter) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 /** Display the size of different data structures */
-void _starpu_debug_display_structures_size(FILE *stream);
+void _starpu_debug_display_structures_size(FILE *stream) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 
 #ifdef __cplusplus
 }
 #endif
 
+#pragma GCC visibility pop
+
 #endif // __STARPU_DEBUG_HELPERS_H__

+ 14 - 14
src/debug/traces/starpu_fxt.c

@@ -120,7 +120,7 @@ struct task_info
 	int mpi_rank;
 };
 
-struct task_info *tasks_info;
+static struct task_info *tasks_info;
 
 static struct task_info *get_task(unsigned long job_id, int mpi_rank)
 {
@@ -300,7 +300,7 @@ struct data_info
 	long mpi_tag;
 };
 
-struct data_info *data_info;
+static struct data_info *data_info;
 
 static struct data_info *get_data(unsigned long handle, int mpi_rank)
 {
@@ -600,7 +600,7 @@ static char *scheduler_container_alias(char *output, int len, const char *prefix
 
 static int nworkers = 0;
 
-struct worker_entry
+static struct worker_entry
 {
 	UT_hash_handle hh;
 	unsigned long tid;
@@ -1548,7 +1548,7 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 static void handle_model_name(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	struct task_info *task = get_task(ev->param[0], options->file_rank);
-	char *name = get_fxt_string(ev, 1);
+	char *name = get_fxt_string(ev, 2);
 	task->model_name = strdup(name);
 }
 
@@ -1693,20 +1693,20 @@ static struct starpu_fxt_codelet_event *dumped_codelets;
 
 static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
+	unsigned long job_id = ev->param[0];
+	size_t codelet_size = ev->param[1];
+	uint32_t codelet_hash = ev->param[2];
 	int worker = ev->param[3];
+	long unsigned int threadid = ev->param[4];
+	char *name = get_fxt_string(ev, 5);
+
 	if (worker < 0) return;
 
 	char *prefix = options->file_prefix;
-
 	double end_codelet_time = get_event_time_stamp(ev, options);
 	double last_end_codelet_time = last_codelet_end[worker];
 	last_codelet_end[worker] = end_codelet_time;
 
-	size_t codelet_size = ev->param[1];
-	uint32_t codelet_hash = ev->param[2];
-	long unsigned int threadid = ev->param[4];
-	char *name = get_fxt_string(ev, 5);
-
 	const char *state = "I";
 	if (find_sync(prefixTOnodeid(prefix), threadid))
 		state = "B";
@@ -1715,9 +1715,9 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 	if (trace_file)
 		recfmt_worker_set_state(end_codelet_time, worker, state, "Other");
 
-	struct task_info *task = get_task(ev->param[0], options->file_rank);
+	struct task_info *task = get_task(job_id, options->file_rank);
 
-	get_task(ev->param[0], options->file_rank)->end_time = end_codelet_time;
+	task->end_time = end_codelet_time;
 	update_accumulated_time(worker, 0.0, end_codelet_time - task->start_time, end_codelet_time, 0);
 
 	struct _starpu_computation *peer = ongoing_computation[worker];
@@ -2817,7 +2817,7 @@ static void handle_task_exclude_from_dag(struct fxt_ev_64 *ev, struct starpu_fxt
 static void handle_task_name(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	unsigned long job_id = ev->param[0];
-	char *name = get_fxt_string(ev,1);
+	char *name = get_fxt_string(ev,2);
 
 	char *prefix = options->file_prefix;
 	struct task_info *task = get_task(job_id, options->file_rank);
@@ -4832,7 +4832,7 @@ struct parse_task
 
 static struct parse_task tasks[STARPU_NMAXWORKERS];
 
-struct starpu_data_trace_kernel
+static struct starpu_data_trace_kernel
 {
 	UT_hash_handle hh;
 	char *name;

+ 4 - 0
src/debug/traces/starpu_fxt.h

@@ -41,6 +41,8 @@
 #include <starpu.h>
 #include "../../../include/starpu_fxt.h"
 
+#pragma GCC visibility push(hidden)
+
 extern char _starpu_last_codelet_symbol[STARPU_NMAXWORKERS][(FXT_MAX_PARAMS-5)*sizeof(unsigned long)];
 
 void _starpu_fxt_dag_init(char *dag_filename);
@@ -87,4 +89,6 @@ void _starpu_fxt_component_deinit(void);
 
 #endif // STARPU_USE_FXT
 
+#pragma GCC visibility pop
+
 #endif // __STARPU__FXT_H__

+ 6 - 6
src/debug/traces/starpu_fxt_mpi.c

@@ -109,18 +109,18 @@ static struct mpi_transfer *mpi_sends[STARPU_FXT_MAX_FILES] = {NULL};
 static struct mpi_transfer *mpi_recvs[STARPU_FXT_MAX_FILES] = {NULL};
 
 /* number of available slots in the lists  */
-unsigned mpi_sends_list_size[STARPU_FXT_MAX_FILES] = {0};
-unsigned mpi_recvs_list_size[STARPU_FXT_MAX_FILES] = {0};
+static unsigned mpi_sends_list_size[STARPU_FXT_MAX_FILES] = {0};
+static unsigned mpi_recvs_list_size[STARPU_FXT_MAX_FILES] = {0};
 
 /* number of slots actually used in the list  */
-unsigned mpi_sends_used[STARPU_FXT_MAX_FILES] = {0};
-unsigned mpi_recvs_used[STARPU_FXT_MAX_FILES] = {0};
+static unsigned mpi_sends_used[STARPU_FXT_MAX_FILES] = {0};
+static unsigned mpi_recvs_used[STARPU_FXT_MAX_FILES] = {0};
 
 /* number of slots already matched at the beginning of the list. This permits
  * going through the lists from the beginning to match each and every
  * transfer, thus avoiding a quadratic complexity. */
-unsigned mpi_recvs_matched[STARPU_FXT_MAX_FILES][STARPU_FXT_MAX_FILES] = { {0} };
-unsigned mpi_sends_matched[STARPU_FXT_MAX_FILES][STARPU_FXT_MAX_FILES] = { {0} };
+static unsigned mpi_recvs_matched[STARPU_FXT_MAX_FILES][STARPU_FXT_MAX_FILES] = { {0} };
+static unsigned mpi_sends_matched[STARPU_FXT_MAX_FILES][STARPU_FXT_MAX_FILES] = { {0} };
 
 void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED, long mpi_tag, size_t size, float date, long jobid, unsigned long handle, unsigned type, int prio)
 {

+ 4 - 0
src/drivers/cpu/driver_cpu.h

@@ -22,6 +22,8 @@
 #include <common/config.h>
 #include <datawizard/node_ops.h>
 
+#pragma GCC visibility push(hidden)
+
 void _starpu_cpu_preinit(void);
 
 extern struct _starpu_driver_ops _starpu_driver_cpu_ops;
@@ -36,4 +38,6 @@ int _starpu_cpu_is_direct_access_supported(unsigned node, unsigned handling_node
 uintptr_t _starpu_cpu_malloc_on_node(unsigned dst_node, size_t size, int flags);
 void _starpu_cpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
 
+#pragma GCC visibility pop
+
 #endif //  __DRIVER_CPU_H__

+ 4 - 0
src/drivers/cuda/driver_cuda.h

@@ -37,6 +37,8 @@ void _starpu_cuda_preinit(void);
 #include <core/workers.h>
 #include <datawizard/node_ops.h>
 
+#pragma GCC visibility push(hidden)
+
 extern struct _starpu_driver_ops _starpu_driver_cuda_ops;
 extern struct _starpu_node_ops _starpu_driver_cuda_node_ops;
 
@@ -86,5 +88,7 @@ int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_nod
 uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags);
 void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
 
+#pragma GCC visibility pop
+
 #endif //  __DRIVER_CUDA_H__
 

+ 4 - 0
src/drivers/disk/driver_disk.h

@@ -22,6 +22,8 @@
 
 #include <datawizard/node_ops.h>
 
+#pragma GCC visibility push(hidden)
+
 void _starpu_disk_preinit(void);
 
 int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel);
@@ -46,4 +48,6 @@ int _starpu_disk_is_direct_access_supported(unsigned node, unsigned handling_nod
 uintptr_t _starpu_disk_malloc_on_node(unsigned dst_node, size_t size, int flags);
 void _starpu_disk_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
 
+#pragma GCC visibility pop
+
 #endif

+ 5 - 0
src/drivers/driver_common/driver_common.h

@@ -25,6 +25,8 @@
 #include <core/jobs.h>
 #include <common/utils.h>
 
+#pragma GCC visibility push(hidden)
+
 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
 			      int rank, int profiling);
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch,
@@ -36,4 +38,7 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int workerid, unsigned memnode);
 /** Get from the scheduler tasks to be executed on the workers \p workers */
 int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_task ** tasks, int nworker, unsigned memnode);
+
+#pragma GCC visibility pop
+
 #endif // __DRIVER_COMMON_H__

+ 0 - 0
src/drivers/max/driver_fpga_init.c


Some files were not shown because too many files changed in this diff