Browse Source

more prefixing with starpu_ and s/CORE/CPU/g

Nathalie Furmento 15 years ago
parent
commit
c722a34e6e
100 changed files with 557 additions and 557 deletions
  1. 1 1
      configure.ac
  2. 12 12
      doc/starpu.texi
  3. 2 2
      examples/audio/starpu-audio-processing.c
  4. 2 2
      examples/axpy/axpy.c
  5. 2 2
      examples/basic-examples/hello-world.c
  6. 2 2
      examples/basic-examples/mult.c
  7. 2 2
      examples/basic-examples/vector-scal.c
  8. 6 6
      examples/cholesky/dw_cholesky.c
  9. 3 3
      examples/cholesky/dw_cholesky.h
  10. 6 6
      examples/cholesky/dw_cholesky_grain.c
  11. 6 6
      examples/cholesky/dw_cholesky_kernels.c
  12. 9 9
      examples/cholesky/dw_cholesky_models.c
  13. 6 6
      examples/cholesky/dw_cholesky_no_stride.c
  14. 8 8
      examples/heat/dw_factolu.c
  15. 4 4
      examples/heat/dw_factolu.h
  16. 8 8
      examples/heat/dw_factolu_grain.c
  17. 7 7
      examples/heat/dw_factolu_kernels.c
  18. 8 8
      examples/heat/dw_factolu_tag.c
  19. 18 18
      examples/heat/dw_sparse_cg.c
  20. 9 9
      examples/heat/dw_sparse_cg.h
  21. 9 9
      examples/heat/dw_sparse_cg_kernels.c
  22. 12 12
      examples/heat/lu_kernels_model.c
  23. 3 3
      examples/incrementer/incrementer.c
  24. 8 8
      examples/lu/xlu.c
  25. 4 4
      examples/lu/xlu.h
  26. 10 10
      examples/lu/xlu_pivot.c
  27. 2 2
      examples/mult/dw_mult.c
  28. 1 1
      examples/mult/dw_mult.h
  29. 2 2
      examples/mult/dw_mult_no_stride.c
  30. 2 2
      examples/mult/dw_mult_no_stride_no_tag.c
  31. 1 1
      examples/mult/sgemm_kernels.c
  32. 2 2
      examples/mult/xgemm.c
  33. 1 1
      examples/mult/xgemm_kernels.c
  34. 20 20
      examples/pastix-wrappers/starpu-blas-wrapper.c
  35. 2 2
      examples/pastix-wrappers/starpu-blas-wrapper.h
  36. 2 2
      examples/ppm-downscaler/yuv-downscaler.c
  37. 2 2
      examples/spmv/dw_block_spmv.c
  38. 1 1
      examples/spmv/dw_block_spmv.h
  39. 2 2
      examples/spmv/dw_block_spmv_kernels.c
  40. 3 3
      examples/spmv/dw_spmv.c
  41. 1 1
      examples/starpufft/starpufftx.c
  42. 11 11
      examples/starpufft/starpufftx1d.c
  43. 11 11
      examples/starpufft/starpufftx2d.c
  44. 10 10
      examples/strassen/strassen.c
  45. 5 5
      examples/strassen/strassen.h
  46. 28 28
      examples/strassen/strassen_kernels.c
  47. 6 6
      examples/strassen/strassen_models.c
  48. 19 19
      examples/strassen2/strassen2.c
  49. 5 5
      examples/strassen2/strassen2_kernels.c
  50. 8 8
      examples/tag_example/tag_example.c
  51. 5 5
      examples/tag_example/tag_example2.c
  52. 5 5
      examples/tag_example/tag_example3.c
  53. 5 5
      examples/tag_example/tag_restartable.c
  54. 1 1
      include/starpu-perfmodel.h
  55. 3 3
      include/starpu-task.h
  56. 1 1
      include/starpu-util.h
  57. 3 3
      include/starpu.h
  58. 8 8
      mpi/examples/mpi_lu/pxlu_kernels.c
  59. 3 3
      mpi/tests/ring.c
  60. 3 3
      mpi/tests/ring_async.c
  61. 2 2
      src/common/fxt.c
  62. 2 2
      src/common/fxt.h
  63. 2 2
      src/core/jobs.h
  64. 2 2
      src/core/perfmodel/perfmodel.c
  65. 24 24
      src/core/perfmodel/perfmodel_bus.c
  66. 4 4
      src/core/perfmodel/perfmodel_history.c
  67. 46 46
      src/core/topology.c
  68. 3 3
      src/core/topology.h
  69. 9 9
      src/core/workers.c
  70. 7 7
      src/core/workers.h
  71. 41 41
      src/drivers/core/driver_core.c
  72. 3 3
      src/drivers/core/driver_core.h
  73. 1 1
      src/util/execute_on_all.c
  74. 2 2
      tests/core/empty_task_sync_point.c
  75. 2 2
      tests/core/execute_on_a_specific_worker.c
  76. 2 2
      tests/core/multithreaded.c
  77. 2 2
      tests/core/starpu_wait_all_tasks.c
  78. 2 2
      tests/core/starpu_wait_task.c
  79. 2 2
      tests/core/static_restartable.c
  80. 2 2
      tests/core/static_restartable_tag.c
  81. 2 2
      tests/core/static_restartable_using_initializer.c
  82. 2 2
      tests/core/tag-wait-api.c
  83. 2 2
      tests/datawizard/dining_philosophers.c
  84. 3 3
      tests/datawizard/dsm_stress.c
  85. 2 2
      tests/datawizard/readers_and_writers.c
  86. 6 6
      tests/datawizard/sync_and_notify_data.c
  87. 2 2
      tests/datawizard/sync_with_data_with_mem.c
  88. 2 2
      tests/datawizard/sync_with_data_with_mem_non_blocking.c
  89. 2 2
      tests/datawizard/unpartition.c
  90. 5 5
      tests/datawizard/write_only_tmp_buffer.c
  91. 2 2
      tests/errorcheck/invalid_blocking_calls.c
  92. 1 1
      tests/heat/speedup.gp
  93. 2 2
      tests/helper/execute_on_all.c
  94. 2 2
      tests/helper/starpu_create_sync_task.c
  95. 2 2
      tests/microbenchs/async-tasks-overhead.c
  96. 2 2
      tests/microbenchs/prefetch_data_on_node.c
  97. 3 3
      tests/microbenchs/redundant_buffer.c
  98. 2 2
      tests/microbenchs/sync-tasks-overhead.c
  99. 2 2
      tests/microbenchs/tasks-overhead.c
  100. 0 0
      tests/overlap/overlap.c

+ 1 - 1
configure.ac

@@ -102,7 +102,7 @@ if test x$enable_cpu = xyes; then
 
 	# This value is set quite randomly, but StarPU should not take more
 	# core than there are in the system
-	AC_DEFINE(NMAXCORES, [16], [Maximum number of CPUs supported])
+	AC_DEFINE(NMAXCPUS, [16], [Maximum number of CPUs supported])
 fi
 
 ###############################################################################

+ 12 - 12
doc/starpu.texi

@@ -502,7 +502,7 @@ an integer between 0 and @code{starpu_get_worker_count() - 1}.
 @item @emph{Description}:
 This function returns the type of worker associated to an identifier (as
 returned by the @code{starpu_get_worker_id} function). The returned value
-indicates the architecture of the worker: @code{STARPU_CORE_WORKER} for a CPU
+indicates the architecture of the worker: @code{STARPU_CPU_WORKER} for a CPU
 core, @code{STARPU_CUDA_WORKER} for a CUDA device, and
 @code{STARPU_GORDON_WORKER} for a Cell SPU. The value returned for an invalid
 identifier is unspecified.
@@ -568,17 +568,17 @@ various targets.
 @table @asis
 @item @code{where}: 
 Indicates which types of processing units are able to execute that codelet.
-@code{CORE|CUDA} for instance indicates that the codelet is implemented for
+@code{CPU|CUDA} for instance indicates that the codelet is implemented for
 both CPU cores and CUDA devices while @code{GORDON} indicates that it is only
 available on Cell SPUs.
 
-@item @code{core_func} (optionnal):
+@item @code{cpu_func} (optionnal):
 Is a function pointer to the CPU implementation of the codelet. Its prototype
-must be: @code{void core_func(starpu_data_interface_t *descr, void *arg)}. The
+must be: @code{void cpu_func(starpu_data_interface_t *descr, void *arg)}. The
 first argument being the array of data managed by the data management library,
 and the second argument is a pointer to the argument (possibly a copy of it)
 passed from the @code{.cl_arg} field of the @code{starpu_task} structure. This
-pointer is ignored if @code{CORE} does not appear in the @code{.where} field,
+pointer is ignored if @code{CPU} does not appear in the @code{.where} field,
 it must be non-null otherwise.
 
 @item @code{cuda_func} (optionnal):
@@ -986,8 +986,8 @@ void cpu_func(starpu_data_interface_t *buffers, void *func_arg)
 
 starpu_codelet cl =
 @{
-    .where = CORE,
-    .core_func = cpu_func,
+    .where = CPU,
+    .cpu_func = cpu_func,
     .nbuffers = 0
 @};
 @c @end cartouche
@@ -1007,12 +1007,12 @@ management library.
 @c TODO need a crossref to the proper description of "where" see bla for more ...
 We create a codelet which may only be executed on the CPUs. The ''@code{.where}''
 field is a bitmask that defines where the codelet may be executed. Here, the
-@code{CORE} value means that only CPUs can execute this codelet
+@code{CPU} value means that only CPUs can execute this codelet
 (@pxref{Codelets and Tasks} for more details on that field).
-When a CPU core executes a codelet, it calls the @code{.core_func} function,
+When a CPU core executes a codelet, it calls the @code{.cpu_func} function,
 which @emph{must} have the following prototype:
 
-@code{void (*core_func)(starpu_data_interface_t *, void *)}
+@code{void (*cpu_func)(starpu_data_interface_t *, void *)}
 
 In this example, we can ignore the first argument of this function which gives a
 description of the input and output buffers (eg. the size and the location of
@@ -1174,8 +1174,8 @@ void scal_func(starpu_data_interface_t *buffers, void *arg)
 @}
 
 starpu_codelet cl = @{
-    .where = CORE,
-    .core_func = scal_func,
+    .where = CPU,
+    .cpu_func = scal_func,
     .nbuffers = 1
 @};
 @end example

+ 2 - 2
examples/audio/starpu-audio-processing.c

@@ -272,11 +272,11 @@ struct starpu_perfmodel_t band_filter_model = {
 };
 
 static starpu_codelet band_filter_cl = {
-	.where = CORE|CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 #ifdef USE_CUDA
 	.cuda_func = band_filter_kernel_gpu,
 #endif
-	.core_func = band_filter_kernel_cpu,
+	.cpu_func = band_filter_kernel_cpu,
 	.model = &band_filter_model,
 	.nbuffers = 1
 };

+ 2 - 2
examples/axpy/axpy.c

@@ -68,9 +68,9 @@ static starpu_codelet axpy_cl = {
 #ifdef USE_CUDA
                 STARPU_CUDA|
 #endif
-                STARPU_CORE,
+                STARPU_CPU,
 
-	.core_func = axpy_cpu,
+	.cpu_func = axpy_cpu,
 #ifdef USE_CUDA
 	.cuda_func = axpy_gpu,
 #endif

+ 2 - 2
examples/basic-examples/hello-world.c

@@ -54,8 +54,8 @@ starpu_codelet cl =
 {
 	/* this codelet may only be executed on a CPU, and its cpu
  	 * implementation is function "cpu_func" */
-	.where = STARPU_CORE,
-	.core_func = cpu_func,
+	.where = STARPU_CPU,
+	.cpu_func = cpu_func,
 	/* the codelet does not manipulate any data that is managed
 	 * by our DSM */
 	.nbuffers = 0

+ 2 - 2
examples/basic-examples/mult.c

@@ -293,9 +293,9 @@ static void launch_tasks(void)
 
 	starpu_codelet cl = {
 		/* we can only execute that kernel on a CPU yet */
-		.where = STARPU_CORE,
+		.where = STARPU_CPU,
 		/* CPU implementation of the codelet */
-		.core_func = cpu_mult,
+		.cpu_func = cpu_mult,
 		/* the codelet manipulates 3 buffers that are managed by the
  		 * DSM */
 		.nbuffers = 3,

+ 2 - 2
examples/basic-examples/vector-scal.c

@@ -100,9 +100,9 @@ int main(int argc, char **argv)
 	task->synchronous = 1;
 
 	starpu_codelet cl = {
-		.where = STARPU_CORE,
+		.where = STARPU_CPU,
 		/* CPU implementation of the codelet */
-		.core_func = scal_func,
+		.cpu_func = scal_func,
 		.nbuffers = 1
 	};
 

+ 6 - 6
examples/cholesky/dw_cholesky.c

@@ -37,8 +37,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 
 static starpu_codelet cl11 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = chol_core_codelet_update_u11,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = chol_cpu_codelet_update_u11,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u11,
 #endif
@@ -72,8 +72,8 @@ static struct starpu_task * create_task_11(starpu_data_handle dataA, unsigned k)
 
 static starpu_codelet cl21 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = chol_core_codelet_update_u21,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = chol_cpu_codelet_update_u21,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u21,
 #endif
@@ -110,8 +110,8 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned j)
 
 static starpu_codelet cl22 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = chol_core_codelet_update_u22,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = chol_cpu_codelet_update_u22,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u22,
 #endif

+ 3 - 3
examples/cholesky/dw_cholesky.h

@@ -72,9 +72,9 @@ static unsigned nbigblocks = 8;
 static unsigned pinned = 0;
 static unsigned noprio = 0;
 
-void chol_core_codelet_update_u11(void **, void *);
-void chol_core_codelet_update_u21(void **, void *);
-void chol_core_codelet_update_u22(void **, void *);
+void chol_cpu_codelet_update_u11(void **, void *);
+void chol_cpu_codelet_update_u21(void **, void *);
+void chol_cpu_codelet_update_u22(void **, void *);
 
 #ifdef USE_CUDA
 void chol_cublas_codelet_update_u11(void *descr[], void *_args);

+ 6 - 6
examples/cholesky/dw_cholesky_grain.c

@@ -37,8 +37,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 
 static starpu_codelet cl11 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = chol_core_codelet_update_u11,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = chol_cpu_codelet_update_u11,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u11,
 #endif
@@ -71,8 +71,8 @@ static struct starpu_task * create_task_11(starpu_data_handle dataA, unsigned k,
 
 static starpu_codelet cl21 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = chol_core_codelet_update_u21,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = chol_cpu_codelet_update_u21,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u21,
 #endif
@@ -109,8 +109,8 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned j, uns
 
 static starpu_codelet cl22 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = chol_core_codelet_update_u22,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = chol_cpu_codelet_update_u22,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u22,
 #endif

+ 6 - 6
examples/cholesky/dw_cholesky_kernels.c

@@ -27,7 +27,7 @@
  *   U22 
  */
 
-static inline void chol_common_core_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
+static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 	//printf("22\n");
 	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
@@ -69,15 +69,15 @@ static inline void chol_common_core_codelet_update_u22(void *descr[], int s, __a
 	}
 }
 
-void chol_core_codelet_update_u22(void *descr[], void *_args)
+void chol_cpu_codelet_update_u22(void *descr[], void *_args)
 {
-	chol_common_core_codelet_update_u22(descr, 0, _args);
+	chol_common_cpu_codelet_update_u22(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
 void chol_cublas_codelet_update_u22(void *descr[], void *_args)
 {
-	chol_common_core_codelet_update_u22(descr, 1, _args);
+	chol_common_cpu_codelet_update_u22(descr, 1, _args);
 }
 #endif// USE_CUDA
 
@@ -116,7 +116,7 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, __attrib
 	}
 }
 
-void chol_core_codelet_update_u21(void *descr[], void *_args)
+void chol_cpu_codelet_update_u21(void *descr[], void *_args)
 {
 	 chol_common_codelet_update_u21(descr, 0, _args);
 }
@@ -200,7 +200,7 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 }
 
 
-void chol_core_codelet_update_u11(void *descr[], void *_args)
+void chol_cpu_codelet_update_u11(void *descr[], void *_args)
 {
 	chol_common_codelet_update_u11(descr, 0, _args);
 }

+ 9 - 9
examples/cholesky/dw_cholesky_models.c

@@ -34,7 +34,7 @@
 #define PERTURBATE(a)	(a)
 #endif
 
-static double core_chol_task_11_cost(starpu_buffer_descr *descr)
+static double cpu_chol_task_11_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
@@ -43,7 +43,7 @@ static double core_chol_task_11_cost(starpu_buffer_descr *descr)
 	double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176);
 
 #ifdef MODEL_DEBUG
-	printf("core_chol_task_11_cost n %d cost %e\n", n, cost);
+	printf("cpu_chol_task_11_cost n %d cost %e\n", n, cost);
 #endif
 
 	return PERTURBATE(cost);
@@ -64,7 +64,7 @@ static double cuda_chol_task_11_cost(starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-static double core_chol_task_21_cost(starpu_buffer_descr *descr)
+static double cpu_chol_task_21_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
@@ -73,7 +73,7 @@ static double core_chol_task_21_cost(starpu_buffer_descr *descr)
 	double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965);
 
 #ifdef MODEL_DEBUG
-	printf("core_chol_task_21_cost n %d cost %e\n", n, cost);
+	printf("cpu_chol_task_21_cost n %d cost %e\n", n, cost);
 #endif
 
 	return PERTURBATE(cost);
@@ -94,7 +94,7 @@ static double cuda_chol_task_21_cost(starpu_buffer_descr *descr)
 	return PERTURBATE(cost);
 }
 
-static double core_chol_task_22_cost(starpu_buffer_descr *descr)
+static double cpu_chol_task_22_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
@@ -103,7 +103,7 @@ static double core_chol_task_22_cost(starpu_buffer_descr *descr)
 	double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760);
 
 #ifdef MODEL_DEBUG
-	printf("core_chol_task_22_cost n %d cost %e\n", n, cost);
+	printf("cpu_chol_task_22_cost n %d cost %e\n", n, cost);
 #endif
 
 	return PERTURBATE(cost);
@@ -126,7 +126,7 @@ static double cuda_chol_task_22_cost(starpu_buffer_descr *descr)
 
 struct starpu_perfmodel_t chol_model_11 = {
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = core_chol_task_11_cost },
+		[STARPU_CPU_DEFAULT] = { .cost_model = cpu_chol_task_11_cost },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = cuda_chol_task_11_cost }
 	},
 	.type = STARPU_HISTORY_BASED,
@@ -135,7 +135,7 @@ struct starpu_perfmodel_t chol_model_11 = {
 
 struct starpu_perfmodel_t chol_model_21 = {
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = core_chol_task_21_cost },
+		[STARPU_CPU_DEFAULT] = { .cost_model = cpu_chol_task_21_cost },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = cuda_chol_task_21_cost }
 	},
 	.type = STARPU_HISTORY_BASED,
@@ -144,7 +144,7 @@ struct starpu_perfmodel_t chol_model_21 = {
 
 struct starpu_perfmodel_t chol_model_22 = {
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = core_chol_task_22_cost },
+		[STARPU_CPU_DEFAULT] = { .cost_model = cpu_chol_task_22_cost },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = cuda_chol_task_22_cost }
 	},
 	.type = STARPU_HISTORY_BASED,

+ 6 - 6
examples/cholesky/dw_cholesky_no_stride.c

@@ -47,8 +47,8 @@ static void terminal_callback(void *argcb)
 
 static starpu_codelet cl11 =
 {
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-	.core_func = chol_core_codelet_update_u11,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+	.cpu_func = chol_cpu_codelet_update_u11,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u11,
 #endif
@@ -94,8 +94,8 @@ static struct starpu_task * create_task_11(unsigned k, unsigned nblocks, sem_t *
 
 static starpu_codelet cl21 =
 {
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-	.core_func = chol_core_codelet_update_u21,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+	.cpu_func = chol_cpu_codelet_update_u21,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u21,
 #endif
@@ -139,8 +139,8 @@ static void create_task_21(unsigned k, unsigned j)
 
 static starpu_codelet cl22 =
 {
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-	.core_func = chol_core_codelet_update_u22,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+	.cpu_func = chol_cpu_codelet_update_u22,
 #ifdef USE_CUDA
 	.cuda_func = chol_cublas_codelet_update_u22,
 #endif

+ 8 - 8
examples/heat/dw_factolu.c

@@ -32,8 +32,8 @@ static unsigned no_prio = 0;
 
 static starpu_codelet cl11 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u11,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u11,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u11,
 #endif
@@ -43,8 +43,8 @@ static starpu_codelet cl11 =
 
 static starpu_codelet cl12 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u12,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u12,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u12,
 #endif
@@ -54,8 +54,8 @@ static starpu_codelet cl12 =
 
 static starpu_codelet cl21 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u21,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u21,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u21,
 #endif
@@ -65,8 +65,8 @@ static starpu_codelet cl21 =
 
 static starpu_codelet cl22 =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u22,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u22,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u22,
 #endif

+ 4 - 4
examples/heat/dw_factolu.h

@@ -184,10 +184,10 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 }
 #endif // CHECK_RESULTS
 
-void dw_core_codelet_update_u11(void **, void *);
-void dw_core_codelet_update_u12(void **, void *);
-void dw_core_codelet_update_u21(void **, void *);
-void dw_core_codelet_update_u22(void **, void *);
+void dw_cpu_codelet_update_u11(void **, void *);
+void dw_cpu_codelet_update_u12(void **, void *);
+void dw_cpu_codelet_update_u21(void **, void *);
+void dw_cpu_codelet_update_u22(void **, void *);
 
 #ifdef USE_CUDA
 void dw_cublas_codelet_update_u11(void *descr[], void *_args);

+ 8 - 8
examples/heat/dw_factolu_grain.c

@@ -41,8 +41,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 }
 
 static starpu_codelet cl11 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u11,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u11,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u11,
 #endif
@@ -74,8 +74,8 @@ static struct starpu_task *create_task_11(starpu_data_handle dataA, unsigned k,
 }
 
 static starpu_codelet cl12 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u12,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u12,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u12,
 #endif
@@ -113,8 +113,8 @@ static void create_task_12(starpu_data_handle dataA, unsigned k, unsigned i, uns
 }
 
 static starpu_codelet cl21 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u21,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u21,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u21,
 #endif
@@ -150,8 +150,8 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned j, uns
 }
 
 static starpu_codelet cl22 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u22,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u22,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u22,
 #endif

+ 7 - 7
examples/heat/dw_factolu_kernels.c

@@ -102,7 +102,7 @@ void display_stat_heat(void)
  *   U22 
  */
 
-static inline void dw_common_core_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
+static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
 	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
@@ -145,9 +145,9 @@ static inline void dw_common_core_codelet_update_u22(void *descr[], int s, __att
 	}
 }
 
-void dw_core_codelet_update_u22(void *descr[], void *_args)
+void dw_cpu_codelet_update_u22(void *descr[], void *_args)
 {
-	dw_common_core_codelet_update_u22(descr, 0, _args);
+	dw_common_cpu_codelet_update_u22(descr, 0, _args);
 
 	int id = starpu_get_worker_id();
 	count_22_per_worker[id]++;
@@ -156,7 +156,7 @@ void dw_core_codelet_update_u22(void *descr[], void *_args)
 #ifdef USE_CUDA
 void dw_cublas_codelet_update_u22(void *descr[], void *_args)
 {
-	dw_common_core_codelet_update_u22(descr, 1, _args);
+	dw_common_cpu_codelet_update_u22(descr, 1, _args);
 
 	int id = starpu_get_worker_id();
 	count_22_per_worker[id]++;
@@ -208,7 +208,7 @@ static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribut
 	}
 }
 
-void dw_core_codelet_update_u12(void *descr[], void *_args)
+void dw_cpu_codelet_update_u12(void *descr[], void *_args)
 {
 	dw_common_codelet_update_u12(descr, 0, _args);
 
@@ -268,7 +268,7 @@ static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribut
 	}
 }
 
-void dw_core_codelet_update_u21(void *descr[], void *_args)
+void dw_cpu_codelet_update_u21(void *descr[], void *_args)
 {
 	dw_common_codelet_update_u21(descr, 0, _args);
 
@@ -363,7 +363,7 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, __attribut
 }
 
 
-void dw_core_codelet_update_u11(void *descr[], void *_args)
+void dw_cpu_codelet_update_u11(void *descr[], void *_args)
 {
 	dw_common_codelet_update_u11(descr, 0, _args);
 

+ 8 - 8
examples/heat/dw_factolu_tag.c

@@ -43,8 +43,8 @@ static struct starpu_task *create_task(starpu_tag_t id)
 }
 
 static starpu_codelet cl11 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u11,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u11,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u11,
 #endif
@@ -77,8 +77,8 @@ static struct starpu_task *create_task_11(starpu_data_handle dataA, unsigned k)
 }
 
 static starpu_codelet cl12 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u12,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u12,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u12,
 #endif
@@ -116,8 +116,8 @@ static void create_task_12(starpu_data_handle dataA, unsigned k, unsigned i)
 }
 
 static starpu_codelet cl21 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u21,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u21,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u21,
 #endif
@@ -153,8 +153,8 @@ static void create_task_21(starpu_data_handle dataA, unsigned k, unsigned j)
 }
 
 static starpu_codelet cl22 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dw_core_codelet_update_u22,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dw_cpu_codelet_update_u22,
 #ifdef USE_CUDA
 	.cuda_func = dw_cublas_codelet_update_u22,
 #endif

+ 18 - 18
examples/heat/dw_sparse_cg.c

@@ -133,8 +133,8 @@ void init_cg(struct cg_problem *problem)
 
 	/* r = b  - A x */
 	struct starpu_task *task1 = create_task(1UL);
-	task1->cl->where = STARPU_CORE;
-	task1->cl->core_func = core_codelet_func_1;
+	task1->cl->where = STARPU_CPU;
+	task1->cl->cpu_func = cpu_codelet_func_1;
 	task1->cl->nbuffers = 4;
 		task1->buffers[0].handle = problem->ds_matrixA;
 		task1->buffers[0].mode = STARPU_R;
@@ -147,8 +147,8 @@ void init_cg(struct cg_problem *problem)
 
 	/* d = r */
 	struct starpu_task *task2 = create_task(2UL);
-	task2->cl->where = STARPU_CORE;
-	task2->cl->core_func = core_codelet_func_2;
+	task2->cl->where = STARPU_CPU;
+	task2->cl->cpu_func = cpu_codelet_func_2;
 	task2->cl->nbuffers = 2;
 		task2->buffers[0].handle = problem->ds_vecd;
 		task2->buffers[0].mode = STARPU_W;
@@ -159,11 +159,11 @@ void init_cg(struct cg_problem *problem)
 
 	/* delta_new = trans(r) r */
 	struct starpu_task *task3 = create_task(3UL);
-	task3->cl->where = STARPU_CUDA|STARPU_CORE;
+	task3->cl->where = STARPU_CUDA|STARPU_CPU;
 #ifdef USE_CUDA
 	task3->cl->cuda_func = cublas_codelet_func_3;
 #endif
-	task3->cl->core_func = core_codelet_func_3;
+	task3->cl->cpu_func = cpu_codelet_func_3;
 	task3->cl_arg = problem;
 	task3->cl->nbuffers = 1;
 		task3->buffers[0].handle = problem->ds_vecr;
@@ -194,8 +194,8 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* q = A d */
 	struct starpu_task *task4 = create_task(maskiter | 4UL);
-	task4->cl->where = STARPU_CORE;
-	task4->cl->core_func = core_codelet_func_4;
+	task4->cl->where = STARPU_CPU;
+	task4->cl->cpu_func = cpu_codelet_func_4;
 	task4->cl->nbuffers = 3;
 		task4->buffers[0].handle = problem->ds_matrixA;
 		task4->buffers[0].mode = STARPU_R;
@@ -206,11 +206,11 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* alpha = delta_new / ( trans(d) q )*/
 	struct starpu_task *task5 = create_task(maskiter | 5UL);
-	task5->cl->where = STARPU_CUDA|STARPU_CORE;
+	task5->cl->where = STARPU_CUDA|STARPU_CPU;
 #ifdef USE_CUDA
 	task5->cl->cuda_func = cublas_codelet_func_5;
 #endif
-	task5->cl->core_func = core_codelet_func_5;
+	task5->cl->cpu_func = cpu_codelet_func_5;
 	task5->cl_arg = problem;
 	task5->cl->nbuffers = 2;
 		task5->buffers[0].handle = problem->ds_vecd;
@@ -222,11 +222,11 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* x = x + alpha d */
 	struct starpu_task *task6 = create_task(maskiter | 6UL);
-	task6->cl->where = STARPU_CUDA|STARPU_CORE;
+	task6->cl->where = STARPU_CUDA|STARPU_CPU;
 #ifdef USE_CUDA
 	task6->cl->cuda_func = cublas_codelet_func_6;
 #endif
-	task6->cl->core_func = core_codelet_func_6;
+	task6->cl->cpu_func = cpu_codelet_func_6;
 	task6->cl_arg = problem;
 	task6->cl->nbuffers = 2;
 		task6->buffers[0].handle = problem->ds_vecx;
@@ -238,11 +238,11 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* r = r - alpha q */
 	struct starpu_task *task7 = create_task(maskiter | 7UL);
-	task7->cl->where = STARPU_CUDA|STARPU_CORE;
+	task7->cl->where = STARPU_CUDA|STARPU_CPU;
 #ifdef USE_CUDA
 	task7->cl->cuda_func = cublas_codelet_func_7;
 #endif
-	task7->cl->core_func = core_codelet_func_7;
+	task7->cl->cpu_func = cpu_codelet_func_7;
 	task7->cl_arg = problem;
 	task7->cl->nbuffers = 2;
 		task7->buffers[0].handle = problem->ds_vecr;
@@ -254,11 +254,11 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* update delta_* and compute beta */
 	struct starpu_task *task8 = create_task(maskiter | 8UL);
-	task8->cl->where = STARPU_CUDA|STARPU_CORE;
+	task8->cl->where = STARPU_CUDA|STARPU_CPU;
 #ifdef USE_CUDA
 	task8->cl->cuda_func = cublas_codelet_func_8;
 #endif
-	task8->cl->core_func = core_codelet_func_8;
+	task8->cl->cpu_func = cpu_codelet_func_8;
 	task8->cl_arg = problem;
 	task8->cl->nbuffers = 1;
 		task8->buffers[0].handle = problem->ds_vecr;
@@ -268,11 +268,11 @@ void launch_new_cg_iteration(struct cg_problem *problem)
 
 	/* d = r + beta d */
 	struct starpu_task *task9 = create_task(maskiter | 9UL);
-	task9->cl->where = STARPU_CUDA|STARPU_CORE;
+	task9->cl->where = STARPU_CUDA|STARPU_CPU;
 #ifdef USE_CUDA
 	task9->cl->cuda_func = cublas_codelet_func_9;
 #endif
-	task9->cl->core_func = core_codelet_func_9;
+	task9->cl->cpu_func = cpu_codelet_func_9;
 	task9->cl_arg = problem;
 	task9->cl->nbuffers = 2;
 		task9->buffers[0].handle = problem->ds_vecd;

+ 9 - 9
examples/heat/dw_sparse_cg.h

@@ -104,29 +104,29 @@ static void __attribute__ ((unused)) print_results(float *result, unsigned size)
 	}
 }
 
-void core_codelet_func_1(void *descr[], void *arg);
+void cpu_codelet_func_1(void *descr[], void *arg);
 
-void core_codelet_func_2(void *descr[], void *arg);
+void cpu_codelet_func_2(void *descr[], void *arg);
 
 void cublas_codelet_func_3(void *descr[], void *arg);
-void core_codelet_func_3(void *descr[], void *arg);
+void cpu_codelet_func_3(void *descr[], void *arg);
 
-void core_codelet_func_4(void *descr[], void *arg);
+void cpu_codelet_func_4(void *descr[], void *arg);
 
-void core_codelet_func_5(void *descr[], void *arg);
+void cpu_codelet_func_5(void *descr[], void *arg);
 void cublas_codelet_func_5(void *descr[], void *arg);
 
 void cublas_codelet_func_6(void *descr[], void *arg);
-void core_codelet_func_6(void *descr[], void *arg);
+void cpu_codelet_func_6(void *descr[], void *arg);
 
 void cublas_codelet_func_7(void *descr[], void *arg);
-void core_codelet_func_7(void *descr[], void *arg);
+void cpu_codelet_func_7(void *descr[], void *arg);
 
 void cublas_codelet_func_8(void *descr[], void *arg);
-void core_codelet_func_8(void *descr[], void *arg);
+void cpu_codelet_func_8(void *descr[], void *arg);
 
 void cublas_codelet_func_9(void *descr[], void *arg);
-void core_codelet_func_9(void *descr[], void *arg);
+void cpu_codelet_func_9(void *descr[], void *arg);
 
 void iteration_cg(void *problem);
 

+ 9 - 9
examples/heat/dw_sparse_cg_kernels.c

@@ -50,7 +50,7 @@
  *		descr[0] = A, descr[1] = x, descr [2] = r, descr[3] = b
  */
 
-void core_codelet_func_1(void *descr[], __attribute__((unused)) void *arg)
+void cpu_codelet_func_1(void *descr[], __attribute__((unused)) void *arg)
 {
 	float *nzval = (float *)STARPU_GET_CSR_NZVAL(descr[0]);
 	uint32_t *colind = STARPU_GET_CSR_COLIND(descr[0]);
@@ -94,7 +94,7 @@ void core_codelet_func_1(void *descr[], __attribute__((unused)) void *arg)
  *	compute d = r
  *		descr[0] = d, descr[1] = r
  */
-void core_codelet_func_2(void *descr[], __attribute__((unused)) void *arg)
+void cpu_codelet_func_2(void *descr[], __attribute__((unused)) void *arg)
 {
 	/* simply copy r into d */
 	uint32_t nx = STARPU_GET_VECTOR_NX(descr[0]);
@@ -116,7 +116,7 @@ void core_codelet_func_2(void *descr[], __attribute__((unused)) void *arg)
  *		args = &delta_new, &delta_0
  */
 
-void core_codelet_func_3(void *descr[], void *arg)
+void cpu_codelet_func_3(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float dot;
@@ -161,7 +161,7 @@ void cublas_codelet_func_3(void *descr[], void *arg)
  *		descr[0] = A, descr[1] = d, descr [2] = q
  */
 
-void core_codelet_func_4(void *descr[], __attribute__((unused)) void *arg)
+void cpu_codelet_func_4(void *descr[], __attribute__((unused)) void *arg)
 {
 	float *nzval = (float *)STARPU_GET_CSR_NZVAL(descr[0]);
 	uint32_t *colind = STARPU_GET_CSR_COLIND(descr[0]);
@@ -207,7 +207,7 @@ void core_codelet_func_4(void *descr[], __attribute__((unused)) void *arg)
  *		args = &alpha, &delta_new
  */
 
-void core_codelet_func_5(void *descr[], void *arg)
+void cpu_codelet_func_5(void *descr[], void *arg)
 {
 	float dot;
 	struct cg_problem *pb = arg;
@@ -256,7 +256,7 @@ void cublas_codelet_func_5(void *descr[], void *arg)
  *		args = &alpha
  */
 
-void core_codelet_func_6(void *descr[], void *arg)
+void cpu_codelet_func_6(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecx, *vecd;
@@ -295,7 +295,7 @@ void cublas_codelet_func_6(void *descr[], void *arg)
  *		args = &alpha
  */
 
-void core_codelet_func_7(void *descr[], void *arg)
+void cpu_codelet_func_7(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecr, *vecq;
@@ -336,7 +336,7 @@ void cublas_codelet_func_7(void *descr[], void *arg)
  *		args = &delta_old, &delta_new, &beta
  */
 
-void core_codelet_func_8(void *descr[], void *arg)
+void cpu_codelet_func_8(void *descr[], void *arg)
 {
 	float dot;
 	struct cg_problem *pb = arg;
@@ -382,7 +382,7 @@ void cublas_codelet_func_8(void *descr[], void *arg)
  *
  */
 
-void core_codelet_func_9(void *descr[], void *arg)
+void cpu_codelet_func_9(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecd, *vecr;

+ 12 - 12
examples/heat/lu_kernels_model.c

@@ -159,7 +159,7 @@ double task_22_cost_cuda(starpu_buffer_descr *descr)
  *
  */
 
-double task_11_cost_core(starpu_buffer_descr *descr)
+double task_11_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
@@ -167,11 +167,11 @@ double task_11_cost_core(starpu_buffer_descr *descr)
 
 	double cost = ((n*n*n)/537.5);
 
-//	printf("CORE task 11 ; predict %e\n", cost);
+//	printf("CPU task 11 ; predict %e\n", cost);
 	return PERTURBATE(cost);
 }
 
-double task_12_cost_core(starpu_buffer_descr *descr)
+double task_12_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
@@ -179,12 +179,12 @@ double task_12_cost_core(starpu_buffer_descr *descr)
 
 	double cost = ((n*n*n)/6668.224);
 
-//	printf("CORE task 12 ; predict %e\n", cost);
+//	printf("CPU task 12 ; predict %e\n", cost);
 	return PERTURBATE(cost);
 }
 
 
-double task_21_cost_core(starpu_buffer_descr *descr)
+double task_21_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
@@ -192,13 +192,13 @@ double task_21_cost_core(starpu_buffer_descr *descr)
 
 	double cost = ((n*n*n)/6793.8423);
 
-//	printf("CORE task 21 ; predict %e\n", cost);
+//	printf("CPU task 21 ; predict %e\n", cost);
 	return PERTURBATE(cost);
 }
 
 
 
-double task_22_cost_core(starpu_buffer_descr *descr)
+double task_22_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t nx, ny, nz;
 
@@ -208,14 +208,14 @@ double task_22_cost_core(starpu_buffer_descr *descr)
 
 	double cost = ((nx*ny*nz)/4203.0175);
 
-//	printf("CORE task 22 ; predict %e\n", cost);
+//	printf("CPU task 22 ; predict %e\n", cost);
 	return PERTURBATE(cost);
 }
 
 struct starpu_perfmodel_t model_11 = {
 	.cost_model = task_11_cost,
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = task_11_cost_core },
+		[STARPU_CPU_DEFAULT] = { .cost_model = task_11_cost_cpu },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = task_11_cost_cuda }
 	},
 	.type = STARPU_HISTORY_BASED,
@@ -231,7 +231,7 @@ struct starpu_perfmodel_t model_11 = {
 struct starpu_perfmodel_t model_12 = {
 	.cost_model = task_12_cost,
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = task_12_cost_core },
+		[STARPU_CPU_DEFAULT] = { .cost_model = task_12_cost_cpu },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = task_12_cost_cuda }
 	},
 	.type = STARPU_HISTORY_BASED,
@@ -247,7 +247,7 @@ struct starpu_perfmodel_t model_12 = {
 struct starpu_perfmodel_t model_21 = {
 	.cost_model = task_21_cost,
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = task_21_cost_core },
+		[STARPU_CPU_DEFAULT] = { .cost_model = task_21_cost_cpu },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = task_21_cost_cuda }
 	},
 	.type = STARPU_HISTORY_BASED,
@@ -263,7 +263,7 @@ struct starpu_perfmodel_t model_21 = {
 struct starpu_perfmodel_t model_22 = {
 	.cost_model = task_22_cost,
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = task_22_cost_core },
+		[STARPU_CPU_DEFAULT] = { .cost_model = task_22_cost_cpu },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = task_22_cost_cuda }
 	},
 	.type = STARPU_HISTORY_BASED,

+ 3 - 3
examples/incrementer/incrementer.c

@@ -25,7 +25,7 @@ extern void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args);
 
 extern void cuda_codelet_host(float *tab);
 
-void core_codelet(void *descr[], __attribute__ ((unused)) void *_args)
+void cpu_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	float *val = (float *)STARPU_GET_VECTOR_PTR(descr[0]);
 
@@ -48,8 +48,8 @@ int main(int argc, char **argv)
 	starpu_codelet cl =
 	{
 		/* CUBLAS stands for CUDA kernels controlled from the host */
-		.where = STARPU_CORE|STARPU_CUDA,
-		.core_func = core_codelet,
+		.where = STARPU_CPU|STARPU_CUDA,
+		.cpu_func = cpu_codelet,
 #ifdef USE_CUDA
 		.cuda_func = cuda_codelet,
 #endif

+ 8 - 8
examples/lu/xlu.c

@@ -58,8 +58,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_11) = {
 };
 
 static starpu_codelet cl11 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_u11),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_u11),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_u11),
 #endif
@@ -103,8 +103,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_12) = {
 };
 
 static starpu_codelet cl12 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_u12),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_u12),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_u12),
 #endif
@@ -153,8 +153,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_21) = {
 };
 
 static starpu_codelet cl21 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_u21),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_u21),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_u21),
 #endif
@@ -201,8 +201,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_22) = {
 };
 
 static starpu_codelet cl22 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_u22),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_u22),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_u22),
 #endif

+ 4 - 4
examples/lu/xlu.h

@@ -74,10 +74,10 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 }
 #endif // CHECK_RESULTS
 
-void dw_core_codelet_update_u11(void **, void *);
-void dw_core_codelet_update_u12(void **, void *);
-void dw_core_codelet_update_u21(void **, void *);
-void dw_core_codelet_update_u22(void **, void *);
+void dw_cpu_codelet_update_u11(void **, void *);
+void dw_cpu_codelet_update_u12(void **, void *);
+void dw_cpu_codelet_update_u21(void **, void *);
+void dw_cpu_codelet_update_u22(void **, void *);
 
 #ifdef USE_CUDA
 void dw_cublas_codelet_update_u11(void *descr[], void *_args);

+ 10 - 10
examples/lu/xlu_pivot.c

@@ -61,8 +61,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_pivot) = {
 };
 
 static starpu_codelet cl_pivot = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_pivot),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_pivot),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_pivot),
 #endif
@@ -128,8 +128,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_11_pivot) = {
 };
 
 static starpu_codelet cl11_pivot = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_u11_pivot),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_u11_pivot),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_u11_pivot),
 #endif
@@ -175,8 +175,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_12) = {
 };
 
 static starpu_codelet cl12 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_u12),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_u12),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_u12),
 #endif
@@ -231,8 +231,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_21) = {
 };
 
 static starpu_codelet cl21 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_u21),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_u21),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_u21),
 #endif
@@ -285,8 +285,8 @@ static struct starpu_perfmodel_t STARPU_LU(model_22) = {
 };
 
 static starpu_codelet cl22 = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_LU(cpu_u22),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_LU(cpu_u22),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_LU(cublas_u22),
 #endif

+ 2 - 2
examples/mult/dw_mult.c

@@ -181,8 +181,8 @@ static void partition_mult_data(void)
 }
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-	.core_func = core_mult,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+	.cpu_func = cpu_mult,
 #ifdef USE_CUDA
 	.cuda_func = cublas_mult,
 #endif

+ 1 - 1
examples/mult/dw_mult.h

@@ -197,6 +197,6 @@ static void display_memory_consumption(void)
 void cublas_mult(void *descr[], __attribute__((unused)) void *arg);
 #endif
 
-void core_mult(void *descr[], __attribute__((unused))  void *arg);
+void cpu_mult(void *descr[], __attribute__((unused))  void *arg);
 
 #endif // __MULT_H__

+ 2 - 2
examples/mult/dw_mult_no_stride.c

@@ -227,7 +227,7 @@ struct cb2_s {
 };
 
 static starpu_codelet cl = {
-	.core_func = core_mult,
+	.cpu_func = cpu_mult,
 #ifdef USE_CUDA
 	.cuda_func = cublas_mult,
 #endif
@@ -236,7 +236,7 @@ static starpu_codelet cl = {
 #endif
 
 	.model = &sgemm_model,
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
 	.nbuffers = 3
 };
 

+ 2 - 2
examples/mult/dw_mult_no_stride_no_tag.c

@@ -280,8 +280,8 @@ struct cb2_s {
 
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-	.core_func = core_mult,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+	.cpu_func = cpu_mult,
 #ifdef USE_CUDA
 	.cuda_func = cublas_mult,
 #endif

+ 1 - 1
examples/mult/sgemm_kernels.c

@@ -59,7 +59,7 @@ void cublas_mult(void *descr[], __attribute__((unused)) void *arg)
 }
 #endif
 
-void core_mult(void *descr[], __attribute__((unused))  void *arg)
+void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 {
 	COMMON_CODE
 

+ 2 - 2
examples/mult/xgemm.c

@@ -183,8 +183,8 @@ static struct starpu_perfmodel_t gemm_model = {
 };
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = STARPU_GEMM(core_mult),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_GEMM(cpu_mult),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_GEMM(cublas_mult),
 #endif

+ 1 - 1
examples/mult/xgemm_kernels.c

@@ -59,7 +59,7 @@ void STARPU_GEMM(cublas_mult)(void *descr[], __attribute__((unused)) void *arg)
 }
 #endif
 
-void STARPU_GEMM(core_mult)(void *descr[], __attribute__((unused))  void *arg)
+void STARPU_GEMM(cpu_mult)(void *descr[], __attribute__((unused))  void *arg)
 {
 	COMMON_CODE
 

+ 20 - 20
examples/pastix-wrappers/starpu-blas-wrapper.c

@@ -42,9 +42,9 @@
 
 extern struct starpu_data_interface_ops_t interface_blas_ops;
 
-static int core_sgemm = 0;
+static int cpu_sgemm = 0;
 static int cublas_sgemm = 0;
-static int core_strsm = 0;
+static int cpu_strsm = 0;
 static int cublas_strsm = 0;
 
 static int inited = 0;
@@ -61,8 +61,8 @@ void STARPU_TERMINATE(void)
 {
 	starpu_shutdown();
 
-	fprintf(stderr, "sgemm : core %d cublas %d\n", core_sgemm, cublas_sgemm);
-	fprintf(stderr, "strsm : core %d cublas %d\n", core_strsm, cublas_strsm);
+	fprintf(stderr, "sgemm : cpu %d cublas %d\n", cpu_sgemm, cublas_sgemm);
+	fprintf(stderr, "strsm : cpu %d cublas %d\n", cpu_strsm, cublas_strsm);
 }
 
 /*
@@ -230,7 +230,7 @@ void STARPU_DECLARE_WORK_BLOCKS(float *maxbloktab1, float *maxbloktab2, unsigned
 	sem_t sem;
 
 	/* initialize codelet */
-	cl.where = CUDA;
+	cl.where = STARPU_CUDA;
 	cl.cuda_func = allocate_maxbloktab_on_cublas;
 	
 	j = _starpu_job_create();
@@ -253,7 +253,7 @@ void STARPU_DECLARE_WORK_BLOCKS(float *maxbloktab1, float *maxbloktab2, unsigned
 
 }
 
-void _core_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
+void _cpu_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
 {
 	uint32_t nx, ny, ld;
 	nx = GET_BLAS_NX(descr[0]);
@@ -268,7 +268,7 @@ void _core_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
 	unsigned n = ny;
 
 //	SOPALIN_TRSM("R","L","T","U",dimb,dima,fun,ga,stride,gb,stride);
-	core_strsm++;
+	cpu_strsm++;
 
 	cblas_strsm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasUnit, m, n, 1.0f, 
 			diag_cblkdata, ld, extra_cblkdata, ld);
@@ -301,7 +301,7 @@ void _cublas_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
 
 static struct starpu_perfmodel_t starpu_cblk_strsm = {
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = starpu_cblk_strsm_core_cost },
+		[STARPU_CPU_DEFAULT] = { .cost_model = starpu_cblk_strsm_cpu_cost },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = starpu_cblk_strsm_cuda_cost }
 	},
 //	.type = REGRESSION_BASED,
@@ -318,12 +318,12 @@ void STARPU_CBLK_STRSM(unsigned col)
 	sem_t sem;
 
 	/* initialize codelet */
-	cl.where = CORE|CUDA;
-	cl.core_func = _core_cblk_strsm;
+	cl.where = STARPU_CPU|STARPU_CUDA;
+	cl.cpu_func = _cpu_cblk_strsm;
 	cl.cuda_func = _cublas_cblk_strsm;
 	
 	j = _starpu_job_create();
-//	j->where = (starpu_get_blas_nx(&cblktab[col]) > BLOCK && starpu_get_blas_ny(&cblktab[col]) > BLOCK)? CUBLAS:CORE;
+//	j->where = (starpu_get_blas_nx(&cblktab[col]) > BLOCK && starpu_get_blas_ny(&cblktab[col]) > BLOCK)? CUBLAS:CPU;
 	j->cb = _cublas_cblk_strsm_callback;
 	j->argcb = &sem;
 	j->cl = &cl;
@@ -354,7 +354,7 @@ struct starpu_compute_contrib_compact_args {
 };
 
 
-void _core_compute_contrib_compact(void *descr[], void *arg)
+void _cpu_compute_contrib_compact(void *descr[], void *arg)
 {
 	struct starpu_compute_contrib_compact_args *args = arg;
 
@@ -364,7 +364,7 @@ void _core_compute_contrib_compact(void *descr[], void *arg)
 	float *gc = (float *)GET_BLAS_PTR(descr[2]);
 	unsigned stridec = (unsigned)GET_BLAS_LD(descr[2]);
 
-	core_sgemm++;
+	cpu_sgemm++;
 
 	cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, 
 			args->dimi, args->dimj, args->dima,
@@ -400,7 +400,7 @@ void _cublas_compute_contrib_compact(void *descr[], void *arg)
 
 static struct starpu_perfmodel_t starpu_compute_contrib_compact = {
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = starpu_compute_contrib_compact_core_cost },
+		[STARPU_CPU_DEFAULT] = { .cost_model = starpu_compute_contrib_compact_cpu_cost },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = starpu_compute_contrib_compact_cuda_cost }
 	},
 //	.type = REGRESSION_BASED,
@@ -458,8 +458,8 @@ void STARPU_COMPUTE_CONTRIB_COMPACT(unsigned col, int dimi, int dimj, int dima,
 	sem_t sem;
 
 	/* initialize codelet */
-	cl.where = CUDA|CORE;
-	cl.core_func = _core_compute_contrib_compact;
+	cl.where = STARPU_CUDA|STARPU_CPU;
+	cl.cpu_func = _cpu_compute_contrib_compact;
 	cl.cuda_func = _cublas_compute_contrib_compact;
 	
 	j = _starpu_job_create();
@@ -600,8 +600,8 @@ void STARPU_SGEMM (const char *transa, const char *transb, const int m,
 	starpu_register_blas_data(&C_state, 0, (uintptr_t)C, ldc, m, n, sizeof(float));
 
 	/* initialize codelet */
-	cl.where = CUDA;
-	//cl.core_func = _core_strsm;
+	cl.where = STARPU_CUDA;
+	//cl.cpu_func = _cpu_strsm;
 	cl.cuda_func = _cublas_sgemm;
 	
 	j = _starpu_job_create();
@@ -656,7 +656,7 @@ struct strsm_args {
 	int m,n;
 };
 //
-//void _core_strsm(void *descr[], void *arg)
+//void _cpu_strsm(void *descr[], void *arg)
 //{
 //	float *A, *B;
 //	uint32_t nxA, nyA, ldA;
@@ -674,7 +674,7 @@ struct strsm_args {
 //
 //	struct strsm_args *args = arg;
 //
-//	fprintf(stderr, "CORE STRSM nxA %d nyA %d nxB %d nyB %d lda %d ldb %d\n", nxA, nyA, nxB, nyB, ldA, ldB);
+//	fprintf(stderr, "CPU STRSM nxA %d nyA %d nxB %d nyB %d lda %d ldb %d\n", nxA, nyA, nxB, nyB, ldA, ldB);
 //
 //	SOPALIN_TRSM("R","L","T","U",dimb,dima,fun,ga,stride,gb,stride);
 //	

+ 2 - 2
examples/pastix-wrappers/starpu-blas-wrapper.h

@@ -41,7 +41,7 @@ static double transfer_time_htod(unsigned size)
 
 #define PERF_GEMM_CPU(i,j,k) (GEMM_CPU_A*(double)(i)*(double)(j)*(double)(k)+GEMM_CPU_B*(double)(i)*(double)(j)+GEMM_CPU_C*(double)(j)*(double)(k)+GEMM_CPU_D*(double)(i)+GEMM_CPU_E*(double)(j)+GEMM_CPU_F)
 
-static double starpu_compute_contrib_compact_core_cost(starpu_buffer_descr *descr)
+static double starpu_compute_contrib_compact_cpu_cost(starpu_buffer_descr *descr)
 {
 	unsigned nx0, ny0, ny2;
 	nx0 = descr[0].handle->interface->blas.nx;
@@ -85,7 +85,7 @@ static double starpu_cblk_strsm_cuda_cost(starpu_buffer_descr *descr)
 
 #define PERF_TRSM_CPU(i,j)   (TRSM_CPU_A*(double)(i)*(double)(i)*(double)(j)+TRSM_CPU_B*(double)(i)+TRSM_CPU_C)
 
-static double starpu_cblk_strsm_core_cost(starpu_buffer_descr *descr)
+static double starpu_cblk_strsm_cpu_cost(starpu_buffer_descr *descr)
 {
 	unsigned nx, ny;
 	nx = descr[0].handle->interface->blas.nx;

+ 2 - 2
examples/ppm-downscaler/yuv-downscaler.c

@@ -98,8 +98,8 @@ static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 }
 
 static struct starpu_codelet_t ds_codelet = {
-	.where = STARPU_CORE,
-	.core_func = ds_kernel_cpu,
+	.where = STARPU_CPU,
+	.cpu_func = ds_kernel_cpu,
 	.nbuffers = 2, /* input -> output */
 	.model = NULL
 };

+ 2 - 2
examples/spmv/dw_block_spmv.c

@@ -115,8 +115,8 @@ void call_filters(void)
 unsigned totaltasks;
 
 starpu_codelet cl = {
-	.where = CORE|CUDA,
-	.core_func =  core_block_spmv,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func =  cpu_block_spmv,
 #ifdef USE_CUDA
 	.cuda_func = cublas_block_spmv,
 #endif

+ 1 - 1
examples/spmv/dw_block_spmv.h

@@ -32,7 +32,7 @@
 #include <cublas.h>
 #endif
 
-void core_block_spmv(void *descr[], void *_args);
+void cpu_block_spmv(void *descr[], void *_args);
 
 #ifdef USE_CUDA
 void cublas_block_spmv(void *descr[], void *_args);

+ 2 - 2
examples/spmv/dw_block_spmv_kernels.c

@@ -47,9 +47,9 @@ static inline void common_block_spmv(void *descr[], int s, __attribute__((unused
 	}
 }
 
-void core_block_spmv(void *descr[], void *_args)
+void cpu_block_spmv(void *descr[], void *_args)
 {
-//	printf("CORE CODELET \n");
+//	printf("CPU CODELET \n");
 
 	common_block_spmv(descr, 0, _args);
 }

+ 3 - 3
examples/spmv/dw_spmv.c

@@ -91,7 +91,7 @@ void parse_args(int argc, char **argv)
 	}
 }
 
-void core_spmv(void *descr[], __attribute__((unused))  void *arg)
+void cpu_spmv(void *descr[], __attribute__((unused))  void *arg)
 {
 	float *nzval = (float *)STARPU_GET_CSR_NZVAL(descr[0]);
 	uint32_t *colind = STARPU_GET_CSR_COLIND(descr[0]);
@@ -249,8 +249,8 @@ void call_spmv_codelet_filters(void)
 	starpu_partition_data(sparse_matrix, &csr_f);
 	starpu_partition_data(vector_out, &vector_f);
 
-	cl->where = CORE|CUDA;
-	cl->core_func =  core_spmv;
+	cl->where = STARPU_CPU|STARPU_CUDA;
+	cl->cpu_func =  cpu_spmv;
 #ifdef USE_CUDA
 	cl->cuda_func = spmv_kernel_cuda;
 #endif

+ 1 - 1
examples/starpufft/starpufftx.c

@@ -234,7 +234,7 @@ STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
 
 	for (workerid = 0; workerid < starpu_get_worker_count(); workerid++) {
 		switch (starpu_get_worker_type(workerid)) {
-		case STARPU_CORE_WORKER:
+		case STARPU_CPU_WORKER:
 #ifdef HAVE_FFTW
 			_FFTW(free)(plan->plans[workerid].in1);
 			_FFTW(free)(plan->plans[workerid].out1);

+ 11 - 11
examples/starpufft/starpufftx1d.c

@@ -261,11 +261,11 @@ static starpu_codelet STARPUFFT(twist1_1d_codelet) = {
 #ifdef USE_CUDA
 		STARPU_CUDA|
 #endif
-		STARPU_CORE,
+		STARPU_CPU,
 #ifdef USE_CUDA
 	.cuda_func = STARPUFFT(twist1_1d_kernel_gpu),
 #endif
-	.core_func = STARPUFFT(twist1_1d_kernel_cpu),
+	.cpu_func = STARPUFFT(twist1_1d_kernel_cpu),
 	.model = &STARPUFFT(twist1_1d_model),
 	.nbuffers = 2
 };
@@ -276,22 +276,22 @@ static starpu_codelet STARPUFFT(fft1_1d_codelet) = {
 		STARPU_CUDA|
 #endif
 #ifdef HAVE_FFTW
-		STARPU_CORE|
+		STARPU_CPU|
 #endif
 		0,
 #ifdef USE_CUDA
 	.cuda_func = STARPUFFT(fft1_1d_kernel_gpu),
 #endif
 #ifdef HAVE_FFTW
-	.core_func = STARPUFFT(fft1_1d_kernel_cpu),
+	.cpu_func = STARPUFFT(fft1_1d_kernel_cpu),
 #endif
 	.model = &STARPUFFT(fft1_1d_model),
 	.nbuffers = 3
 };
 
 static starpu_codelet STARPUFFT(twist2_1d_codelet) = {
-	.where = STARPU_CORE,
-	.core_func = STARPUFFT(twist2_1d_kernel_cpu),
+	.where = STARPU_CPU,
+	.cpu_func = STARPUFFT(twist2_1d_kernel_cpu),
 	.model = &STARPUFFT(twist2_1d_model),
 	.nbuffers = 1
 };
@@ -302,22 +302,22 @@ static starpu_codelet STARPUFFT(fft2_1d_codelet) = {
 		STARPU_CUDA|
 #endif
 #ifdef HAVE_FFTW
-		STARPU_CORE|
+		STARPU_CPU|
 #endif
 		0,
 #ifdef USE_CUDA
 	.cuda_func = STARPUFFT(fft2_1d_kernel_gpu),
 #endif
 #ifdef HAVE_FFTW
-	.core_func = STARPUFFT(fft2_1d_kernel_cpu),
+	.cpu_func = STARPUFFT(fft2_1d_kernel_cpu),
 #endif
 	.model = &STARPUFFT(fft2_1d_model),
 	.nbuffers = 2
 };
 
 static starpu_codelet STARPUFFT(twist3_1d_codelet) = {
-	.where = STARPU_CORE,
-	.core_func = STARPUFFT(twist3_1d_kernel_cpu),
+	.where = STARPU_CPU,
+	.cpu_func = STARPUFFT(twist3_1d_kernel_cpu),
 	.model = &STARPUFFT(twist3_1d_model),
 	.nbuffers = 1
 };
@@ -391,7 +391,7 @@ STARPUFFT(plan_dft_1d)(int n, int sign, unsigned flags)
 	/* Initialize per-worker working set */
 	for (workerid = 0; workerid < starpu_get_worker_count(); workerid++) {
 		switch (starpu_get_worker_type(workerid)) {
-		case STARPU_CORE_WORKER:
+		case STARPU_CPU_WORKER:
 #ifdef HAVE_FFTW
 			/* first fft plan: one n2 fft */
 			plan->plans[workerid].in1 = _FFTW(malloc)(plan->totsize2 * sizeof(_fftw_complex));

+ 11 - 11
examples/starpufft/starpufftx2d.c

@@ -301,11 +301,11 @@ static starpu_codelet STARPUFFT(twist1_2d_codelet) = {
 #ifdef USE_CUDA
 		STARPU_CUDA|
 #endif
-		STARPU_CORE,
+		STARPU_CPU,
 #ifdef USE_CUDA
 	.cuda_func = STARPUFFT(twist1_2d_kernel_gpu),
 #endif
-	.core_func = STARPUFFT(twist1_2d_kernel_cpu),
+	.cpu_func = STARPUFFT(twist1_2d_kernel_cpu),
 	.model = &STARPUFFT(twist1_2d_model),
 	.nbuffers = 2
 };
@@ -316,22 +316,22 @@ static starpu_codelet STARPUFFT(fft1_2d_codelet) = {
 		STARPU_CUDA|
 #endif
 #ifdef HAVE_FFTW
-		STARPU_CORE|
+		STARPU_CPU|
 #endif
 		0,
 #ifdef USE_CUDA
 	.cuda_func = STARPUFFT(fft1_2d_kernel_gpu),
 #endif
 #ifdef HAVE_FFTW
-	.core_func = STARPUFFT(fft1_2d_kernel_cpu),
+	.cpu_func = STARPUFFT(fft1_2d_kernel_cpu),
 #endif
 	.model = &STARPUFFT(fft1_2d_model),
 	.nbuffers = 4
 };
 
 static starpu_codelet STARPUFFT(twist2_2d_codelet) = {
-	.where = STARPU_CORE,
-	.core_func = STARPUFFT(twist2_2d_kernel_cpu),
+	.where = STARPU_CPU,
+	.cpu_func = STARPUFFT(twist2_2d_kernel_cpu),
 	.model = &STARPUFFT(twist2_2d_model),
 	.nbuffers = 1
 };
@@ -342,22 +342,22 @@ static starpu_codelet STARPUFFT(fft2_2d_codelet) = {
 		STARPU_CUDA|
 #endif
 #ifdef HAVE_FFTW
-		STARPU_CORE|
+		STARPU_CPU|
 #endif
 		0,
 #ifdef USE_CUDA
 	.cuda_func = STARPUFFT(fft2_2d_kernel_gpu),
 #endif
 #ifdef HAVE_FFTW
-	.core_func = STARPUFFT(fft2_2d_kernel_cpu),
+	.cpu_func = STARPUFFT(fft2_2d_kernel_cpu),
 #endif
 	.model = &STARPUFFT(fft2_2d_model),
 	.nbuffers = 2
 };
 
 static starpu_codelet STARPUFFT(twist3_2d_codelet) = {
-	.where = STARPU_CORE,
-	.core_func = STARPUFFT(twist3_2d_kernel_cpu),
+	.where = STARPU_CPU,
+	.cpu_func = STARPUFFT(twist3_2d_kernel_cpu),
 	.model = &STARPUFFT(twist3_2d_model),
 	.nbuffers = 1
 };
@@ -450,7 +450,7 @@ STARPUFFT(plan_dft_2d)(int n, int m, int sign, unsigned flags)
 	/* Initialize per-worker working set */
 	for (workerid = 0; workerid < starpu_get_worker_count(); workerid++) {
 		switch (starpu_get_worker_type(workerid)) {
-		case STARPU_CORE_WORKER:
+		case STARPU_CPU_WORKER:
 #ifdef HAVE_FFTW
 			/* first fft plan: one n2*m2 fft */
 			plan->plans[workerid].in1 = _FFTW(malloc)(plan->totsize2 * sizeof(_fftw_complex));

+ 10 - 10
examples/strassen/strassen.c

@@ -89,9 +89,9 @@ static void unpartition_matrices(strassen_iter_state_t *iter)
 }
 
 static starpu_codelet cl_add = {
-	.where = CORE|CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_add_sub,
-	.core_func = add_core_codelet,
+	.cpu_func = add_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = add_cublas_codelet,
 #endif
@@ -99,9 +99,9 @@ static starpu_codelet cl_add = {
 };
 
 static starpu_codelet cl_sub = {
-	.where = CORE|CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_add_sub,
-	.core_func = sub_core_codelet,
+	.cpu_func = sub_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = sub_cublas_codelet,
 #endif
@@ -109,9 +109,9 @@ static starpu_codelet cl_sub = {
 };
 
 static starpu_codelet cl_mult = {
-	.where = CORE|CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_mult,
-	.core_func = mult_core_codelet,
+	.cpu_func = mult_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = mult_cublas_codelet,
 #endif
@@ -119,9 +119,9 @@ static starpu_codelet cl_mult = {
 };
 
 static starpu_codelet cl_self_add = {
-	.where = CORE|CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_self_add_sub,
-	.core_func = self_add_core_codelet,
+	.cpu_func = self_add_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = self_add_cublas_codelet,
 #endif
@@ -129,9 +129,9 @@ static starpu_codelet cl_self_add = {
 };
 
 static starpu_codelet cl_self_sub = {
-	.where = CORE|CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_self_add_sub,
-	.core_func = self_sub_core_codelet,
+	.cpu_func = self_sub_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = self_sub_cublas_codelet,
 #endif

+ 5 - 5
examples/strassen/strassen.h

@@ -91,11 +91,11 @@ typedef struct {
 	unsigned i;
 } phase3_t;
 
-void mult_core_codelet(void *descr[], __attribute__((unused))  void *arg);
-void sub_core_codelet(void *descr[], __attribute__((unused))  void *arg);
-void add_core_codelet(void *descr[], __attribute__((unused))  void *arg);
-void self_add_core_codelet(void *descr[], __attribute__((unused))  void *arg);
-void self_sub_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+void mult_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
+void sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
+void add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
+void self_add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
+void self_sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
 
 #ifdef USE_CUDA
 void mult_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);

+ 28 - 28
examples/strassen/strassen_kernels.c

@@ -19,17 +19,17 @@
 
 static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg)
 {
-	float *center 	= (float *)GET_BLAS_PTR(descr[0]);
-	float *left 	= (float *)GET_BLAS_PTR(descr[1]);
-	float *right 	= (float *)GET_BLAS_PTR(descr[2]);
+	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
+	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
 
-	unsigned dx = GET_BLAS_NX(descr[0]);
-	unsigned dy = GET_BLAS_NY(descr[0]);
-	unsigned dz = GET_BLAS_NX(descr[1]);
+	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dz = STARPU_GET_BLAS_NX(descr[1]);
 
-	unsigned ld21 = GET_BLAS_LD(descr[1]);
-	unsigned ld12 = GET_BLAS_LD(descr[2]);
-	unsigned ld22 = GET_BLAS_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld12 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld22 = STARPU_GET_BLAS_LD(descr[0]);
 
 	switch (s) {
 		case 0:
@@ -51,7 +51,7 @@ static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  v
 	}
 }
 
-void mult_core_codelet(void *descr[], void *_args)
+void mult_cpu_codelet(void *descr[], void *_args)
 {
 	mult_common_codelet(descr, 0, _args);
 }
@@ -67,16 +67,16 @@ static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))
 {
 	/* C = A op B */
 
-	float *C 	= (float *)GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)GET_BLAS_PTR(descr[1]);
-	float *B 	= (float *)GET_BLAS_PTR(descr[2]);
+	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	float *B 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
 
-	unsigned dx = GET_BLAS_NX(descr[0]);
-	unsigned dy = GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
 
-	unsigned ldA = GET_BLAS_LD(descr[1]);
-	unsigned ldB = GET_BLAS_LD(descr[2]);
-	unsigned ldC = GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ldB = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
 
 	// TODO check dim ...
 
@@ -112,12 +112,12 @@ static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))
 	}
 }
 
-void sub_core_codelet(void *descr[], __attribute__((unused))  void *arg)
+void sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 0, arg, -1.0f);
 }
 
-void add_core_codelet(void *descr[], __attribute__((unused))  void *arg)
+void add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 0, arg, 1.0f);
 }
@@ -139,14 +139,14 @@ static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unu
 {
 	/* C +=/-= A */
 
-	float *C 	= (float *)GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)GET_BLAS_PTR(descr[1]);
+	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
 
-	unsigned dx = GET_BLAS_NX(descr[0]);
-	unsigned dy = GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
 
-	unsigned ldA = GET_BLAS_LD(descr[1]);
-	unsigned ldC = GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
 
 	// TODO check dim ...
 	
@@ -181,12 +181,12 @@ static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unu
 
 
 
-void self_add_core_codelet(void *descr[], __attribute__((unused))  void *arg)
+void self_add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 0, arg, 1.0f);
 }
 
-void self_sub_core_codelet(void *descr[], __attribute__((unused))  void *arg)
+void self_sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 0, arg, -1.0f);
 }

+ 6 - 6
examples/strassen/strassen_models.c

@@ -130,27 +130,27 @@ static double cuda_mult_cost(starpu_buffer_descr *descr)
 
 struct starpu_perfmodel_t strassen_model_mult = {
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = mult_cost },
+		[STARPU_CPU_DEFAULT] = { .cost_model = mult_cost },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = cuda_mult_cost }
 	},
-	.type = HISTORY_BASED,
+	.type = STARPU_HISTORY_BASED,
 	.symbol = "strassen_model_mult"
 };
 
 struct starpu_perfmodel_t strassen_model_add_sub = {
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = add_sub_cost },
+		[STARPU_CPU_DEFAULT] = { .cost_model = add_sub_cost },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = cuda_add_sub_cost }
 	},
-	.type = HISTORY_BASED,
+	.type = STARPU_HISTORY_BASED,
 	.symbol = "strassen_model_add_sub"
 };
 
 struct starpu_perfmodel_t strassen_model_self_add_sub = {
 	.per_arch = { 
-		[STARPU_CORE_DEFAULT] = { .cost_model = self_add_sub_cost },
+		[STARPU_CPU_DEFAULT] = { .cost_model = self_add_sub_cost },
 		[STARPU_CUDA_DEFAULT] = { .cost_model = cuda_self_add_sub_cost }
 	},
-	.type = HISTORY_BASED,
+	.type = STARPU_HISTORY_BASED,
 	.symbol = "strassen_model_self_add_sub"
 };

+ 19 - 19
examples/strassen2/strassen2.c

@@ -77,11 +77,11 @@ static unsigned reclevel = 3;
 static unsigned norandom = 0;
 static unsigned pin = 0;
 
-extern void mult_core_codelet(void *descr[], __attribute__((unused))  void *arg);
-extern void sub_core_codelet(void *descr[], __attribute__((unused))  void *arg);
-extern void add_core_codelet(void *descr[], __attribute__((unused))  void *arg);
-extern void self_add_core_codelet(void *descr[], __attribute__((unused))  void *arg);
-extern void self_sub_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void mult_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void self_add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void self_sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);
 
 #ifdef USE_CUDA
 extern void mult_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
@@ -207,9 +207,9 @@ enum operation {
 };
 
 static starpu_codelet cl_add = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_add,
-	.core_func = add_core_codelet,
+	.cpu_func = add_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = add_cublas_codelet,
 #endif
@@ -217,9 +217,9 @@ static starpu_codelet cl_add = {
 };
 
 static starpu_codelet cl_sub = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_sub,
-	.core_func = sub_core_codelet,
+	.cpu_func = sub_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = sub_cublas_codelet,
 #endif
@@ -227,9 +227,9 @@ static starpu_codelet cl_sub = {
 };
 
 static starpu_codelet cl_mult = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_mult,
-	.core_func = mult_core_codelet,
+	.cpu_func = mult_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = mult_cublas_codelet,
 #endif
@@ -273,9 +273,9 @@ struct starpu_task *compute_add_sub_op(starpu_data_handle C, enum operation op,
 }
 
 static starpu_codelet cl_self_add = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_self_add,
-	.core_func = self_add_core_codelet,
+	.cpu_func = self_add_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = self_add_cublas_codelet,
 #endif
@@ -283,9 +283,9 @@ static starpu_codelet cl_self_add = {
 };
 
 static starpu_codelet cl_self_sub = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = &strassen_model_self_sub,
-	.core_func = self_sub_core_codelet,
+	.cpu_func = self_sub_cpu_codelet,
 #ifdef USE_CUDA
 	.cuda_func = self_sub_cublas_codelet,
 #endif
@@ -343,9 +343,9 @@ void cleanup_callback(void *_arg)
 }
 
 static starpu_codelet cleanup_codelet = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = NULL,
-	.core_func = null_codelet,
+	.cpu_func = null_codelet,
 #ifdef USE_CUDA
 	.cuda_func = null_codelet,
 #endif
@@ -719,9 +719,9 @@ static void dummy_codelet_func(__attribute__((unused))void *descr[],
 }
 
 static starpu_codelet dummy_codelet = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.model = NULL,
-	.core_func = dummy_codelet_func,
+	.cpu_func = dummy_codelet_func,
 	#ifdef USE_CUDA
 	.cuda_func = dummy_codelet_func,
 	#endif

+ 5 - 5
examples/strassen2/strassen2_kernels.c

@@ -90,7 +90,7 @@ static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  v
 	}
 }
 
-void mult_core_codelet(void *descr[], void *_args)
+void mult_cpu_codelet(void *descr[], void *_args)
 {
 	mult_common_codelet(descr, 0, _args);
 }
@@ -161,12 +161,12 @@ static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))
 	}
 }
 
-void sub_core_codelet(void *descr[], __attribute__((unused))  void *arg)
+void sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 0, arg, -1.0f);
 }
 
-void add_core_codelet(void *descr[], __attribute__((unused))  void *arg)
+void add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 0, arg, 1.0f);
 }
@@ -237,12 +237,12 @@ static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unu
 
 
 
-void self_add_core_codelet(void *descr[], __attribute__((unused))  void *arg)
+void self_add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 0, arg, 1.0f);
 }
 
-void self_sub_core_codelet(void *descr[], __attribute__((unused))  void *arg)
+void self_sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 0, arg, -1.0f);
 }

+ 8 - 8
examples/tag_example/tag_example.c

@@ -65,7 +65,7 @@ static void parse_args(int argc, char **argv)
 	}
 }
 
-void callback_core(void *argcb);
+void callback_cpu(void *argcb);
 static void express_deps(unsigned i, unsigned j, unsigned iter);
 
 static void tag_cleanup_grid(unsigned ni, unsigned nj, unsigned iter)
@@ -94,7 +94,7 @@ static void create_task_grid(unsigned iter)
 	{
 		/* create a new task */
 		struct starpu_task *task = starpu_task_create();
-		task->callback_func = callback_core;
+		task->callback_func = callback_cpu;
 		//jb->argcb = &coords[i][j];
 		task->cl = &cl;
 		task->cl_arg = NULL;
@@ -113,7 +113,7 @@ static void create_task_grid(unsigned iter)
 	{
 		/* create a new task */
 		struct starpu_task *task = starpu_task_create();
-		task->callback_func = callback_core;
+		task->callback_func = callback_cpu;
 		task->cl = &cl;
 		task->cl_arg = NULL;
 
@@ -127,7 +127,7 @@ static void create_task_grid(unsigned iter)
 }
 
 
-void callback_core(void *argcb __attribute__ ((unused)))
+void callback_cpu(void *argcb __attribute__ ((unused)))
 {
 	unsigned newcnt = STARPU_ATOMIC_ADD(&callback_cnt, -1);	
 
@@ -150,7 +150,7 @@ void callback_core(void *argcb __attribute__ ((unused)))
 	}
 }
 
-void core_codelet(void *descr[] __attribute__((unused)),
+void cpu_codelet(void *descr[] __attribute__((unused)),
 			void *_args __attribute__ ((unused)))
 {
 //	printf("execute task\n");
@@ -199,9 +199,9 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	fprintf(stderr, "ITER: %d\n", nk);
 
-	cl.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON;
-	cl.core_func = core_codelet;
-	cl.cuda_func = core_codelet;
+	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON;
+	cl.cpu_func = cpu_codelet;
+	cl.cuda_func = cpu_codelet;
 #ifdef USE_GORDON
 	cl.gordon_func = gordon_null_kernel;
 #endif

+ 5 - 5
examples/tag_example/tag_example2.c

@@ -59,7 +59,7 @@ static void parse_args(int argc, char **argv)
 	}
 }
 
-void callback_core(void *argcb);
+void callback_cpu(void *argcb);
 
 static void tag_cleanup_grid(unsigned ni, unsigned iter)
 {
@@ -100,7 +100,7 @@ static void create_task_grid(unsigned iter)
 
 }
 
-void core_codelet(void *descr[] __attribute__ ((unused)),
+void cpu_codelet(void *descr[] __attribute__ ((unused)),
 			void *_args __attribute__ ((unused)))
 {
 }
@@ -118,12 +118,12 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	parse_args(argc, argv);
 
-	cl.core_func = core_codelet;
-	cl.cuda_func = core_codelet;
+	cl.cpu_func = cpu_codelet;
+	cl.cuda_func = cpu_codelet;
 #ifdef USE_GORDON
 	cl.gordon_func = gordon_null_kernel;
 #endif
-	cl.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON;
+	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON;
 	cl.nbuffers = 0;
 
 	fprintf(stderr, "ITER : %d\n", nk);

+ 5 - 5
examples/tag_example/tag_example3.c

@@ -59,7 +59,7 @@ static void parse_args(int argc, char **argv)
 	}
 }
 
-void callback_core(void *argcb);
+void callback_cpu(void *argcb);
 
 static void tag_cleanup_grid(unsigned ni, unsigned iter)
 {
@@ -100,7 +100,7 @@ static void create_task_grid(unsigned iter)
 
 }
 
-void core_codelet(void *_args __attribute__ ((unused)))
+void cpu_codelet(void *_args __attribute__ ((unused)))
 {
 }
 
@@ -117,12 +117,12 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	parse_args(argc, argv);
 
-	cl.core_func = core_codelet;
-	cl.cuda_func = core_codelet;
+	cl.cpu_func = cpu_codelet;
+	cl.cuda_func = cpu_codelet;
 #ifdef USE_GORDON
 	cl.gordon_func = gordon_null_kernel;
 #endif
-	cl.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON;
+	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON;
 	cl.nbuffers = 0;
 
 	fprintf(stderr, "ITER : %d\n", nk);

+ 5 - 5
examples/tag_example/tag_restartable.c

@@ -62,7 +62,7 @@ static void parse_args(int argc, char **argv)
 	}
 }
 
-void callback_core(void *argcb);
+void callback_cpu(void *argcb);
 
 static void create_task_grid(unsigned iter)
 {
@@ -102,7 +102,7 @@ static void start_task_grid(unsigned iter)
 		starpu_submit_task(tasks[iter][i]);
 }
 
-void core_codelet(void *descr[], void *_args __attribute__((unused)))
+void cpu_codelet(void *descr[], void *_args __attribute__((unused)))
 {
 	//int i = (uintptr_t) _args;
 	//printf("doing %x\n", i);
@@ -123,12 +123,12 @@ int main(int argc __attribute__((unused)) , char **argv __attribute__((unused)))
 
 	parse_args(argc, argv);
 
-	cl.core_func = core_codelet;
-	cl.cuda_func = core_codelet;
+	cl.cpu_func = cpu_codelet;
+	cl.cuda_func = cpu_codelet;
 #ifdef USE_GORDON
 	cl.gordon_func = gordon_null_kernel;
 #endif
-	cl.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON;
+	cl.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON;
 	cl.nbuffers = 0;
 
 	fprintf(stderr, "ITER : %d\n", nk);

+ 1 - 1
include/starpu-perfmodel.h

@@ -37,7 +37,7 @@ struct starpu_buffer_descr_t;
 */
 
 enum starpu_perf_archtype {
-	STARPU_CORE_DEFAULT = 0,
+	STARPU_CPU_DEFAULT = 0,
 	STARPU_CUDA_DEFAULT = 1,
 	/* STARPU_CUDA_DEFAULT + devid */
 	STARPU_GORDON_DEFAULT = STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS

+ 3 - 3
include/starpu-task.h

@@ -27,7 +27,7 @@
 
 #include <starpu-data.h>
 
-#define STARPU_CORE	((1ULL)<<1)
+#define STARPU_CPU	((1ULL)<<1)
 #define STARPU_CUDA	((1ULL)<<3)
 #define STARPU_SPU	((1ULL)<<4)
 #define STARPU_GORDON	((1ULL)<<5)
@@ -52,10 +52,10 @@ typedef struct starpu_codelet_t {
 
 	/* the different implementations of the codelet */
 	//void (*cuda_func)(starpu_data_interface_t *, void *);
-	//void (*core_func)(starpu_data_interface_t *, void *);
+	//void (*cpu_func)(starpu_data_interface_t *, void *);
 
 	void (*cuda_func)(void **, void *);
-	void (*core_func)(void **, void *);
+	void (*cpu_func)(void **, void *);
 	uint8_t gordon_func;
 
 	/* how many buffers do the codelet takes as argument ? */

+ 1 - 1
include/starpu-util.h

@@ -217,7 +217,7 @@ void starpu_trace_user_event(unsigned code);
 void starpu_helper_init_cublas(void);
 void starpu_helper_shutdown_cublas(void);
 
-/* Call func(arg) on every worker matching the "where" mask (eg. CUDA|CORE to
+/* Call func(arg) on every worker matching the "where" mask (eg. CUDA|CPU to
  * execute the function on every CPUs and every CUDA devices). This function is
  * synchronous, but the different workers may execute the function in parallel.
  * */

+ 3 - 3
include/starpu.h

@@ -72,7 +72,7 @@ void starpu_shutdown(void);
  * StarPU tasks). The returned value should be at most STARPU_NMAXWORKERS. */
 unsigned starpu_get_worker_count(void);
 
-unsigned starpu_get_core_worker_count(void);
+unsigned starpu_get_cpu_worker_count(void);
 unsigned starpu_get_cuda_worker_count(void);
 unsigned starpu_get_spu_worker_count(void);
 
@@ -82,14 +82,14 @@ unsigned starpu_get_spu_worker_count(void);
 int starpu_get_worker_id(void);
 
 enum starpu_archtype {
-	STARPU_CORE_WORKER, /* CPU core */
+	STARPU_CPU_WORKER, /* CPU core */
 	STARPU_CUDA_WORKER, /* NVIDIA CUDA device */
 	STARPU_GORDON_WORKER /* Cell SPU */
 };
 
 /* This function returns the type of worker associated to an identifier (as
  * returned by the starpu_get_worker_id function). The returned value indicates
- * the architecture of the worker: STARPU_CORE_WORKER for a CPU core,
+ * the architecture of the worker: STARPU_CPU_WORKER for a CPU core,
  * STARPU_CUDA_WORKER for a CUDA device, and STARPU_GORDON_WORKER for a Cell
  * SPU. The value returned for an invalid identifier is unspecified.  */
 enum starpu_archtype starpu_get_worker_type(int id);

+ 8 - 8
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -107,8 +107,8 @@ static struct starpu_perfmodel_t STARPU_PLU(model_22) = {
 };
 
 starpu_codelet STARPU_PLU(cl22) = {
-	.where = CORE|CUDA,
-	.core_func = STARPU_PLU(cpu_u22),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_PLU(cpu_u22),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_PLU(cublas_u22),
 #endif
@@ -214,8 +214,8 @@ static struct starpu_perfmodel_t STARPU_PLU(model_12) = {
 };
 
 starpu_codelet STARPU_PLU(cl12) = {
-	.where = CORE|CUDA,
-	.core_func = STARPU_PLU(cpu_u12),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_PLU(cpu_u12),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_PLU(cublas_u12),
 #endif
@@ -323,8 +323,8 @@ static struct starpu_perfmodel_t STARPU_PLU(model_21) = {
 };
 
 starpu_codelet STARPU_PLU(cl21) = {
-	.where = CORE|CUDA,
-	.core_func = STARPU_PLU(cpu_u21),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_PLU(cpu_u21),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_PLU(cublas_u21),
 #endif
@@ -428,8 +428,8 @@ static struct starpu_perfmodel_t STARPU_PLU(model_11) = {
 };
 
 starpu_codelet STARPU_PLU(cl11) = {
-	.where = CORE|CUDA,
-	.core_func = STARPU_PLU(cpu_u11),
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = STARPU_PLU(cpu_u11),
 #ifdef USE_CUDA
 	.cuda_func = STARPU_PLU(cublas_u11),
 #endif

+ 3 - 3
mpi/tests/ring.c

@@ -25,18 +25,18 @@ starpu_data_handle token_handle;
 extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 
-void increment_core(void *descr[], __attribute__ ((unused)) void *_args)
+void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	unsigned *tokenptr = (unsigned *)STARPU_GET_VECTOR_PTR(descr[0]);
 	(*tokenptr)++;
 }
 
 static starpu_codelet increment_cl = {
-	.where = CORE|CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 #ifdef USE_CUDA
 	.cuda_func = increment_cuda,
 #endif
-	.core_func = increment_core,
+	.cpu_func = increment_cpu,
 	.nbuffers = 1
 };
 

+ 3 - 3
mpi/tests/ring_async.c

@@ -25,18 +25,18 @@ starpu_data_handle token_handle;
 extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 
-void increment_core(void *descr[], __attribute__ ((unused)) void *_args)
+void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	unsigned *tokenptr = (unsigned *)STARPU_GET_VECTOR_PTR(descr[0]);
 	(*tokenptr)++;
 }
 
 static starpu_codelet increment_cl = {
-	.where = CORE|CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 #ifdef USE_CUDA
 	.cuda_func = increment_cuda,
 #endif
-	.core_func = increment_core,
+	.cpu_func = increment_cpu,
 	.nbuffers = 1
 };
 

+ 2 - 2
src/common/fxt.c

@@ -90,9 +90,9 @@ void stop_fxt_profiling(void)
 	}
 }
 
-void fxt_register_thread(unsigned coreid)
+void fxt_register_thread(unsigned cpuid)
 {
-	FUT_DO_PROBE2(FUT_NEW_LWP_CODE, coreid, syscall(SYS_gettid));
+	FUT_DO_PROBE2(FUT_NEW_LWP_CODE, cpuid, syscall(SYS_gettid));
 }
 
 #endif

+ 2 - 2
src/common/fxt.h

@@ -33,7 +33,7 @@
 
 /* some key to identify the worker kind */
 #define FUT_APPS_KEY	0x100
-#define FUT_CORE_KEY	0x101
+#define FUT_CPU_KEY	0x101
 #define FUT_CUDA_KEY	0x102
 
 #define FUT_WORKER_INIT_START	0x5133
@@ -110,7 +110,7 @@ do {									\
 	sprintf((char *)args, "%s", str);				\
 } while (0);
 
-/* workerkind = FUT_CORE_KEY for instance */
+/* workerkind = FUT_CPU_KEY for instance */
 #define TRACE_NEW_MEM_NODE(nodeid)			\
 	FUT_DO_PROBE2(FUT_NEW_MEM_NODE, nodeid, syscall(SYS_gettid));
 

+ 2 - 2
src/core/jobs.h

@@ -45,8 +45,8 @@ struct worker_s;
 typedef void (*cl_func)(void **, void *);
 typedef void (*callback)(void *);
 
-#define STARPU_CORE_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CORE)
-#define STARPU_CUDA_MAY_PERFORM(j)     ((j)->task->cl->where & STARPU_CUDA)
+#define STARPU_CPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CPU)
+#define STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_CUDA)
 #define STARPU_SPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_SPU)
 #define STARPU_GORDON_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_GORDON)
 

+ 2 - 2
src/core/perfmodel/perfmodel.c

@@ -66,8 +66,8 @@ static double common_task_expected_length(struct starpu_perfmodel_t *model, uint
 		float alpha;
 		exp = model->cost_model(task->buffers);
 		switch (who) {
-			case STARPU_CORE:
-				alpha = STARPU_CORE_ALPHA;
+			case STARPU_CPU:
+				alpha = STARPU_CPU_ALPHA;
 				break;
 			case STARPU_CUDA:
 				alpha = STARPU_CUDA_ALPHA;

+ 24 - 24
src/core/perfmodel/perfmodel_bus.c

@@ -150,34 +150,34 @@ int compar_cudadev_timing(const void *left_cudadev_timing, const void *right_cud
 	return (bandwith_sum2_left < bandwith_sum2_right);
 }
 
-static void measure_bandwith_between_host_and_dev(int dev, unsigned ncores)
+static void measure_bandwith_between_host_and_dev(int dev, unsigned ncpus)
 {
-	unsigned core;
-	for (core = 0; core < ncores; core++)
+	unsigned cpu;
+	for (cpu = 0; cpu < ncpus; cpu++)
 	{
-		measure_bandwith_between_host_and_dev_on_cpu(dev, core);
+		measure_bandwith_between_host_and_dev_on_cpu(dev, cpu);
 	}
 
 	/* sort the results */
-	qsort(cudadev_timing_per_cpu[dev+1], ncores,
+	qsort(cudadev_timing_per_cpu[dev+1], ncpus,
 			sizeof(struct cudadev_timing),
 			compar_cudadev_timing);
 	
 #ifdef VERBOSE
-	for (core = 0; core < ncores; core++)
+	for (cpu = 0; cpu < ncpus; cpu++)
 	{
-		unsigned current_core = cudadev_timing_per_cpu[dev+1][core].cpu_id;
-		double bandwith_dtoh = cudadev_timing_per_cpu[dev+1][core].timing_dtoh;
-		double bandwith_htod = cudadev_timing_per_cpu[dev+1][core].timing_htod;
+		unsigned current_cpu = cudadev_timing_per_cpu[dev+1][cpu].cpu_id;
+		double bandwith_dtoh = cudadev_timing_per_cpu[dev+1][cpu].timing_dtoh;
+		double bandwith_htod = cudadev_timing_per_cpu[dev+1][cpu].timing_htod;
 
 		double bandwith_sum2 = bandwith_dtoh*bandwith_dtoh + bandwith_htod*bandwith_htod;
 
-		fprintf(stderr, "BANDWITH GPU %d CPU %d - htod %lf - dtoh %lf - %lf\n", dev, current_core, bandwith_htod, bandwith_dtoh, sqrt(bandwith_sum2));
+		fprintf(stderr, "BANDWITH GPU %d CPU %d - htod %lf - dtoh %lf - %lf\n", dev, current_cpu, bandwith_htod, bandwith_dtoh, sqrt(bandwith_sum2));
 	}
 
-	unsigned best_core = cudadev_timing_per_cpu[dev+1][0].cpu_id;
+	unsigned best_cpu = cudadev_timing_per_cpu[dev+1][0].cpu_id;
 
-	fprintf(stderr, "BANDWITH GPU %d BEST CPU %d\n", dev, best_core);
+	fprintf(stderr, "BANDWITH GPU %d BEST CPU %d\n", dev, best_cpu);
 #endif
 
 	/* The results are sorted in a decreasing order, so that the best
@@ -207,14 +207,14 @@ static void benchmark_all_cuda_devices(void)
 	}
 
 	struct machine_config_s *config = _starpu_get_machine_config();
-	unsigned ncores = _starpu_topology_get_nhwcore(config);
+	unsigned ncpus = _starpu_topology_get_nhwcpu(config);
 
         cudaGetDeviceCount(&ncuda);
 	int i;
 	for (i = 0; i < ncuda; i++)
 	{
 		/* measure bandwith between Host and Device i */
-		measure_bandwith_between_host_and_dev(i, ncores);
+		measure_bandwith_between_host_and_dev(i, ncpus);
 	}
 
 	/* Restore the former affinity */
@@ -265,7 +265,7 @@ static void load_bus_affinity_file_content(void)
 
 #ifdef USE_CUDA
 	struct machine_config_s *config = _starpu_get_machine_config();
-	unsigned ncores = _starpu_topology_get_nhwcore(config);
+	unsigned ncpus = _starpu_topology_get_nhwcpu(config);
 
         cudaGetDeviceCount(&ncuda);
 
@@ -282,10 +282,10 @@ static void load_bus_affinity_file_content(void)
 
 		STARPU_ASSERT(dummy == gpu);
 
-		unsigned core;
-		for (core = 0; core < ncores; core++)
+		unsigned cpu;
+		for (cpu = 0; cpu < ncpus; cpu++)
 		{
-			ret = fscanf(f, "%d\t", &affinity_matrix[gpu][core]);
+			ret = fscanf(f, "%d\t", &affinity_matrix[gpu][cpu]);
 			STARPU_ASSERT(ret == 1);
 		}
 
@@ -315,12 +315,12 @@ static void write_bus_affinity_file_content(void)
 
 #ifdef USE_CUDA
 	struct machine_config_s *config = _starpu_get_machine_config();
-	unsigned ncores = _starpu_topology_get_nhwcore(config);
-	unsigned core;
+	unsigned ncpus = _starpu_topology_get_nhwcpu(config);
+	unsigned cpu;
 
 	fprintf(f, "# GPU\t");
-	for (core = 0; core < ncores; core++)
-		fprintf(f, "CPU%d\t", core);
+	for (cpu = 0; cpu < ncpus; cpu++)
+		fprintf(f, "CPU%d\t", cpu);
 	fprintf(f, "\n");
 
 	int gpu;
@@ -328,9 +328,9 @@ static void write_bus_affinity_file_content(void)
 	{
 		fprintf(f, "%d\t", gpu);
 
-		for (core = 0; core < ncores; core++)
+		for (cpu = 0; cpu < ncpus; cpu++)
 		{
-			fprintf(f, "%d\t", cudadev_timing_per_cpu[gpu+1][core].cpu_id);
+			fprintf(f, "%d\t", cudadev_timing_per_cpu[gpu+1][cpu].cpu_id);
 		}
 
 		fprintf(f, "\n");

+ 4 - 4
src/core/perfmodel/perfmodel_history.c

@@ -110,7 +110,7 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_
 	if (!scan_history)
 		return;
 
-	/* parse core entries */
+	/* parse cpu entries */
 	unsigned i;
 	for (i = 0; i < nentries; i++) {
 		struct starpu_history_entry_t *entry = malloc(sizeof(struct starpu_history_entry_t));
@@ -274,7 +274,7 @@ static void save_history_based_model(struct starpu_perfmodel_t *model)
 #ifdef DEBUG_MODEL
 	fclose(model->gordon_debug_file);
 	fclose(model->cuda_debug_file);
-	fclose(model->core_debug_file);
+	fclose(model->cpu_debug_file);
 #endif
 }
 
@@ -446,9 +446,9 @@ int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel_t *mod
 
 void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen)
 {
-	if (arch == STARPU_CORE_DEFAULT)
+	if (arch == STARPU_CPU_DEFAULT)
 	{
-		snprintf(archname, maxlen, "core");
+		snprintf(archname, maxlen, "cpu");
 	}
 	else if ((STARPU_CUDA_DEFAULT <= arch)
 		&& (arch < STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS))

+ 46 - 46
src/core/topology.c

@@ -47,12 +47,12 @@ static void _starpu_initialize_workers_gpuid(struct machine_config_s *config)
 
 	config->current_gpuid = 0;
 
-	/* conf->workers_bindid indicates the successive core identifier that
+	/* conf->workers_bindid indicates the successive cpu identifier that
 	 * should be used to bind the workers. It should be either filled
 	 * according to the user's explicit parameters (from starpu_conf) or
 	 * according to the WORKERS_CPUID env. variable. Otherwise, a
 	 * round-robin policy is used to distributed the workers over the
-	 * cores. */
+	 * cpus. */
 
 	/* what do we use, explicit value, env. variable, or round-robin ? */
 	if (config->user_conf && config->user_conf->use_explicit_workers_gpuid)
@@ -125,29 +125,29 @@ static void _starpu_init_topology(struct machine_config_s *config)
 		hwloc_topology_init(&config->hwtopology);
 		hwloc_topology_load(config->hwtopology);
 
-		config->core_depth = hwloc_get_type_depth(config->hwtopology, HWLOC_OBJ_CORE);
+		config->cpu_depth = hwloc_get_type_depth(config->hwtopology, HWLOC_OBJ_CORE);
 
 		/* Would be very odd */
-		STARPU_ASSERT(config->core_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
+		STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE);
 
-		if (config->core_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+		if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN)
 			/* unknown, using logical procesors as fallback */
-			config->core_depth = hwloc_get_type_depth(config->hwtopology, HWLOC_OBJ_PROC);
+			config->cpu_depth = hwloc_get_type_depth(config->hwtopology, HWLOC_OBJ_PROC);
 
-		config->nhwcores = hwloc_get_nbobjs_by_depth(config->hwtopology, config->core_depth);
+		config->nhwcpus = hwloc_get_nbobjs_by_depth(config->hwtopology, config->cpu_depth);
 #else
-		config->nhwcores = sysconf(_SC_NPROCESSORS_ONLN);
+		config->nhwcpus = sysconf(_SC_NPROCESSORS_ONLN);
 #endif
 	
 		topology_is_initialized = 1;
 	}
 }
 
-unsigned _starpu_topology_get_nhwcore(struct machine_config_s *config)
+unsigned _starpu_topology_get_nhwcpu(struct machine_config_s *config)
 {
 	_starpu_init_topology(config);
 	
-	return config->nhwcores;
+	return config->nhwcpus;
 }
 
 static int _starpu_init_machine_config(struct machine_config_s *config,
@@ -246,7 +246,7 @@ static int _starpu_init_machine_config(struct machine_config_s *config,
 #endif
 
 /* we put the CPU section after the accelerator : in case there was an
- * accelerator found, we devote one core */
+ * accelerator found, we devote one cpu */
 #ifdef USE_CPUS
 	if (user_conf && (user_conf->ncpus != -1)) {
 		explicitval = user_conf->ncpus;
@@ -256,27 +256,27 @@ static int _starpu_init_machine_config(struct machine_config_s *config,
 	}
 
 	if (explicitval < 0) {
-		unsigned already_busy_cores = (config->ngordon_spus?1:0) + config->ncudagpus;
-		long avail_cores = config->nhwcores - (use_accelerator?already_busy_cores:0);
-		config->ncores = STARPU_MIN(avail_cores, NMAXCORES);
+		unsigned already_busy_cpus = (config->ngordon_spus?1:0) + config->ncudagpus;
+		long avail_cpus = config->nhwcpus - (use_accelerator?already_busy_cpus:0);
+		config->ncpus = STARPU_MIN(avail_cpus, NMAXCPUS);
 	} else {
 		/* use the specified value */
-		config->ncores = (unsigned)explicitval;
-		STARPU_ASSERT(config->ncores <= NMAXCORES);
+		config->ncpus = (unsigned)explicitval;
+		STARPU_ASSERT(config->ncpus <= NMAXCPUS);
 	}
-	STARPU_ASSERT(config->ncores + config->nworkers <= STARPU_NMAXWORKERS);
+	STARPU_ASSERT(config->ncpus + config->nworkers <= STARPU_NMAXWORKERS);
 
-	unsigned core;
-	for (core = 0; core < config->ncores; core++)
+	unsigned cpu;
+	for (cpu = 0; cpu < config->ncpus; cpu++)
 	{
-		config->workers[config->nworkers + core].arch = STARPU_CORE_WORKER;
-		config->workers[config->nworkers + core].perf_arch = STARPU_CORE_DEFAULT;
-		config->workers[config->nworkers + core].id = core;
-		config->workers[config->nworkers + core].worker_mask = STARPU_CORE;
-		config->worker_mask |= STARPU_CORE;
+		config->workers[config->nworkers + cpu].arch = STARPU_CPU_WORKER;
+		config->workers[config->nworkers + cpu].perf_arch = STARPU_CPU_DEFAULT;
+		config->workers[config->nworkers + cpu].id = cpu;
+		config->workers[config->nworkers + cpu].worker_mask = STARPU_CPU;
+		config->worker_mask |= STARPU_CPU;
 	}
 
-	config->nworkers += config->ncores;
+	config->nworkers += config->ncpus;
 #endif
 
 	if (config->nworkers == 0)
@@ -300,12 +300,12 @@ static void _starpu_initialize_workers_bindid(struct machine_config_s *config)
 
 	config->current_bindid = 0;
 
-	/* conf->workers_bindid indicates the successive core identifier that
+	/* conf->workers_bindid indicates the successive cpu identifier that
 	 * should be used to bind the workers. It should be either filled
 	 * according to the user's explicit parameters (from starpu_conf) or
 	 * according to the WORKERS_CPUID env. variable. Otherwise, a
 	 * round-robin policy is used to distributed the workers over the
-	 * cores. */
+	 * cpus. */
 
 	/* what do we use, explicit value, env. variable, or round-robin ? */
 	if (config->user_conf && config->user_conf->use_explicit_workers_bindid)
@@ -332,7 +332,7 @@ static void _starpu_initialize_workers_bindid(struct machine_config_s *config)
 				val = strtol(strval, &endptr, 10);
 				if (endptr != strval)
 				{
-					config->workers_bindid[i] = (unsigned)(val % config->nhwcores);
+					config->workers_bindid[i] = (unsigned)(val % config->nhwcpus);
 					strval = endptr;
 				}
 				else {
@@ -355,13 +355,13 @@ static void _starpu_initialize_workers_bindid(struct machine_config_s *config)
 	{
 		/* by default, we take a round robin policy */
 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
-			config->workers_bindid[i] = (unsigned)(i % config->nhwcores);
+			config->workers_bindid[i] = (unsigned)(i % config->nhwcpus);
 	}
 }
 
-/* This function gets the identifier of the next core on which to bind a
- * worker. In case a list of preferred cores was specified, we look for a an
- * available core among the list if possible, otherwise a round-robin policy is
+/* This function gets the identifier of the next cpu on which to bind a
+ * worker. In case a list of preferred cpus was specified, we look for an
+ * available cpu among the list if possible, otherwise a round-robin policy is
  * used. */
 static inline int _starpu_get_next_bindid(struct machine_config_s *config,
 				int *preferred_binding, int npreferred)
@@ -374,21 +374,21 @@ static inline int _starpu_get_next_bindid(struct machine_config_s *config,
 		if (found)
 			break;
 
-		unsigned requested_core = preferred_binding[current_preferred];
+		unsigned requested_cpu = preferred_binding[current_preferred];
 
-		/* can we bind the worker on the requested core ? */
+		/* can we bind the worker on the requested cpu ? */
 		unsigned ind;
-		for (ind = config->current_bindid; ind < config->nhwcores; ind++)
+		for (ind = config->current_bindid; ind < config->nhwcpus; ind++)
 		{
-			if (config->workers_bindid[ind] == requested_core)
+			if (config->workers_bindid[ind] == requested_cpu)
 			{
-				/* the core is available, we  use it ! In order
+				/* the cpu is available, we  use it ! In order
 				 * to make sure that it will not be used again
 				 * later on, we remove the entry from the list
 				 * */
 				config->workers_bindid[ind] =
 					config->workers_bindid[config->current_bindid];
-				config->workers_bindid[config->current_bindid] = requested_core;
+				config->workers_bindid[config->current_bindid] = requested_cpu;
 
 				found = 1;
 
@@ -402,14 +402,14 @@ static inline int _starpu_get_next_bindid(struct machine_config_s *config,
 	return (int)config->workers_bindid[i];
 }
 
-void _starpu_bind_thread_on_cpu(struct machine_config_s *config __attribute__((unused)), unsigned coreid)
+void _starpu_bind_thread_on_cpu(struct machine_config_s *config __attribute__((unused)), unsigned cpuid)
 {
 	int ret;
 
 #ifdef HAVE_HWLOC
 	_starpu_init_topology(config);
 
-	hwloc_obj_t obj = hwloc_get_obj_by_depth(config->hwtopology, config->core_depth, coreid);
+	hwloc_obj_t obj = hwloc_get_obj_by_depth(config->hwtopology, config->cpu_depth, cpuid);
 	hwloc_cpuset_t set = obj->cpuset;
 	hwloc_cpuset_singlify(set);
 	ret = hwloc_set_cpubind(config->hwtopology, set, HWLOC_CPUBIND_THREAD);
@@ -423,7 +423,7 @@ void _starpu_bind_thread_on_cpu(struct machine_config_s *config __attribute__((u
 	/* fix the thread on the correct cpu */
 	cpu_set_t aff_mask;
 	CPU_ZERO(&aff_mask);
-	CPU_SET(coreid, &aff_mask);
+	CPU_SET(cpuid, &aff_mask);
 
 	pthread_t self = pthread_self();
 
@@ -444,10 +444,10 @@ static void _starpu_init_workers_binding(struct machine_config_s *config)
 	/* launch one thread per CPU */
 	unsigned ram_memory_node;
 
-	/* a single core is dedicated for the accelerators */
+	/* a single cpu is dedicated for the accelerators */
 	int accelerator_bindid = -1;
 
-	/* note that even if the CPU core are not used, we always have a RAM node */
+	/* note that even if the CPUs are not used, we always have a RAM node */
 	/* TODO : support NUMA  ;) */
 	ram_memory_node = _starpu_register_memory_node(RAM);
 
@@ -464,8 +464,8 @@ static void _starpu_init_workers_binding(struct machine_config_s *config)
 		
 		/* select the memory node that contains worker's memory */
 		switch (workerarg->arch) {
-			case STARPU_CORE_WORKER:
-			/* "dedicate" a cpu core to that worker */
+			case STARPU_CPU_WORKER:
+			/* "dedicate" a cpu to that worker */
 				is_a_set_of_accelerators = 0;
 				memory_node = ram_memory_node;
 				break;
@@ -481,7 +481,7 @@ static void _starpu_init_workers_binding(struct machine_config_s *config)
 				{
 					/* StarPU is allowed to bind threads automatically */
 					preferred_binding = get_gpu_affinity_vector(workerarg->id);
-					npreferred = config->nhwcores;
+					npreferred = config->nhwcpus;
 				}
 				is_a_set_of_accelerators = 0;
 				memory_node = _starpu_register_memory_node(CUDA_RAM);

+ 3 - 3
src/core/topology.h

@@ -66,9 +66,9 @@ int starpu_build_topology(struct machine_config_s *config);
 
 void starpu_destroy_topology(struct machine_config_s *config);
 
-/* returns the number of physical cores */
-unsigned _starpu_topology_get_nhwcore(struct machine_config_s *config);
+/* returns the number of physical cpus */
+unsigned _starpu_topology_get_nhwcpu(struct machine_config_s *config);
 
-void _starpu_bind_thread_on_cpu(struct machine_config_s *config, unsigned coreid);
+void _starpu_bind_thread_on_cpu(struct machine_config_s *config, unsigned cpuid);
 
 #endif // __TOPOLOGY_H__

+ 9 - 9
src/core/workers.c

@@ -42,9 +42,9 @@ inline uint32_t may_submit_cuda_task(void)
 	return (STARPU_CUDA & config.worker_mask);
 }
 
-inline uint32_t may_submit_core_task(void)
+inline uint32_t may_submit_cpu_task(void)
 {
-	return (STARPU_CORE & config.worker_mask);
+	return (STARPU_CPU & config.worker_mask);
 }
 
 inline uint32_t _starpu_worker_may_execute_task(unsigned workerid, uint32_t where)
@@ -72,8 +72,8 @@ static void _starpu_init_worker_queue(struct worker_s *workerarg)
 	jobq->who |= workerarg->worker_mask;
 
 	switch (workerarg->arch) {
-		case STARPU_CORE_WORKER:
-			jobq->alpha = STARPU_CORE_ALPHA;
+		case STARPU_CPU_WORKER:
+			jobq->alpha = STARPU_CPU_ALPHA;
 			break;
 		case STARPU_CUDA_WORKER:
 			jobq->alpha = STARPU_CUDA_ALPHA;
@@ -123,11 +123,11 @@ static void _starpu_init_workers(struct machine_config_s *config)
 
 		switch (workerarg->arch) {
 #ifdef USE_CPUS
-			case STARPU_CORE_WORKER:
+			case STARPU_CPU_WORKER:
 				workerarg->set = NULL;
 				workerarg->worker_is_initialized = 0;
 				pthread_create(&workerarg->worker_thread, 
-						NULL, _starpu_core_worker, workerarg);
+						NULL, _starpu_cpu_worker, workerarg);
 				break;
 #endif
 #ifdef USE_CUDA
@@ -178,7 +178,7 @@ static void _starpu_init_workers(struct machine_config_s *config)
 		struct worker_s *workerarg = &config->workers[worker];
 
 		switch (workerarg->arch) {
-			case STARPU_CORE_WORKER:
+			case STARPU_CPU_WORKER:
 			case STARPU_CUDA_WORKER:
 				pthread_mutex_lock(&workerarg->mutex);
 				if (!workerarg->worker_is_initialized)
@@ -452,9 +452,9 @@ unsigned starpu_get_worker_count(void)
 	return config.nworkers;
 }
 
-unsigned starpu_get_core_worker_count(void)
+unsigned starpu_get_cpu_worker_count(void)
 {
-	return config.ncores;
+	return config.ncpus;
 }
 
 unsigned starpu_get_cuda_worker_count(void)

+ 7 - 7
src/core/workers.h

@@ -50,7 +50,7 @@
 
 #include <datawizard/datawizard.h>
 
-#define STARPU_CORE_ALPHA	1.0f
+#define STARPU_CPU_ALPHA	1.0f
 #define STARPU_CUDA_ALPHA	13.33f
 #define STARPU_GORDON_ALPHA	6.0f /* XXX this is a random value ... */
 
@@ -67,8 +67,8 @@ struct worker_s {
 	uint32_t worker_mask; /* what is the type of worker ? */
 	enum starpu_perf_archtype perf_arch; /* in case there are different models of the same arch */
 	pthread_t worker_thread; /* the thread which runs the worker */
-	int id; /* which core/gpu/etc is controlled by the workker ? */
-	int bindid; /* which core is the driver bound to ? */
+	int id; /* which cpu/gpu/etc is controlled by the worker ? */
+	int bindid; /* which cpu is the driver bound to ? */
 	int workerid; /* uniquely identify the worker among all processing units types */
         pthread_cond_t ready_cond; /* indicate when the worker is ready */
 	unsigned memory_node; /* which memory node is associated that worker to ? */
@@ -101,12 +101,12 @@ struct machine_config_s {
 
 #ifdef HAVE_HWLOC
 	hwloc_topology_t hwtopology;
-	int core_depth;
+	int cpu_depth;
 #endif
 
-	unsigned nhwcores;
+	unsigned nhwcpus;
 
-	unsigned ncores;
+	unsigned ncpus;
 	unsigned ncudagpus;
 	unsigned ngordon_spus;
 
@@ -137,7 +137,7 @@ unsigned _starpu_machine_is_running(void);
 
 inline uint32_t _starpu_worker_exists(uint32_t task_mask);
 inline uint32_t may_submit_cuda_task(void);
-inline uint32_t may_submit_core_task(void);
+inline uint32_t may_submit_cpu_task(void);
 inline uint32_t _starpu_worker_may_execute_task(unsigned workerid, uint32_t where);
 unsigned _starpu_worker_can_block(unsigned memnode);
 

+ 41 - 41
src/drivers/core/driver_core.c

@@ -20,7 +20,7 @@
 #include "driver_core.h"
 #include <core/policies/sched_policy.h>
 
-static int execute_job_on_core(job_t j, struct worker_s *core_args)
+static int execute_job_on_cpu(job_t j, struct worker_s *cpu_args)
 {
 	int ret;
 	tick_t codelet_start, codelet_end;
@@ -31,7 +31,7 @@ static int execute_job_on_core(job_t j, struct worker_s *core_args)
 	struct starpu_codelet_t *cl = task->cl;
 
 	STARPU_ASSERT(cl);
-	STARPU_ASSERT(cl->core_func);
+	STARPU_ASSERT(cl->cpu_func);
 
 	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
@@ -55,17 +55,17 @@ static int execute_job_on_core(job_t j, struct worker_s *core_args)
 	if (calibrate_model || BENCHMARK_COMM)
 		GET_TICK(codelet_start);
 
-	core_args->status = STATUS_EXECUTING;
-	cl_func func = cl->core_func;
+	cpu_args->status = STATUS_EXECUTING;
+	cl_func func = cl->cpu_func;
 	func(task->interface, task->cl_arg);
 
-	cl->per_worker_stats[core_args->workerid]++;
+	cl->per_worker_stats[cpu_args->workerid]++;
 	
 	if (calibrate_model || BENCHMARK_COMM)
 		GET_TICK(codelet_end);
 
 	TRACE_END_CODELET_BODY(j);
-	core_args->status = STATUS_UNKNOWN;
+	cpu_args->status = STATUS_UNKNOWN;
 
 	push_task_output(task, 0);
 
@@ -76,72 +76,72 @@ static int execute_job_on_core(job_t j, struct worker_s *core_args)
 		double measured_comm = timing_delay(&codelet_start_comm, &codelet_end_comm);
 
 //		fprintf(stderr, "%d\t%d\n", (int)j->penality, (int)measured_comm);
-		core_args->jobq->total_computation_time += measured;
-		core_args->jobq->total_communication_time += measured_comm;
+		cpu_args->jobq->total_computation_time += measured;
+		cpu_args->jobq->total_communication_time += measured_comm;
 
 		double error;
 		error = fabs(STARPU_MAX(measured, 0.0) - STARPU_MAX(j->predicted, 0.0)); 
 //		fprintf(stderr, "Error -> %le, predicted -> %le measured ->%le\n", error, j->predicted, measured);
-		core_args->jobq->total_computation_time_error += error;
+		cpu_args->jobq->total_computation_time_error += error;
 
 		if (calibrate_model)
-			_starpu_update_perfmodel_history(j, core_args->arch, core_args->id, measured);
+			_starpu_update_perfmodel_history(j, cpu_args->arch, cpu_args->id, measured);
 	}
 //#endif
 
-	core_args->jobq->total_job_performed++;
+	cpu_args->jobq->total_job_performed++;
 
 	return 0;
 }
 
-void *_starpu_core_worker(void *arg)
+void *_starpu_cpu_worker(void *arg)
 {
-	struct worker_s *core_arg = arg;
+	struct worker_s *cpu_arg = arg;
 
 #ifdef USE_FXT
-	fxt_register_thread(core_arg->bindid);
+	fxt_register_thread(cpu_arg->bindid);
 #endif
-	TRACE_WORKER_INIT_START(FUT_CORE_KEY, core_arg->memory_node);
+	TRACE_WORKER_INIT_START(FUT_CPU_KEY, cpu_arg->memory_node);
 
-	_starpu_bind_thread_on_cpu(core_arg->config, core_arg->bindid);
+	_starpu_bind_thread_on_cpu(cpu_arg->config, cpu_arg->bindid);
 
 #ifdef VERBOSE
-        fprintf(stderr, "core worker %d is ready on logical core %d\n", core_arg->id, core_arg->bindid);
+        fprintf(stderr, "cpu worker %d is ready on logical cpu %d\n", cpu_arg->id, cpu_arg->bindid);
 #endif
 
-	set_local_memory_node_key(&core_arg->memory_node);
+	set_local_memory_node_key(&cpu_arg->memory_node);
 
-	set_local_queue(core_arg->jobq);
+	set_local_queue(cpu_arg->jobq);
 
-	_starpu_set_local_worker_key(core_arg);
+	_starpu_set_local_worker_key(cpu_arg);
 
-	snprintf(core_arg->name, 32, "CORE %d", core_arg->id);
+	snprintf(cpu_arg->name, 32, "CPU %d", cpu_arg->id);
 
 	/* this is only useful (and meaningful) is there is a single
 	   memory node "related" to that queue */
-	core_arg->jobq->memory_node = core_arg->memory_node;
+	cpu_arg->jobq->memory_node = cpu_arg->memory_node;
 
-	core_arg->jobq->total_computation_time = 0.0;
-	core_arg->jobq->total_communication_time = 0.0;
-	core_arg->jobq->total_computation_time_error = 0.0;
-	core_arg->jobq->total_job_performed = 0;
+	cpu_arg->jobq->total_computation_time = 0.0;
+	cpu_arg->jobq->total_communication_time = 0.0;
+	cpu_arg->jobq->total_computation_time_error = 0.0;
+	cpu_arg->jobq->total_job_performed = 0;
 
-	core_arg->status = STATUS_UNKNOWN;
+	cpu_arg->status = STATUS_UNKNOWN;
 	
 	TRACE_WORKER_INIT_END
 
         /* tell the main thread that we are ready */
-	pthread_mutex_lock(&core_arg->mutex);
-	core_arg->worker_is_initialized = 1;
-	pthread_cond_signal(&core_arg->ready_cond);
-	pthread_mutex_unlock(&core_arg->mutex);
+	pthread_mutex_lock(&cpu_arg->mutex);
+	cpu_arg->worker_is_initialized = 1;
+	pthread_cond_signal(&cpu_arg->ready_cond);
+	pthread_mutex_unlock(&cpu_arg->mutex);
 
         job_t j;
 	int res;
 
 	struct sched_policy_s *policy = get_sched_policy();
 	struct jobq_s *queue = policy->get_local_queue(policy);
-	unsigned memnode = core_arg->memory_node;
+	unsigned memnode = cpu_arg->memory_node;
 
 	while (_starpu_machine_is_running())
 	{
@@ -154,7 +154,7 @@ void *_starpu_core_worker(void *arg)
 		jobq_lock(queue);
 
 		/* perhaps there is some local task to be executed first */
-		j = _starpu_pop_local_task(core_arg);
+		j = _starpu_pop_local_task(cpu_arg);
 
 		/* otherwise ask a task to the scheduler */
 		if (!j)
@@ -169,15 +169,15 @@ void *_starpu_core_worker(void *arg)
 		
 		jobq_unlock(queue);
 
-		/* can a core perform that task ? */
-		if (!STARPU_CORE_MAY_PERFORM(j)) 
+		/* can a cpu perform that task ? */
+		if (!STARPU_CPU_MAY_PERFORM(j)) 
 		{
 			/* put it and the end of the queue ... XXX */
 			push_task(j);
 			continue;
 		}
 
-                res = execute_job_on_core(j, core_arg);
+                res = execute_job_on_cpu(j, cpu_arg);
 		if (res) {
 			switch (res) {
 				case -EAGAIN:
@@ -194,20 +194,20 @@ void *_starpu_core_worker(void *arg)
 	TRACE_WORKER_DEINIT_START
 
 #ifdef DATA_STATS
-	fprintf(stderr, "CORE #%d computation %le comm %le (%lf \%%)\n", core_arg->id, core_arg->jobq->total_computation_time, core_arg->jobq->total_communication_time,  core_arg->jobq->total_communication_time*100.0/core_arg->jobq->total_computation_time);
+	fprintf(stderr, "CPU #%d computation %le comm %le (%lf \%%)\n", cpu_arg->id, cpu_arg->jobq->total_computation_time, cpu_arg->jobq->total_communication_time,  cpu_arg->jobq->total_communication_time*100.0/cpu_arg->jobq->total_computation_time);
 #endif
 
 #ifdef VERBOSE
 	double ratio = 0;
-	if (core_arg->jobq->total_job_performed != 0)
+	if (cpu_arg->jobq->total_job_performed != 0)
 	{
-		ratio = core_arg->jobq->total_computation_time_error/core_arg->jobq->total_computation_time;
+		ratio = cpu_arg->jobq->total_computation_time_error/cpu_arg->jobq->total_computation_time;
 	}
 
-	_starpu_print_to_logfile("MODEL ERROR: CORE %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", core_arg->id, core_arg->jobq->total_computation_time_error, core_arg->jobq->total_computation_time, ratio, core_arg->jobq->total_job_performed);
+	_starpu_print_to_logfile("MODEL ERROR: CPU %d ERROR %lf EXEC %lf RATIO %lf NTASKS %d\n", cpu_arg->id, cpu_arg->jobq->total_computation_time_error, cpu_arg->jobq->total_computation_time, ratio, cpu_arg->jobq->total_job_performed);
 #endif
 
-	TRACE_WORKER_DEINIT_END(FUT_CORE_KEY);
+	TRACE_WORKER_DEINIT_END(FUT_CPU_KEY);
 
 	pthread_exit(NULL);
 }

+ 3 - 3
src/drivers/core/driver_core.h

@@ -32,10 +32,10 @@
 
 #include <starpu.h>
 
-void *_starpu_core_worker(void *);
+void *_starpu_cpu_worker(void *);
 
-#ifndef NMAXCORES
-#define NMAXCORES       4
+#ifndef NMAXCPUS
+#define NMAXCPUS       4
 #endif
 
 #endif //  __DRIVER_CORE_H__

+ 1 - 1
src/util/execute_on_all.c

@@ -38,7 +38,7 @@ void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t whe
 	struct starpu_codelet_t wrapper_cl = {
 		.where = where,
 		.cuda_func = wrapper_func,
-		.core_func = wrapper_func,
+		.cpu_func = wrapper_func,
 		/* XXX we do not handle Cell .. */
 		.nbuffers = 0,
 		.model = NULL

+ 2 - 2
tests/core/empty_task_sync_point.c

@@ -34,8 +34,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.model = NULL,
 	.nbuffers = 0

+ 2 - 2
tests/core/execute_on_a_specific_worker.c

@@ -71,8 +71,8 @@ static starpu_access_mode select_random_mode(void)
 
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = codelet_null,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = codelet_null,
 	.cuda_func = codelet_null,
 	.nbuffers = 1
 };

+ 2 - 2
tests/core/multithreaded.c

@@ -33,8 +33,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.model = NULL,
 	.nbuffers = 0

+ 2 - 2
tests/core/starpu_wait_all_tasks.c

@@ -28,8 +28,8 @@ static void dummy_func(void *descr[], void *arg)
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 #ifdef USE_GORDON
 	.gordon_func = 0, /* this will be defined later */

+ 2 - 2
tests/core/starpu_wait_task.c

@@ -29,8 +29,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.model = NULL,
 	.nbuffers = 0

+ 2 - 2
tests/core/static_restartable.c

@@ -28,8 +28,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.model = NULL,
 	.nbuffers = 0

+ 2 - 2
tests/core/static_restartable_tag.c

@@ -29,8 +29,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.model = NULL,
 	.nbuffers = 0

+ 2 - 2
tests/core/static_restartable_using_initializer.c

@@ -31,8 +31,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.model = NULL,
 	.nbuffers = 0

+ 2 - 2
tests/core/tag-wait-api.c

@@ -26,8 +26,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.model = NULL,
 	.nbuffers = 0

+ 2 - 2
tests/datawizard/dining_philosophers.c

@@ -27,9 +27,9 @@ static void eat_kernel(void *descr[], void *arg)
 }
 
 static starpu_codelet eating_cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.cuda_func = eat_kernel,
-	.core_func = eat_kernel,
+	.cpu_func = eat_kernel,
 	.nbuffers = 2
 };
 

+ 3 - 3
tests/datawizard/dsm_stress.c

@@ -54,7 +54,7 @@ static void cuda_codelet_null(void *descr[], __attribute__ ((unused)) void *_arg
 {
 }
 
-static void core_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
+static void cpu_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 }
 
@@ -74,8 +74,8 @@ static starpu_access_mode select_random_mode(void)
 
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = core_codelet_null,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = cpu_codelet_null,
 	.cuda_func = cuda_codelet_null,
 	.nbuffers = 2
 };

+ 2 - 2
tests/datawizard/readers_and_writers.c

@@ -24,9 +24,9 @@ static void dummy_kernel(void *descr[], void *arg)
 }
 
 static starpu_codelet rw_cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.cuda_func = dummy_kernel,
-	.core_func = dummy_kernel,
+	.cpu_func = dummy_kernel,
 	.nbuffers = 1
 };
 

+ 6 - 6
tests/datawizard/sync_and_notify_data.c

@@ -51,13 +51,13 @@ void cuda_codelet_incC(void *descr[], __attribute__ ((unused)) void *_args);
 starpu_data_handle v_handle;
 static unsigned v[VECTORSIZE] __attribute__((aligned(128))) = {0, 0, 0, 0};
 
-void core_codelet_incA(void *descr[], __attribute__ ((unused)) void *_args)
+void cpu_codelet_incA(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	unsigned *val = (unsigned *)STARPU_GET_VECTOR_PTR(descr[0]);
 	val[0]++;
 }
 
-void core_codelet_incC(void *descr[], __attribute__ ((unused)) void *_args)
+void cpu_codelet_incC(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	unsigned *val = (unsigned *)STARPU_GET_VECTOR_PTR(descr[0]);
 	val[2]++;
@@ -91,8 +91,8 @@ int main(int argc, char **argv)
 		{
 			/* increment a = v[0] */
 			starpu_codelet cl_inc_a = {
-				.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-				.core_func = core_codelet_incA,
+				.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+				.cpu_func = cpu_codelet_incA,
 #ifdef USE_CUDA
 				.cuda_func = cuda_codelet_incA,
 #endif
@@ -127,8 +127,8 @@ int main(int argc, char **argv)
 		{
 			/* increment c = v[2] */
 			starpu_codelet cl_inc_c = {
-				.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-				.core_func = core_codelet_incC,
+				.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+				.cpu_func = cpu_codelet_incC,
 #ifdef USE_CUDA
 				.cuda_func = cuda_codelet_incC,
 #endif

+ 2 - 2
tests/datawizard/sync_with_data_with_mem.c

@@ -33,8 +33,8 @@ static void dummy_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 }
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_codelet,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_codelet,
 #ifdef USE_CUDA
 	.cuda_func = dummy_codelet,
 #endif

+ 2 - 2
tests/datawizard/sync_with_data_with_mem_non_blocking.c

@@ -33,8 +33,8 @@ static void dummy_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 }
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_codelet,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_codelet,
 #ifdef USE_CUDA
 	.cuda_func = dummy_codelet,
 #endif

+ 2 - 2
tests/datawizard/unpartition.c

@@ -32,8 +32,8 @@ static void dummy_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 }
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_codelet,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_codelet,
 #ifdef USE_CUDA
 	.cuda_func = dummy_codelet,
 #endif

+ 5 - 5
tests/datawizard/write_only_tmp_buffer.c

@@ -33,7 +33,7 @@ static void cuda_codelet_null(void *descr[], __attribute__ ((unused)) void *_arg
 }
 #endif
 
-static void core_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
+static void cpu_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	char *buf = (char *)STARPU_GET_VECTOR_PTR(descr[0]);
 
@@ -51,8 +51,8 @@ static void display_var(void *descr[], __attribute__ ((unused)) void *_args)
 }
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = core_codelet_null,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = cpu_codelet_null,
 #ifdef USE_CUDA
 	.cuda_func = cuda_codelet_null,
 #endif
@@ -60,8 +60,8 @@ static starpu_codelet cl = {
 };
 
 static starpu_codelet display_cl = {
-	.where = STARPU_CORE,
-	.core_func = display_var,
+	.where = STARPU_CPU,
+	.cpu_func = display_var,
 	.nbuffers = 1
 };
 

+ 2 - 2
tests/errorcheck/invalid_blocking_calls.c

@@ -38,8 +38,8 @@ static void wrong_func(void *descr[], void *arg)
 
 static starpu_codelet wrong_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = wrong_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = wrong_func,
 	.cuda_func = wrong_func,
 	.model = NULL,
 	.nbuffers = 0

+ 1 - 1
tests/heat/speedup.gp

@@ -21,7 +21,7 @@ set term postscript eps enhanced color
 set output "speedup.eps"
 set title "Facto LU : impact of granularity"
 set xlabel "problem size (unknowns)"
-set ylabel "speedup ( 3 Cores + 1 GPU vs . 4 Cores )"
+set ylabel "speedup ( 3 Cpus + 1 GPU vs . 4 Cpus )"
 set yrange [0.5:2.5]
 plot "speedup.8" usi 1:($3/$2) with lines title "(8x8)",\
 	"speedup.16" usi 1:($3/$2) with lines title "(16x16)",\

+ 2 - 2
tests/helper/execute_on_all.c

@@ -32,9 +32,9 @@ int main(int argc, char **argv)
 
 	int arg = 0x42;
 
-	starpu_execute_on_each_worker(func, &arg, STARPU_CORE|STARPU_CUDA);
+	starpu_execute_on_each_worker(func, &arg, STARPU_CPU|STARPU_CUDA);
 
-	starpu_execute_on_each_worker(func, &arg, STARPU_CORE);
+	starpu_execute_on_each_worker(func, &arg, STARPU_CPU);
 	
 	starpu_execute_on_each_worker(func, &arg, STARPU_CUDA);
 

+ 2 - 2
tests/helper/starpu_create_sync_task.c

@@ -25,8 +25,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet =
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.nbuffers = 0
 };

+ 2 - 2
tests/microbenchs/async-tasks-overhead.c

@@ -35,8 +35,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 #ifdef USE_GORDON
 	.gordon_func = 0, /* this will be defined later */

+ 2 - 2
tests/microbenchs/prefetch_data_on_node.c

@@ -75,8 +75,8 @@ static starpu_access_mode select_random_mode(void)
 
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = codelet_null,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = codelet_null,
 	.cuda_func = codelet_null,
 	.nbuffers = 1
 };

+ 3 - 3
tests/microbenchs/redundant_buffer.c

@@ -31,13 +31,13 @@ static void cuda_codelet_null(void *descr[], __attribute__ ((unused)) void *_arg
 {
 }
 
-static void core_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
+static void cpu_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 }
 
 static starpu_codelet cl = {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = core_codelet_null,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = cpu_codelet_null,
 	.cuda_func = cuda_codelet_null,
 	.nbuffers = 2
 };

+ 2 - 2
tests/microbenchs/sync-tasks-overhead.c

@@ -28,8 +28,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA|STARPU_GORDON,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_GORDON,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 #ifdef USE_GORDON
 	.gordon_func = 0, /* this will be defined later */

+ 2 - 2
tests/microbenchs/tasks-overhead.c

@@ -35,8 +35,8 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 
 static starpu_codelet dummy_codelet = 
 {
-	.where = STARPU_CORE|STARPU_CUDA,
-	.core_func = dummy_func,
+	.where = STARPU_CPU|STARPU_CUDA,
+	.cpu_func = dummy_func,
 	.cuda_func = dummy_func,
 	.model = NULL,
 	.nbuffers = 0

+ 0 - 0
tests/overlap/overlap.c


Some files were not shown because too many files changed in this diff