Browse Source

merge trunk@7061:7100

Nathalie Furmento 13 years ago
parent
commit
697756ee71
100 changed files with 498 additions and 290 deletions
  1. 11 3
      ChangeLog
  2. 3 1
      Makefile.am
  3. 22 6
      doc/chapters/advanced-api.texi
  4. 17 0
      doc/chapters/basic-api.texi
  5. 0 6
      doc/chapters/basic-examples.texi
  6. 67 4
      doc/chapters/mpi-support.texi
  7. 0 1
      doc/chapters/vector_scal_c.texi
  8. 1 2
      doc/chapters/vector_scal_cuda.texi
  9. 1 2
      doc/chapters/vector_scal_opencl.texi
  10. 0 1
      doc/tutorial/vector_scal.c
  11. 1 2
      doc/tutorial/vector_scal_cuda.cu
  12. 1 2
      doc/tutorial/vector_scal_opencl.c
  13. 1 0
      examples/Makefile.am
  14. 0 4
      examples/axpy/axpy.c
  15. 0 1
      examples/axpy/axpy_opencl.c
  16. 0 1
      examples/basic_examples/block.c
  17. 0 1
      examples/basic_examples/block_cuda.cu
  18. 0 1
      examples/basic_examples/block_opencl.c
  19. 0 3
      examples/basic_examples/multiformat.c
  20. 0 1
      examples/basic_examples/multiformat_conversion_codelets_cuda.cu
  21. 0 1
      examples/basic_examples/multiformat_conversion_codelets_opencl.c
  22. 0 1
      examples/basic_examples/multiformat_cuda.cu
  23. 0 1
      examples/basic_examples/multiformat_opencl.c
  24. 0 1
      examples/basic_examples/variable.c
  25. 1 2
      examples/basic_examples/variable_kernels.cu
  26. 1 2
      examples/basic_examples/variable_kernels_opencl.c
  27. 0 1
      examples/basic_examples/vector_scal.c
  28. 0 1
      examples/basic_examples/vector_scal_c.c
  29. 1 2
      examples/basic_examples/vector_scal_cuda.cu
  30. 1 2
      examples/basic_examples/vector_scal_opencl.c
  31. 0 1
      examples/binary/binary.c
  32. 0 3
      examples/cg/cg.h
  33. 1 2
      examples/cg/cg_dot_kernel.cu
  34. 1 2
      examples/cholesky/cholesky.h
  35. 3 6
      examples/cholesky/cholesky_kernels.c
  36. 0 1
      examples/cpp/incrementer_cpp.cpp
  37. 0 1
      examples/filters/custom_mf/conversion.cu
  38. 0 1
      examples/filters/custom_mf/conversion_opencl.c
  39. 0 1
      examples/filters/custom_mf/cuda.cu
  40. 0 4
      examples/filters/custom_mf/custom_interface.c
  41. 0 3
      examples/filters/custom_mf/custom_mf_filter.c
  42. 0 1
      examples/filters/custom_mf/custom_opencl.c
  43. 0 1
      examples/filters/fblock.c
  44. 1 2
      examples/filters/fblock_cuda.cu
  45. 1 2
      examples/filters/fblock_opencl.c
  46. 0 1
      examples/filters/shadow.c
  47. 0 1
      examples/filters/shadow2d.c
  48. 0 1
      examples/filters/shadow3d.c
  49. 0 3
      examples/gl_interop/gl_interop.c
  50. 2 4
      examples/gl_interop/gl_interop_idle.c
  51. 2 6
      examples/heat/dw_factolu.h
  52. 1 2
      examples/heat/dw_sparse_cg.h
  53. 1 2
      examples/heat/heat.h
  54. 0 1
      examples/incrementer/incrementer.c
  55. 2 3
      examples/incrementer/incrementer_kernels.cu
  56. 1 2
      examples/incrementer/incrementer_kernels_opencl.c
  57. 1 55
      examples/interface/complex.c
  58. 76 0
      examples/interface/complex_codelet.h
  59. 40 3
      examples/interface/complex_interface.c
  60. 4 0
      examples/interface/complex_interface.h
  61. 0 1
      examples/interface/complex_kernels.cu
  62. 0 1
      examples/interface/complex_kernels_opencl.c
  63. 0 2
      examples/lu/lu_example.c
  64. 0 5
      examples/lu/xlu.h
  65. 0 3
      examples/mandelbrot/mandelbrot.c
  66. 0 1
      examples/matvecmult/matvecmult.c
  67. 0 1
      examples/mult/xgemm.c
  68. 0 1
      examples/pi/SobolQRNG/sobol_gpu.cu
  69. 1 2
      examples/pi/pi.h
  70. 1 2
      examples/pi/pi_kernel.cu
  71. 0 2
      examples/pi/pi_redux.c
  72. 0 1
      examples/pi/pi_redux_kernel.cu
  73. 0 1
      examples/profiling/profiling.c
  74. 0 5
      examples/reductions/dot_product.c
  75. 1 2
      examples/reductions/dot_product_kernels.cu
  76. 1 9
      examples/spmv/spmv.h
  77. 1 2
      examples/spmv/spmv_cuda.cu
  78. 1 1
      examples/stencil/Makefile.am
  79. 0 1
      examples/stencil/life_cuda.cu
  80. 0 1
      examples/stencil/life_opencl.c
  81. 0 1
      examples/stencil/shadow.cu
  82. 0 1
      examples/stencil/shadow_opencl.c
  83. 0 5
      examples/stencil/stencil-kernels.c
  84. 1 5
      examples/stencil/stencil.h
  85. 0 1
      examples/top/hello_world_top.c
  86. 8 0
      include/starpu.h
  87. 25 0
      include/starpu_cublas.h
  88. 2 0
      include/starpu_data.h
  89. 10 1
      include/starpu_data_interfaces.h
  90. 1 3
      include/starpu_deprecated_api.h
  91. 2 0
      include/starpu_perfmodel.h
  92. 64 0
      include/starpu_task_util.h
  93. 0 32
      include/starpu_util.h
  94. 29 12
      mpi/Makefile.am
  95. 2 3
      mpi/examples/cholesky/mpi_cholesky_kernels.c
  96. 75 0
      mpi/examples/complex/mpi_complex.c
  97. 1 6
      mpi/examples/mpi_lu/pxlu.h
  98. 1 4
      mpi/examples/mpi_lu/pxlu_kernels.h
  99. 5 1
      mpi/starpu_mpi.h
  100. 0 0
      mpi/starpu_mpi.c

+ 11 - 3
ChangeLog

@@ -24,7 +24,6 @@ New features:
   * Performance models measurements can now be provided explicitly by
     applications.
   * Capability to emit communication statistics when running MPI code
-  * Add starpu_block_shadow_filter_func_vector and an example.
   * Add starpu_unregister_submit, starpu_data_acquire_on_node and
     starpu_data_invalidate_submit
   * New functionnality to wrapper starpu_insert_task to pass a array of
@@ -34,8 +33,6 @@ New features:
 Changes:
   * The FxT code can now be used on systems other than Linux.
   * Keep only one hashtable implementation common/uthash.h
-  * Add tag dependency in trace-generated DAG.
-  * Fix CPU binding for optimized CPU-GPU transfers.
   * The cache of starpu_mpi_insert_task is fixed and thus now enabled by
     default.
   * Standardize objects name in the performance model API
@@ -43,6 +40,17 @@ Changes:
 Small changes:
   * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is
 	still available for compatibility reasons.
+  * include/starpu.h includes all include/starpu_*.h files, applications
+	therefore only need to have #include <starpu.h>
+
+StarPU 1.0.2 (svn revision xxx)
+==============================================
+
+Changes:
+  * Add starpu_block_shadow_filter_func_vector and an example.
+  * Add tag dependency in trace-generated DAG.
+  * Fix CPU binding for optimized CPU-GPU transfers.
+  * Fix parallel tasks CPU binding and combined worker generation.
 
 StarPU 1.0.1 (svn revision 6659)
 ==============================================

+ 3 - 1
Makefile.am

@@ -53,6 +53,7 @@ versinclude_HEADERS = 				\
 	include/starpu_task.h			\
 	include/starpu_task_bundle.h		\
 	include/starpu_task_list.h		\
+	include/starpu_task_util.h		\
 	include/starpu_data.h			\
 	include/starpu_perfmodel.h		\
 	include/starpu_util.h			\
@@ -66,7 +67,8 @@ versinclude_HEADERS = 				\
 	include/starpu_top.h			\
 	include/starpu_deprecated_api.h         \
 	include/starpu_hash.h			\
-	include/starpu_rand.h
+	include/starpu_rand.h			\
+	include/starpu_cublas.h
 
 nodist_versinclude_HEADERS = 			\
 	include/starpu_config.h

+ 22 - 6
doc/chapters/advanced-api.texi

@@ -70,6 +70,18 @@ An identifier that is unique to each interface.
 @item @code{size_t interface_size}
 The size of the interface data descriptor.
 
+@item @code{int is_multiformat}
+todo
+
+@item @code{struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface)}
+todo
+
+@item @code{int (*pack_data)(starpu_data_handle_t handle, uint32_t node, void **ptr)}
+Pack the data handle into a contiguous buffer at the address @code{ptr}
+
+@item @code{int (*unpack_data)(starpu_data_handle_t handle, uint32_t node, void *ptr)}
+Unpack the data handle from the contiguous buffer at the address @code{ptr}
+
 @end table
 @end deftp
 
@@ -815,14 +827,18 @@ static struct starpu_sched_policy dummy_sched_policy = @{
 @node Driver API
 @subsection Driver API
 
-@deftypefun int starpu_driver_init (struct starpu_driver *@var{d})
-Initialize the given driver. Returns 0 on success, -EINVAL if
-@code{d->type} is not a valid StarPU device type (STARPU_CPU_WORKER,
-STARPU_CUDA_WORKER or STARPU_OPENCL_WORKER).
+@deftypefun int starpu_driver_run ({struct starpu_driver *}@var{d})
+Initialize the given driver, run it until it receives a request to terminate,
+deinitialize it and return 0 on success. It returns -EINVAL if @code{d->type}
+is not a valid StarPU device type (STARPU_CPU_WORKER, STARPU_CUDA_WORKER or
+STARPU_OPENCL_WORKER). This is the same as using the following
+functions: calling @code{starpu_driver_init()}, then calling
+@code{starpu_driver_run_once()} in a loop, and finally
+@code{starpu_driver_deinit()}.
 @end deftypefun
 
-@deftypefun int starpu_driver_run ({struct starpu_driver *}@var{d})
-Run the driver until it receives a request to terminate, then returns 0 on success, -EINVAL if
+@deftypefun int starpu_driver_init (struct starpu_driver *@var{d})
+Initialize the given driver. Returns 0 on success, -EINVAL if
 @code{d->type} is not a valid StarPU device type (STARPU_CPU_WORKER,
 STARPU_CUDA_WORKER or STARPU_OPENCL_WORKER).
 @end deftypefun

+ 17 - 0
doc/chapters/basic-api.texi

@@ -703,6 +703,23 @@ if @var{handle}'s interface does not have data allocated locally
 Return the unique identifier of the interface associated with the given @var{handle}.
 @end deftypefun
 
+@deftypefun size_t starpu_handle_get_size (starpu_data_handle_t @var{handle})
+Return the size of the data associated with @var{handle}
+@end deftypefun
+
+@deftypefun int starpu_handle_pack_data (starpu_data_handle_t @var{handle}, {void **}@var{ptr})
+Allocate a buffer large enough at @var{ptr} and copy into the newly
+allocated buffer the data associated with @var{handle}. The interface of
+the data registered at @var{handle} must define a packing operation
+(@pxref{struct starpu_data_interface_ops}).
+@end deftypefun
+
+@deftypefun int starpu_handle_unpack_data (starpu_data_handle_t @var{handle}, {void *}@var{ptr})
+Copy into @var{handle} the data located at @var{ptr} as described by the
+interface of the data. The interface registered at @var{handle} must
+define an unpacking operation (@pxref{struct starpu_data_interface_ops}).
+@end deftypefun
+
 @node Accessing Variable Data Interfaces
 @subsubsection Variable Data Interfaces
 

+ 0 - 6
doc/chapters/basic-examples.texi

@@ -438,9 +438,6 @@ in our C file like this:
 
 @cartouche
 @smallexample
-/* Include StarPU's OpenCL integration.  */
-#include <starpu_opencl.h>
-
 /* The OpenCL programs, loaded from `main' (see below).  */
 static struct starpu_opencl_program cl_programs;
 
@@ -536,7 +533,6 @@ the CUDA Kernel}).
    with `nvcc'.  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 #include <stdlib.h>
 
 static __global__ void
@@ -724,7 +720,6 @@ call.
 @cartouche
 @smallexample
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 static __global__ void vector_mult_cuda(float *val, unsigned n,
                                         float factor)
@@ -778,7 +773,6 @@ important when using partitioning, see @ref{Partitioning Data}.
 @cartouche
 @smallexample
 #include <starpu.h>
-@i{#include <starpu_opencl.h>}
 
 @i{extern struct starpu_opencl_program programs;}
 

+ 67 - 4
doc/chapters/mpi-support.texi

@@ -20,10 +20,11 @@ distributed application, by automatically issuing all required data transfers
 according to the task graph and an application-provided distribution.
 
 @menu
-* The API::
-* Simple Example::
-* MPI Insert Task Utility::
-* MPI Collective Operations::
+* The API::                     
+* Simple Example::              
+* Exchanging User Defined Data Interface::  
+* MPI Insert Task Utility::     
+* MPI Collective Operations::   
 @end menu
 
 @node The API
@@ -242,6 +243,68 @@ int main(int argc, char **argv)
 @end cartouche
 
 @page
+@node Exchanging User Defined Data Interface
+@section Exchanging User Defined Data Interface
+
+New data interfaces defined as explained in @ref{An example
+of data interface} can also be used within StarPU-MPI and exchanged
+between nodes. Two functions need to be defined through
+the type @code{struct starpu_data_interface_ops} (@pxref{Data
+Interface API}). The pack function takes a handle and returns a
+contiguous memory buffer where data to be conveyed to another node
+should be copied. The reverse operation is implemented in the unpack
+function, which takes a contiguous memory buffer and recreates the data
+handle.
+
+@cartouche
+@smallexample
+static int complex_pack_data(starpu_data_handle_t handle, uint32_t node, void **ptr)
+@{
+  STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+  struct starpu_complex_interface *complex_interface =
+    (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node);
+
+  *ptr = malloc(complex_get_size(handle));
+  memcpy(*ptr, complex_interface->real, complex_interface->nx*sizeof(double));
+  memcpy(*ptr+complex_interface->nx*sizeof(double), complex_interface->imaginary,
+         complex_interface->nx*sizeof(double));
+
+  return 0;
+@}
+@end smallexample
+@end cartouche
+
+@cartouche
+@smallexample
+static int complex_unpack_data(starpu_data_handle_t handle, uint32_t node, void *ptr)
+@{
+  STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+  struct starpu_complex_interface *complex_interface =
+    (struct starpu_complex_interface *)	starpu_data_get_interface_on_node(handle, node);
+
+  memcpy(complex_interface->real, ptr, complex_interface->nx*sizeof(double));
+  memcpy(complex_interface->imaginary, ptr+complex_interface->nx*sizeof(double),
+         complex_interface->nx*sizeof(double));
+
+  return 0;
+@}
+@end smallexample
+@end cartouche
+
+@cartouche
+@smallexample
+static struct starpu_data_interface_ops interface_complex_ops =
+@{
+  ...
+  .pack_data = complex_pack_data,
+  .unpack_data = complex_unpack_data
+@};
+@end smallexample
+@end cartouche
+
+@page
 @node MPI Insert Task Utility
 @section MPI Insert Task Utility
 

+ 0 - 1
doc/chapters/vector_scal_c.texi

@@ -14,7 +14,6 @@
  *  3- how a kernel can manipulate the data (buffers[0].vector.ptr)
  */
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 #define    NX    2048
 

+ 1 - 2
doc/chapters/vector_scal_cuda.texi

@@ -2,12 +2,11 @@
 
 @c This file is part of the StarPU Handbook.
 @c Copyright (C) 2009-2011  Université de Bordeaux 1
-@c Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
 @c See the file starpu.texi for copying conditions.
 
 @smallexample
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 static __global__ void vector_mult_cuda(float *val, unsigned n,
                                         float factor)

+ 1 - 2
doc/chapters/vector_scal_opencl.texi

@@ -2,12 +2,11 @@
 
 @c This file is part of the StarPU Handbook.
 @c Copyright (C) 2009-2011  Université de Bordeaux 1
-@c Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+@c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
 @c See the file starpu.texi for copying conditions.
 
 @smallexample
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 extern struct starpu_opencl_program programs;
 

+ 0 - 1
doc/tutorial/vector_scal.c

@@ -36,7 +36,6 @@
  *  3- how a kernel can manipulate the data (buffers[0].vector.ptr)
  */
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 #define    NX    2048
 

+ 1 - 2
doc/tutorial/vector_scal_cuda.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011  Université de Bordeaux 1
  *
  * Redistribution  and  use  in  source and binary forms, with or without
@@ -29,7 +29,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 static __global__ void vector_mult_cuda(float *val, unsigned n, float factor)
 {

+ 1 - 2
doc/tutorial/vector_scal_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011  Université de Bordeaux 1
  *
  * Redistribution  and  use  in  source and binary forms, with or without
@@ -29,7 +29,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 extern struct starpu_opencl_program programs;
 

+ 1 - 0
examples/Makefile.am

@@ -138,6 +138,7 @@ noinst_HEADERS = 				\
 	filters/custom_mf/custom_interface.h    \
 	filters/custom_mf/custom_types.h	\
 	interface/complex_interface.h		\
+	interface/complex_codelet.h		\
 	pi/pi.h					\
 	pi/SobolQRNG/sobol.h			\
 	pi/SobolQRNG/sobol_gold.h		\

+ 0 - 4
examples/axpy/axpy.c

@@ -27,10 +27,6 @@
 
 #ifdef STARPU_USE_CUDA
 #include <cublas.h>
-#include <starpu_cuda.h>
-#endif
-#ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
 #endif
 
 #include "axpy.h"

+ 0 - 1
examples/axpy/axpy_opencl.c

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 #include "axpy.h"
 
 extern struct starpu_opencl_program opencl_program;

+ 0 - 1
examples/basic_examples/block.c

@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 #include <pthread.h>
 #include <math.h>
 

+ 0 - 1
examples/basic_examples/block_cuda.cu

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier)
 {

+ 0 - 1
examples/basic_examples/block_opencl.c

@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
 do						    	    \

+ 0 - 3
examples/basic_examples/multiformat.c

@@ -15,9 +15,6 @@
  */
 
 #include <starpu.h>
-#ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
-#endif
 #include "multiformat_types.h"
 
 static int ncpu = 0;

+ 0 - 1
examples/basic_examples/multiformat_conversion_codelets_cuda.cu

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 #include "multiformat_types.h"
 
 static __global__ void cpu_to_cuda_cuda(struct point *src,

+ 0 - 1
examples/basic_examples/multiformat_conversion_codelets_opencl.c

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 extern struct starpu_opencl_program opencl_conversion_program;
 

+ 0 - 1
examples/basic_examples/multiformat_cuda.cu

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 #include "multiformat_types.h"
 
 static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n)

+ 0 - 1
examples/basic_examples/multiformat_opencl.c

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 extern struct starpu_opencl_program opencl_program;
 

+ 0 - 1
examples/basic_examples/variable.c

@@ -29,7 +29,6 @@ extern void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 
 #ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
 extern void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args);
 struct starpu_opencl_program opencl_program;
 #endif

+ 1 - 2
examples/basic_examples/variable_kernels.cu

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 static __global__ void cuda_variable(float * tab)
 {

+ 1 - 2
examples/basic_examples/variable_kernels_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 extern struct starpu_opencl_program opencl_program;
 void opencl_codelet(void *descr[], void *_args)

+ 0 - 1
examples/basic_examples/vector_scal.c

@@ -25,7 +25,6 @@
 
 #include <config.h>
 #include <starpu.h>
-#include <starpu_opencl.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>

+ 0 - 1
examples/basic_examples/vector_scal_c.c

@@ -26,7 +26,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 #include <stdio.h>
 
 

+ 1 - 2
examples/basic_examples/vector_scal_cuda.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -20,7 +20,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 static __global__ void vector_mult_cuda(float *val, unsigned n,
                                         float factor)

+ 1 - 2
examples/basic_examples/vector_scal_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010  Institut National de Recherche en Informatique et Automatique
  * Copyright (C) 2011  Université de Bordeaux 1
  *
@@ -21,7 +21,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 extern struct starpu_opencl_program opencl_program;
 

+ 0 - 1
examples/binary/binary.c

@@ -22,7 +22,6 @@
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
 #ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
 extern void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args);
 struct starpu_opencl_program opencl_program;
 #endif

+ 0 - 3
examples/cg/cg.h

@@ -24,11 +24,8 @@
 #ifdef STARPU_USE_CUDA
 #include <cuda.h>
 #include <cublas.h>
-#include <starpu_cuda.h>
 #endif
 
-#include <starpu.h>
-
 #define DOUBLE
 
 #ifdef DOUBLE

+ 1 - 2
examples/cg/cg_dot_kernel.cu

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 #include "cg.h"
 

+ 1 - 2
examples/cholesky/cholesky.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -30,7 +30,6 @@
 
 #include <common/blas.h>
 #include <starpu.h>
-#include <starpu_bound.h>
 
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 #define NMAXBLOCKS	32

+ 3 - 6
examples/cholesky/cholesky_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2011-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,16 +15,13 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-#include <starpu_config.h>
+#include <starpu.h>
 #include "cholesky.h"
 #include "../common/blas.h"
-#ifdef STARPU_USE_CUDA
-#include <starpu_cuda.h>
-#ifdef STARPU_HAVE_MAGMA
+#if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA)
 #include "magma.h"
 #include "magma_lapack.h"
 #endif
-#endif
 
 /*
  *   U22 

+ 0 - 1
examples/cpp/incrementer_cpp.cpp

@@ -25,7 +25,6 @@ extern "C" void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args
 #endif
 
 #ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
 extern "C" void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args);
 struct starpu_opencl_program opencl_program;
 #endif

+ 0 - 1
examples/filters/custom_mf/conversion.cu

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 #include "custom_types.h"
 #include "custom_interface.h"
 

+ 0 - 1
examples/filters/custom_mf/conversion_opencl.c

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 #include "custom_types.h"
 #include "custom_interface.h"
 

+ 0 - 1
examples/filters/custom_mf/cuda.cu

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 #include "custom_types.h"
 #include "custom_interface.h"
 

+ 0 - 4
examples/filters/custom_mf/custom_interface.c

@@ -14,10 +14,6 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 #include <starpu.h>
-#include <starpu_hash.h>
-#ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
-#endif
 #include "custom_interface.h"
 #include "custom_types.h"
 

+ 0 - 3
examples/filters/custom_mf/custom_mf_filter.c

@@ -16,9 +16,6 @@
 #include <starpu.h>
 #include "custom_interface.h"
 #include "custom_types.h"
-#ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
-#endif /* !STARPU_USE_OPENCL */
 
 #define N 12
 

+ 0 - 1
examples/filters/custom_mf/custom_opencl.c

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 #include "custom_types.h"
 #include "custom_interface.h"
 

+ 0 - 1
examples/filters/fblock.c

@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 #define NX    5
 #define NY    4

+ 1 - 2
examples/filters/fblock_cuda.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor)
 {

+ 1 - 2
examples/filters/fblock_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr)       \
 do                                                          \

+ 0 - 1
examples/filters/shadow.c

@@ -41,7 +41,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 /* Shadow width */
 #define SHADOW 2

+ 0 - 1
examples/filters/shadow2d.c

@@ -81,7 +81,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 /* Shadow width */
 #define SHADOWX 3

+ 0 - 1
examples/filters/shadow3d.c

@@ -26,7 +26,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 /* Shadow width */
 #define SHADOWX 2

+ 0 - 3
examples/gl_interop/gl_interop.c

@@ -27,9 +27,6 @@
 #include <starpu.h>
 #include <unistd.h>
 #include <GL/glut.h>
-#ifdef STARPU_USE_CUDA
-#include <starpu_cuda.h>
-#endif
 
 void dummy(void *buffers[], void *cl_arg)
 {

+ 2 - 4
examples/gl_interop/gl_interop_idle.c

@@ -30,9 +30,6 @@
 #include <starpu.h>
 #include <unistd.h>
 #include <GL/glut.h>
-#ifdef STARPU_USE_CUDA
-#include <starpu_cuda.h>
-#endif
 
 void dummy(void *buffers[], void *cl_arg)
 {
@@ -146,7 +143,8 @@ int main(int argc, char **argv)
 	glutIdleFunc(idle);
 	/* Now run the glut loop */
 	glutMainLoop();
-	starpu_driver_run(&drivers[0]);
+	/* And deinitialize driver */
+	starpu_driver_deinit(&drivers[0]);
 	printf("finished running the driver\n");
 
 	starpu_shutdown();

+ 2 - 6
examples/heat/dw_factolu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,19 +22,15 @@
 #include <string.h>
 #include <math.h>
 #include <sys/time.h>
-/* for STARPU_USE_CUDA */
-#include <starpu_config.h>
+#include <starpu.h>
 #ifdef STARPU_USE_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cublas.h>
-#include <starpu_cuda.h>
 #endif
 
 #include "../common/blas.h"
 
-#include <starpu.h>
-
 #include "lu_kernels_model.h"
 
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)

+ 1 - 2
examples/heat/dw_sparse_cg.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,7 +28,6 @@
 #include <pthread.h>
 #include <signal.h>
 
-#include <starpu_config.h>
 #include <starpu.h>
 
 #ifdef STARPU_USE_CUDA

+ 1 - 2
examples/heat/heat.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2011-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,7 +25,6 @@
 #include <math.h>
 
 /* needed for STARPU_OPENGL_RENDER */
-#include <starpu_config.h>
 #include <starpu.h>
 
 #include <common/blas.h>

+ 0 - 1
examples/incrementer/incrementer.c

@@ -27,7 +27,6 @@ extern void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 
 #ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
 extern void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args);
 struct starpu_opencl_program opencl_program;
 #endif

+ 2 - 3
examples/incrementer/incrementer_kernels.cu

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,13 +16,12 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 static __global__ void cuda_incrementer(float * tab)
 {
 	tab[0] = tab[0] + 1.0f;
 	tab[2] = tab[2] + 1.0f;
-	
+
 	return;
 }
 

+ 1 - 2
examples/incrementer/incrementer_kernels_opencl.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 extern struct starpu_opencl_program opencl_program;
 void opencl_codelet(void *descr[], void *_args)

+ 1 - 55
examples/interface/complex.c

@@ -16,9 +16,7 @@
 
 #include <starpu.h>
 #include "complex_interface.h"
-#ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
-#endif
+#include "complex_codelet.h"
 
 #ifdef STARPU_USE_CUDA
 extern void copy_complex_codelet_cuda(void *descr[], __attribute__ ((unused)) void *_args);
@@ -27,52 +25,6 @@ extern void copy_complex_codelet_cuda(void *descr[], __attribute__ ((unused)) vo
 extern void copy_complex_codelet_opencl(void *buffers[], void *args);
 #endif
 
-void compare_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	int nx1 = STARPU_COMPLEX_GET_NX(descr[0]);
-	double *real1 = STARPU_COMPLEX_GET_REAL(descr[0]);
-	double *imaginary1 = STARPU_COMPLEX_GET_IMAGINARY(descr[0]);
-
-	int nx2 = STARPU_COMPLEX_GET_NX(descr[1]);
-	double *real2 = STARPU_COMPLEX_GET_REAL(descr[1]);
-	double *imaginary2 = STARPU_COMPLEX_GET_IMAGINARY(descr[1]);
-
-	int compare = (nx1 == nx2);
-	if (nx1 == nx2)
-	{
-		int i;
-		for(i=0 ; i<nx1 ; i++)
-		{
-			if (real1[i] != real2[i] || imaginary1[i] != imaginary2[i])
-			{
-				compare = 0;
-				break;
-			}
-		}
-	}
-	fprintf(stderr, "Complex numbers are%s similar\n", compare==0 ? " NOT" : "");
-}
-
-void display_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args)
-{
-	int nx = STARPU_COMPLEX_GET_NX(descr[0]);
-	double *real = STARPU_COMPLEX_GET_REAL(descr[0]);
-	double *imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]);
-	int i;
-
-	for(i=0 ; i<nx ; i++)
-	{
-		fprintf(stderr, "Complex[%d] = %3.2f + %3.2f i\n", i, real[i], imaginary[i]);
-	}
-}
-
-struct starpu_codelet cl_display =
-{
-	.cpu_funcs = {display_complex_codelet, NULL},
-	.nbuffers = 1,
-	.modes = {STARPU_R}
-};
-
 struct starpu_codelet cl_copy =
 {
 #ifdef STARPU_USE_CUDA
@@ -85,12 +37,6 @@ struct starpu_codelet cl_copy =
 	.modes = {STARPU_R, STARPU_W}
 };
 
-struct starpu_codelet cl_compare =
-{
-	.cpu_funcs = {compare_complex_codelet, NULL},
-	.nbuffers = 2,
-	.modes = {STARPU_R, STARPU_R}
-};
 
 #ifdef STARPU_USE_OPENCL
 struct starpu_opencl_program opencl_program;

+ 76 - 0
examples/interface/complex_codelet.h

@@ -0,0 +1,76 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "complex_interface.h"
+
+#ifndef __COMPLEX_CODELET_H
+#define __COMPLEX_CODELET_H
+
+/* CPU implementation of the "compare" codelet.
+ * descr[0] and descr[1] are complex-interface buffers; the function prints on
+ * stderr whether they hold the same number of elements and, element by
+ * element, exactly the same real and imaginary values.
+ * NOTE(review): this function is defined (not merely declared) in a header,
+ * so the header can only be included from one translation unit per program. */
+void compare_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args)
+{
+	int nx1 = STARPU_COMPLEX_GET_NX(descr[0]);
+	int nx2 = STARPU_COMPLEX_GET_NX(descr[1]);
+	double *real1 = STARPU_COMPLEX_GET_REAL(descr[0]);
+	double *real2 = STARPU_COMPLEX_GET_REAL(descr[1]);
+	double *imaginary1 = STARPU_COMPLEX_GET_IMAGINARY(descr[0]);
+	double *imaginary2 = STARPU_COMPLEX_GET_IMAGINARY(descr[1]);
+	int same = (nx1 == nx2);
+	int idx;
+
+	/* The loop is skipped entirely when the lengths differ, and stops at
+	 * the first mismatching element otherwise. */
+	for (idx = 0; same && idx < nx1; idx++)
+	{
+		if (real1[idx] != real2[idx] || imaginary1[idx] != imaginary2[idx])
+			same = 0;
+	}
+
+	fprintf(stderr, "Complex numbers are%s similar\n", same ? "" : " NOT");
+}
+
+/* Codelet running compare_complex_codelet on the CPU; it takes two buffers,
+ * both accessed with STARPU_R.
+ * NOTE(review): this object is defined in a header, so linking two
+ * translation units that both include it would yield duplicate symbols. */
+struct starpu_codelet cl_compare =
+{
+	.cpu_funcs = {compare_complex_codelet, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_R}
+};
+
+/* CPU implementation of the "display" codelet.
+ * descr[0] is a complex-interface buffer; every element is printed on stderr
+ * as "Complex[i] = <real> + <imaginary> i", one element per line. */
+void display_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args)
+{
+	double *re = STARPU_COMPLEX_GET_REAL(descr[0]);
+	double *im = STARPU_COMPLEX_GET_IMAGINARY(descr[0]);
+	int count = STARPU_COMPLEX_GET_NX(descr[0]);
+	int idx = 0;
+
+	while (idx < count)
+	{
+		fprintf(stderr, "Complex[%d] = %3.2f + %3.2f i\n", idx, re[idx], im[idx]);
+		idx++;
+	}
+}
+
+/* Codelet running display_complex_codelet on the CPU; it takes a single
+ * buffer accessed with STARPU_R.
+ * NOTE(review): this object is defined in a header, so linking two
+ * translation units that both include it would yield duplicate symbols. */
+struct starpu_codelet cl_display =
+{
+	.cpu_funcs = {display_complex_codelet, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+#endif /* __COMPLEX_CODELET_H */

+ 40 - 3
examples/interface/complex_interface.c

@@ -15,9 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
-#include <starpu_opencl.h>
-#include <starpu_hash.h>
 
 #include "complex_interface.h"
 
@@ -164,6 +161,43 @@ static uint32_t complex_footprint(starpu_data_handle_t handle)
 	return starpu_crc32_be(starpu_complex_get_nx(handle), 0);
 }
 
+/* Return the address of the `real' array of the copy of `handle' held on
+ * memory node `node'.  The handle must already be allocated on that node
+ * (checked with STARPU_ASSERT). */
+static void *complex_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
+{
+	struct starpu_complex_interface *iface;
+
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	iface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node);
+	return (void *) iface->real;
+}
+
+/* Pack the copy of `handle' held on memory node `node' into a freshly
+ * malloc()ed contiguous buffer: the nx real parts first, immediately followed
+ * by the nx imaginary parts.  The buffer address is stored in *ptr.
+ * The handle must already be allocated on that node (checked with
+ * STARPU_ASSERT).
+ * Returns 0 on success, or -1 (with *ptr set to NULL) when malloc() fails. */
+static int complex_pack_data(starpu_data_handle_t handle, uint32_t node, void **ptr)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+	size_t part_size = complex_interface->nx*sizeof(double);
+	char *buffer;
+
+	buffer = malloc(complex_get_size(handle));
+	*ptr = buffer;
+	if (buffer == NULL)
+		return -1;
+
+	/* Offsets are computed on a char pointer: arithmetic on void * is a
+	 * GNU extension and is not valid ISO C. */
+	memcpy(buffer, complex_interface->real, part_size);
+	memcpy(buffer+part_size, complex_interface->imaginary, part_size);
+
+	return 0;
+}
+
+/* Unpack the contiguous buffer `ptr' (nx real parts immediately followed by
+ * nx imaginary parts) into the copy of `handle' held on memory node `node'.
+ * The handle must already be allocated on that node (checked with
+ * STARPU_ASSERT).  Always returns 0. */
+static int complex_unpack_data(starpu_data_handle_t handle, uint32_t node, void *ptr)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+	size_t part_size = complex_interface->nx*sizeof(double);
+	/* Offsets are computed on a char pointer: arithmetic on void * is a
+	 * GNU extension and is not valid ISO C. */
+	const char *buffer = ptr;
+
+	memcpy(complex_interface->real, buffer, part_size);
+	memcpy(complex_interface->imaginary, buffer+part_size, part_size);
+
+	return 0;
+}
+
 #ifdef STARPU_USE_CUDA
 static int copy_cuda_async_sync(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, enum cudaMemcpyKind kind, cudaStream_t stream)
 {
@@ -310,6 +344,9 @@ static struct starpu_data_interface_ops interface_complex_ops =
 	.footprint = complex_footprint,
 	.interfaceid = -1,
 	.interface_size = sizeof(struct starpu_complex_interface),
+	.handle_to_pointer = complex_handle_to_pointer,
+	.pack_data = complex_pack_data,
+	.unpack_data = complex_unpack_data
 };
 
 void starpu_complex_data_register(starpu_data_handle_t *handleptr, uint32_t home_node, double *real, double *imaginary, int nx)

+ 4 - 0
examples/interface/complex_interface.h

@@ -16,6 +16,9 @@
 
 #include <starpu.h>
 
+#ifndef __COMPLEX_INTERFACE_H
+#define __COMPLEX_INTERFACE_H
+
 /* interface for complex numbers */
 struct starpu_complex_interface
 {
@@ -34,3 +37,4 @@ int starpu_complex_get_nx(starpu_data_handle_t handle);
 #define STARPU_COMPLEX_GET_IMAGINARY(interface)	(((struct starpu_complex_interface *)(interface))->imaginary)
 #define STARPU_COMPLEX_GET_NX(interface)	(((struct starpu_complex_interface *)(interface))->nx)
 
+#endif /* __COMPLEX_INTERFACE_H */

+ 0 - 1
examples/interface/complex_kernels.cu

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 #include "complex_interface.h"
 
 static __global__ void complex_copy_cuda(double *o_real, double *o_imaginary, double *i_real, double *i_imaginary, unsigned n)

+ 0 - 1
examples/interface/complex_kernels_opencl.c

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 #include "complex_interface.h"
 
 extern struct starpu_opencl_program opencl_program;

+ 0 - 2
examples/lu/lu_example.c

@@ -21,8 +21,6 @@
 #include <time.h>
 #include <math.h>
 #include <starpu.h>
-#include <starpu_profiling.h>
-#include <starpu_bound.h>
 #include "xlu.h"
 #include "xlu_kernels.h"
 

+ 0 - 5
examples/lu/xlu.h

@@ -19,12 +19,7 @@
 #define __XLU_H__
 
 #include <sys/time.h>
-
-/* for STARPU_USE_CUDA */
-#include <starpu_config.h>
 #include <starpu.h>
-#include <starpu_cuda.h>
-
 #include <common/blas.h>
 
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)

+ 0 - 3
examples/mandelbrot/mandelbrot.c

@@ -16,9 +16,6 @@
  */
 
 #include <starpu.h>
-#ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
-#endif
 #include <sys/time.h>
 #include <math.h>
 #include <limits.h>

+ 0 - 1
examples/matvecmult/matvecmult.c

@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_opencl.h>
 #include <pthread.h>
 #include <math.h>
 

+ 0 - 1
examples/mult/xgemm.c

@@ -28,7 +28,6 @@
 #ifdef STARPU_USE_CUDA
 #include <cuda.h>
 #include <cublas.h>
-#include <starpu_cuda.h>
 #endif
 
 static unsigned niter = 100;

+ 0 - 1
examples/pi/SobolQRNG/sobol_gpu.cu

@@ -40,7 +40,6 @@
 #include "sobol.h"
 #include "sobol_gpu.h"
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 #define k_2powneg32 2.3283064E-10F
 

+ 1 - 2
examples/pi/pi.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,7 +19,6 @@
 #define __PI_H__
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 #include <stdio.h>
 
 #define NSHOT_PER_TASK	(16*1024*1024ULL)

+ 1 - 2
examples/pi/pi_kernel.cu

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,7 +17,6 @@
 
 #include "SobolQRNG/sobol_gpu.h"
 #include "pi.h"
-#include <starpu_cuda.h>
 
 #define MAXNBLOCKS	128
 #define MAXTHREADSPERBLOCK	256

+ 0 - 2
examples/pi/pi_redux.c

@@ -17,7 +17,6 @@
 #include <starpu.h>
 #include <stdlib.h>
 #include <sys/time.h>
-#include <starpu_config.h>
 
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 #define PI	3.14159265358979323846
@@ -29,7 +28,6 @@
 #ifdef STARPU_HAVE_CURAND
 #include <cuda.h>
 #include <curand.h>
-#include <starpu_cuda.h>
 #endif
 
 #define NSHOT_PER_TASK	(1024*1024)

+ 0 - 1
examples/pi/pi_redux_kernel.cu

@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 #define MAXNBLOCKS	128
 #define MAXTHREADSPERBLOCK	256

+ 0 - 1
examples/profiling/profiling.c

@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_profiling.h>
 #include <assert.h>
 #include <unistd.h>
 

+ 0 - 5
examples/reductions/dot_product.c

@@ -24,11 +24,6 @@
 #ifdef STARPU_USE_CUDA
 #include <cuda.h>
 #include <cublas.h>
-#include <starpu_cuda.h>
-#endif
-
-#ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
 #endif
 
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)

+ 1 - 2
examples/reductions/dot_product_kernels.cu

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,7 +15,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 #define DOT_TYPE double
 

+ 1 - 9
examples/spmv/spmv.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,14 +28,6 @@
 
 #include <starpu.h>
 
-#ifdef STARPU_USE_CUDA
-#include <starpu_cuda.h>
-#endif
-
-#ifdef STARPU_USE_OPENCL
-#include <starpu_opencl.h>
-#endif
-
 #define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
 #ifdef STARPU_USE_CUDA

+ 1 - 2
examples/spmv/spmv_cuda.cu

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,7 +16,6 @@
  */
 
 #include <starpu.h>
-#include <starpu_cuda.h>
 
 #define MIN(a,b)	((a)<(b)?(a):(b))
 

+ 1 - 1
examples/stencil/Makefile.am

@@ -20,7 +20,7 @@ AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
 
 if USE_MPI
 LIBS += $(top_builddir)/mpi/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
-AM_CPPFLAGS += -I$(top_srcdir)/mpi/
+AM_CPPFLAGS += -I$(top_srcdir)/mpi/include
 endif
 
 CC = $(CC_OR_MPICC)

+ 0 - 1
examples/stencil/life_cuda.cu

@@ -16,7 +16,6 @@
 
 #define _externC extern "C"
 #include "stencil.h"
-#include <starpu_cuda.h>
 
 /* Heart of the stencil computation: compute a new state from an old one. */
 

+ 0 - 1
examples/stencil/life_opencl.c

@@ -21,7 +21,6 @@
 #include <stencil.h>
 #include <CL/cl.h>
 #include <starpu.h>
-#include <starpu_opencl.h>
 
 #define str(x) #x
 

+ 0 - 1
examples/stencil/shadow.cu

@@ -16,7 +16,6 @@
 
 #define _externC extern "C"
 #include "stencil.h"
-#include <starpu_cuda.h>
 
 /* Perform replication of data on X and Y edges, to fold the domain on 
    itself through mere replication of the source state. */

+ 0 - 1
examples/stencil/shadow_opencl.c

@@ -15,7 +15,6 @@
  */
 
 #include "stencil.h"
-#include <starpu_opencl.h>
 
 /* Perform replication of data on X and Y edges, to fold the domain on 
    itself through mere replication of the source state. */

+ 0 - 5
examples/stencil/stencil-kernels.c

@@ -18,11 +18,6 @@
 #include "stencil.h"
 #include <sys/time.h>
 
-#ifdef STARPU_USE_OPENCL
-#include <CL/cl.h>
-#include <starpu_opencl.h>
-#endif
-
 #ifndef timersub
 #define	timersub(x, y, res) \
 	do \

+ 1 - 5
examples/stencil/stencil.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010-2011  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -21,10 +21,6 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <starpu.h>
-#include <starpu_top.h>
-#ifdef STARPU_USE_CUDA
-#include <starpu_cuda.h>
-#endif
 
 #ifndef __CUDACC__
 #ifdef STARPU_USE_MPI

+ 0 - 1
examples/top/hello_world_top.c

@@ -30,7 +30,6 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <starpu.h>
-#include <starpu_top.h>
 #include <stdlib.h>
 #include <time.h>
 

+ 8 - 0
include/starpu.h

@@ -42,9 +42,17 @@ typedef unsigned long long uint64_t;
 #include <starpu_perfmodel.h>
 #include <starpu_task.h>
 #include <starpu_task_list.h>
+#include <starpu_task_util.h>
 #include <starpu_scheduler.h>
 #include <starpu_expert.h>
 #include <starpu_rand.h>
+#include <starpu_cuda.h>
+#include <starpu_cublas.h>
+#include <starpu_bound.h>
+#include <starpu_hash.h>
+#include <starpu_profiling.h>
+#include <starpu_top.h>
+#include <starpu_fxt.h>
 
 #ifdef __cplusplus
 extern "C"

+ 25 - 0
include/starpu_cublas.h

@@ -0,0 +1,25 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_CUBLAS_H__
+#define __STARPU_CUBLAS_H__
+
+/* The prototypes below need C linkage of their own: this header can be
+ * included outside of any enclosing extern "C" block, and C++ (e.g. nvcc)
+ * callers would otherwise look for C++-mangled symbols. */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Some helper functions for application using CUBLAS kernels */
+void starpu_helper_cublas_init(void);
+void starpu_helper_cublas_shutdown(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __STARPU_CUBLAS_H__ */

+ 2 - 0
include/starpu_data.h

@@ -134,6 +134,8 @@ int starpu_data_get_rank(starpu_data_handle_t handle);
 int starpu_data_set_tag(starpu_data_handle_t handle, int tag);
 int starpu_data_get_tag(starpu_data_handle_t handle);
 
+unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, uint32_t memory_node);
+
 #ifdef __cplusplus
 }
 #endif

+ 10 - 1
include/starpu_data_interfaces.h

@@ -123,7 +123,7 @@ struct starpu_data_interface_ops
 	void (*display)(starpu_data_handle_t handle, FILE *f);
 #ifdef STARPU_USE_GORDON
 	/* Convert the data size to the spu size format */
-	int (*convert_to_gordon)(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+	int (*convert_to_gordon)(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
 #endif
 	/* an identifier that is unique to each interface */
 	enum starpu_data_interface_id interfaceid;
@@ -132,6 +132,11 @@ struct starpu_data_interface_ops
 
 	int is_multiformat;
 	struct starpu_multiformat_data_interface_ops* (*get_mf_ops)(void *data_interface);
+
+	/* Pack the data handle into a contiguous buffer at the address ptr */
+	int (*pack_data)(starpu_data_handle_t handle, uint32_t node, void **ptr);
+	/* Unpack the data handle from the contiguous buffer at the address ptr */
+	int (*unpack_data)(starpu_data_handle_t handle, uint32_t node, void *ptr);
 };
 
 /* Return the next available id for a data interface */
@@ -376,6 +381,10 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handle, uint32_t hom
 
 enum starpu_data_interface_id starpu_handle_get_interface_id(starpu_data_handle_t handle);
 
+int starpu_handle_pack_data(starpu_data_handle_t handle, void **ptr);
+int starpu_handle_unpack_data(starpu_data_handle_t handle, void *ptr);
+size_t starpu_handle_get_size(starpu_data_handle_t handle);
+
 /* Lookup a ram pointer into a StarPU handle */
 extern starpu_data_handle_t starpu_data_lookup(const void *ptr);
 

+ 1 - 3
include/starpu_deprecated_api.h

@@ -39,13 +39,11 @@ typedef struct starpu_multiformat_interface starpu_multiformat_interface_t;
 #define starpu_buffer_descr_t starpu_buffer_descr
 #define starpu_history_list_t starpu_history_list
 #define starpu_regression_model_t starpu_regression_model
+#define starpu_per_arch_perfmodel_t starpu_per_arch_perfmodel
 #define starpu_perfmodel_t starpu_perfmodel
 #define starpu_sched_policy_s starpu_sched_policy
 #define starpu_data_interface_ops_t starpu_data_interface_ops
 
-#define starpu_per_arch_perfmodel_t starpu_per_arch_perfmodel
-#define starpu_per_arch_perfmodel starpu_perfmodel_per_arch
-
 typedef struct starpu_buffer_descr starpu_buffer_descr;
 typedef struct starpu_codelet starpu_codelet;
 typedef struct starpu_codelet starpu_codelet_t;

+ 2 - 0
include/starpu_perfmodel.h

@@ -141,6 +141,8 @@ struct starpu_perfmodel_regression_model
 
 struct starpu_perfmodel_history_table;
 
+#define starpu_per_arch_perfmodel starpu_perfmodel_per_arch STARPU_DEPRECATED
+
 struct starpu_perfmodel_per_arch
 {
 	double (*cost_model)(struct starpu_buffer_descr *t) STARPU_DEPRECATED; /* returns expected duration in µs */

+ 64 - 0
include/starpu_task_util.h

@@ -0,0 +1,64 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_TASK_UTIL_H__
+#define __STARPU_TASK_UTIL_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <starpu.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* This creates (and submits) an empty task that unlocks a tag once all its
+ * dependencies are fulfilled. */
+void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps,
+				void (*callback)(void *), void *callback_arg);
+
+/* Constants used by the starpu_insert_task helper to determine the different types of argument */
+#define STARPU_VALUE		(1<<4)	/* Pointer to a constant value */
+#define STARPU_CALLBACK		(1<<5)	/* Callback function */
+#define STARPU_CALLBACK_WITH_ARG	(1<<6)	/* Callback function together with its argument (NOTE(review): inferred from the name, confirm) */
+#define STARPU_CALLBACK_ARG	(1<<7)	/* Argument of the callback function (of type void *) */
+#define STARPU_PRIORITY		(1<<8)	/* Priority associated to the task */
+#define STARPU_EXECUTE_ON_NODE	(1<<9)	/* Used by MPI to define which node is going to execute the codelet (presumably given as a node rank, confirm) */
+#define STARPU_EXECUTE_ON_DATA	(1<<10)	/* Used by MPI to define which node is going to execute the codelet (presumably the owner of a given data handle, confirm) */
+#define STARPU_HYPERVISOR_TAG	(1<<11)	/* Used to tag a task after whose execution some code will be executed */
+#define STARPU_HYPERVISOR_FLOPS	(1<<12)	/* Used to specify the number of flops needed to be executed by a task */
+#define STARPU_DATA_ARRAY       (1<<13) /* Array of data handles */
+
+/* Wrapper to create a task. */
+int starpu_insert_task(struct starpu_codelet *cl, ...);
+
+/* Retrieve the arguments of type STARPU_VALUE associated to a task
+ * automatically created using starpu_insert_task. */
+void starpu_codelet_unpack_args(void *cl_arg, ...);
+
+/* Pack arguments of type STARPU_VALUE into a buffer which can be
+ * given to a codelet and later unpacked with starpu_codelet_unpack_args */
+void starpu_codelet_pack_args(char **arg_buffer, size_t *arg_buffer_size, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __STARPU_TASK_UTIL_H__ */

+ 0 - 32
include/starpu_util.h

@@ -214,10 +214,6 @@ static __inline int starpu_get_env_number(const char *str)
 /* Add an event in the execution trace if FxT is enabled */
 void starpu_trace_user_event(unsigned long code);
 
-/* Some helper functions for application using CUBLAS kernels */
-void starpu_helper_cublas_init(void);
-void starpu_helper_cublas_shutdown(void);
-
 /* Call func(arg) on every worker matching the "where" mask (eg.
  * STARPU_CUDA|STARPU_CPU to execute the function on every CPU and every CUDA
  * device). This function is synchronous, but the different workers may execute
@@ -225,11 +221,6 @@ void starpu_helper_cublas_shutdown(void);
  * */
 void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where);
 
-/* This creates (and submits) an empty task that unlocks a tag once all its
- * dependencies are fulfilled. */
-void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps,
-				void (*callback)(void *), void *callback_arg);
-
 /* Copy the content of the src_handle into the dst_handle handle.  The
  * asynchronous parameter indicates whether the function should block or not.
  * In the case of an asynchronous call, it is possible to synchronize with the
@@ -239,29 +230,6 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
  * copied, and it is given the callback_arg pointer as argument.*/
 int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
 
-/* Constants used by the starpu_insert_task helper to determine the different types of argument */
-#define STARPU_VALUE		(1<<4)	/* Pointer to a constant value */
-#define STARPU_CALLBACK		(1<<5)	/* Callback function */
-#define STARPU_CALLBACK_WITH_ARG	(1<<6)	/* Callback function */
-#define STARPU_CALLBACK_ARG	(1<<7)	/* Argument of the callback function (of type void *) */
-#define STARPU_PRIORITY		(1<<8)	/* Priority associated to the task */
-#define STARPU_EXECUTE_ON_NODE	(1<<9)	/* Used by MPI to define which task is going to execute the codelet */
-#define STARPU_EXECUTE_ON_DATA	(1<<10)	/* Used by MPI to define which task is going to execute the codelet */
-#define STARPU_HYPERVISOR_TAG	(1<<11)	/* Used to tag a task after whose execution we'll execute  a code */
-#define STARPU_HYPERVISOR_FLOPS	(1<<12)	/* Used to specify the number of flops needed to be executed by a task */
-#define STARPU_DATA_ARRAY       (1<<13) /* Array of data handles */
-
-/* Wrapper to create a task. */
-int starpu_insert_task(struct starpu_codelet *cl, ...);
-
-/* Retrieve the arguments of type STARPU_VALUE associated to a task
- * automatically created using starpu_insert_task. */
-void starpu_codelet_unpack_args(void *cl_arg, ...);
-
-/* Pack arguments of type STARPU_VALUE into a buffer which can be
- * given to a codelet and later unpacked with starpu_codelet_unpack_args */
-void starpu_codelet_pack_args(char **arg_buffer, size_t *arg_buffer_size, ...);
-
 #ifdef __cplusplus
 }
 #endif

+ 29 - 12
mpi/Makefile.am

@@ -64,7 +64,7 @@ endif
 
 AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS)
 LIBS = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/src/  -I$(top_srcdir)/examples/ -I$(top_builddir)/src -I$(top_builddir)/include
+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/src/  -I$(top_srcdir)/examples/ -I$(top_builddir)/src -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src
 AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
 
 lib_LTLIBRARIES = libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
@@ -74,22 +74,22 @@ libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined
   -version-info $(LIBSTARPUMPI_INTERFACE_CURRENT):$(LIBSTARPUMPI_INTERFACE_REVISION):$(LIBSTARPUMPI_INTERFACE_AGE)
 
 noinst_HEADERS =					\
-	starpu_mpi_private.h				\
-	starpu_mpi_fxt.h				\
-	starpu_mpi_stats.h
+	src/starpu_mpi_private.h				\
+	src/starpu_mpi_fxt.h				\
+	src/starpu_mpi_stats.h				\
+	src/starpu_mpi_datatype.h
 
 versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
 versinclude_HEADERS = 				\
-	starpu_mpi.h					\
-	starpu_mpi_datatype.h
+	include/starpu_mpi.h
 
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
-	starpu_mpi.c					\
-	starpu_mpi_helper.c				\
-	starpu_mpi_datatype.c				\
-	starpu_mpi_insert_task.c			\
-	starpu_mpi_collective.c				\
-	starpu_mpi_stats.c
+	src/starpu_mpi.c				\
+	src/starpu_mpi_helper.c				\
+	src/starpu_mpi_datatype.c			\
+	src/starpu_mpi_insert_task.c			\
+	src/starpu_mpi_collective.c			\
+	src/starpu_mpi_stats.c
 
 ###################
 # Stencil example #
@@ -203,6 +203,23 @@ examples_reduction_mpi_reduction_LDADD =	\
 check_PROGRAMS +=		\
 	examples/reduction/mpi_reduction
 
+###################
+# complex example #
+###################
+
+examplebin_PROGRAMS +=				\
+	examples/complex/mpi_complex
+
+examples_complex_mpi_complex_SOURCES =		\
+	examples/complex/mpi_complex.c		\
+	../examples/interface/complex_interface.c
+
+examples_complex_mpi_complex_LDADD =		\
+	libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
+
+check_PROGRAMS	+=	\
+	examples/complex/mpi_complex
+
 ########################
 # Unit testcases       #
 ########################

+ 2 - 3
mpi/examples/cholesky/mpi_cholesky_kernels.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,14 +15,13 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-#include <starpu_config.h>
+#include <starpu.h>
 #include "mpi_cholesky.h"
 #include "common/blas.h"
 #ifdef STARPU_USE_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cublas.h>
-#include <starpu_cuda.h>
 #ifdef STARPU_HAVE_MAGMA
 #include "magma.h"
 #include "magma_lapack.h"

+ 75 - 0
mpi/examples/complex/mpi_complex.c

@@ -0,0 +1,75 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include <interface/complex_interface.h>
+#include <interface/complex_codelet.h>
+
+/* Ranks 0 and 1 bounce a complex vector; rank 0 compares the echo with what it sent.
+ * Returns 77 (Automake's "skipped" exit status) when fewer than 2 nodes are available. */
+int main(int argc, char **argv)
+{
+	int rank, nodes;
+	int ret = starpu_init(NULL);
+
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+	starpu_mpi_initialize_extended(&rank, &nodes);
+
+	if (nodes < 2)
+	{
+		fprintf(stderr, "This program needs at least 2 nodes\n");
+		ret = 77;
+	}
+	else if (rank == 0)
+	{
+		double real[2] = {4.0, 2.0};
+		double imaginary[2] = {7.0, 9.0};
+		starpu_data_handle_t handle;
+		double real2[2] = {14.0, 12.0};
+		double imaginary2[2] = {17.0, 19.0};
+		starpu_data_handle_t handle2;
+		MPI_Status status;
+
+		starpu_complex_data_register(&handle, 0, real, imaginary, 2);
+		starpu_insert_task(&cl_display, STARPU_R, handle, 0);
+		starpu_mpi_send(handle, 1, 10, MPI_COMM_WORLD);
+		starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2);
+		starpu_mpi_recv(handle2, 1, 11, MPI_COMM_WORLD, &status);
+		starpu_insert_task(&cl_display, STARPU_R, handle2, 0);
+		starpu_insert_task(&cl_compare, STARPU_R, handle, STARPU_R, handle2, 0);
+		/* Inserted tasks run asynchronously: unregistering waits for them, so
+		 * the stack buffers above are never accessed after this scope ends. */
+		starpu_data_unregister(handle);
+		starpu_data_unregister(handle2);
+	}
+	else if (rank == 1)
+	{
+		double real[2] = {0.0, 0.0};
+		double imaginary[2] = {0.0, 0.0};
+		starpu_data_handle_t handle;
+		MPI_Status status;
+
+		starpu_complex_data_register(&handle, 0, real, imaginary, 2);
+		starpu_mpi_recv(handle, 0, 10, MPI_COMM_WORLD, &status);
+		starpu_insert_task(&cl_display, STARPU_R, handle, 0);
+		starpu_mpi_send(handle, 0, 11, MPI_COMM_WORLD);
+		starpu_data_unregister(handle); /* same lifetime concern as on rank 0 */
+	}
+	starpu_task_wait_for_all();
+	starpu_mpi_shutdown();
+	starpu_shutdown();
+	return ret;
+}

+ 1 - 6
mpi/examples/mpi_lu/pxlu.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,13 +18,8 @@
 #ifndef __PXLU_H__
 #define __PXLU_H__
 
-/* for STARPU_USE_CUDA */
-#include <starpu_config.h>
 #include <starpu.h>
-#include <starpu_cuda.h>
-
 #include <common/blas.h>
-
 #include <starpu_mpi.h>
 
 #define BLAS3_FLOP(n1,n2,n3)    \

+ 1 - 4
mpi/examples/mpi_lu/pxlu_kernels.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2012  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,9 +19,6 @@
 #define __PXLU_KERNELS_H__
 
 #include <starpu.h>
-#ifdef STARPU_USE_CUDA
-#include <starpu_cuda.h>
-#endif
 
 #define str(s) #s
 #define xstr(s)        str(s)

+ 5 - 1
mpi/starpu_mpi.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +19,9 @@
 #define __STARPU_MPI_H__
 
 #include <starpu.h>
+
+#if defined(STARPU_USE_MPI)
+
 #include <mpi.h>
 
 #ifdef __cplusplus
@@ -63,4 +66,5 @@ int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_
 }
 #endif
 
+#endif // STARPU_USE_MPI
 #endif // __STARPU_MPI_H__

+ 0 - 0
mpi/starpu_mpi.c


Some files were not shown because too many files changed in this diff