Преглед изворни кода

- rename MAXNODES to STARPU_MAXNODES.
- automatically compute this value given the maximum number of devices.

Cédric Augonnet пре 15 година
родитељ
комит
1c9e738133

+ 14 - 0
configure.ac

@@ -384,6 +384,20 @@ AC_MSG_RESULT($nmaxbuffers)
 AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers], 
 		[how many buffers can be manipulated per task])
 
+# We have one memory node shared by all CPU workers, one node per GPU, and
+# currently the Cell driver is using the same memory node as the CPU.
+if test x$enable_cuda = xyes; then
+	# we could have used nmaxcudadev + 1, but this would certainly give an
+	# odd number.
+	maxnodes=`expr 2 \* $nmaxcudadev`
+else
+	maxnodes=1	
+fi
+AC_MSG_CHECKING(maximum number of memory nodes)
+AC_MSG_RESULT($maxnodes)
+AC_DEFINE_UNQUOTED(STARPU_MAXNODES, [$maxnodes],
+		[maximum number of memory nodes])
+
 
 AC_MSG_CHECKING(whether priorities should be enabled)
 AC_ARG_ENABLE(priority, [AS_HELP_STRING([--disable-priority],

+ 3 - 3
examples/pastix-wrappers/starpu-blas-wrapper.c

@@ -116,7 +116,7 @@ void divide_vector_in_blas_filter(starpu_filter *f, starpu_data_handle root_data
 	
 	if (n1 > 0)
 	{
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			starpu_vector_interface_t *local = &root_data->children[child].interface[node].vector;
 	
@@ -132,7 +132,7 @@ void divide_vector_in_blas_filter(starpu_filter *f, starpu_data_handle root_data
 		child++;
 	}
 	
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_blas_interface_t *local = &root_data->children[child].interface[node].blas;
 
@@ -151,7 +151,7 @@ void divide_vector_in_blas_filter(starpu_filter *f, starpu_data_handle root_data
 
 	child++;
 
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_vector_interface_t *local = &root_data->children[child].interface[node].vector;
 

+ 0 - 1
src/Makefile.am

@@ -58,7 +58,6 @@ noinst_HEADERS = 						\
 	datawizard/write_back.h					\
 	datawizard/datastats.h					\
 	datawizard/memalloc.h					\
-	datawizard/data_parameters.h				\
 	datawizard/copy-driver.h				\
 	datawizard/coherency.h					\
 	datawizard/memory_nodes.h				\

+ 29 - 14
src/core/perfmodel/perfmodel_bus.c

@@ -22,7 +22,6 @@
 #include <common/config.h>
 #include <core/workers.h>
 #include <core/perfmodel/perfmodel.h>
-#include <datawizard/data_parameters.h>
 
 #define SIZE	(32*1024*1024*sizeof(char))
 #define NITER	128
@@ -35,8 +34,8 @@ struct cudadev_timing {
 	double timing_dtoh;
 };
 
-static double bandwith_matrix[MAXNODES][MAXNODES] = {{-1.0}};
-static double latency_matrix[MAXNODES][MAXNODES] = {{ -1.0}};
+static double bandwith_matrix[STARPU_MAXNODES][STARPU_MAXNODES] = {{-1.0}};
+static double latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES] = {{ -1.0}};
 static unsigned was_benchmarked = 0;
 static int ncuda = 0;
 
@@ -45,10 +44,10 @@ static int affinity_matrix[STARPU_MAXCUDADEVS][MAXCPUS];
 /* Benchmarking the performance of the bus */
 
 #ifdef USE_CUDA
-static double cudadev_timing_htod[MAXNODES] = {0.0};
-static double cudadev_timing_dtoh[MAXNODES] = {0.0};
+static double cudadev_timing_htod[STARPU_MAXNODES] = {0.0};
+static double cudadev_timing_dtoh[STARPU_MAXNODES] = {0.0};
 
-static struct cudadev_timing cudadev_timing_per_cpu[MAXNODES][MAXCPUS];
+static struct cudadev_timing cudadev_timing_per_cpu[STARPU_MAXNODES][MAXCPUS];
 
 static void measure_bandwith_between_host_and_dev_on_cpu(int dev, int cpu)
 {
@@ -64,20 +63,36 @@ static void measure_bandwith_between_host_and_dev_on_cpu(int dev, int cpu)
 	/* hack to force the initialization */
 	cudaFree(0);
 
+	/* hack to prevent third-party libs from rebinding threads */
+	_starpu_bind_thread_on_cpu(config, cpu);
+
+
 	/* Allocate a buffer on the device */
 	unsigned char *d_buffer;
 	cudaMalloc((void **)&d_buffer, SIZE);
 	assert(d_buffer);
 
+	/* hack to prevent third-party libs from rebinding threads */
+	_starpu_bind_thread_on_cpu(config, cpu);
+
+
 	/* Allocate a buffer on the host */
 	unsigned char *h_buffer;
 	cudaHostAlloc((void **)&h_buffer, SIZE, 0); 
 	assert(h_buffer);
 
+	/* hack to prevent third-party libs from rebinding threads */
+	_starpu_bind_thread_on_cpu(config, cpu);
+
+
 	/* Fill them */
 	memset(h_buffer, 0, SIZE);
 	cudaMemset(d_buffer, 0, SIZE);
 
+	/* hack to prevent third-party libs from rebinding threads */
+	_starpu_bind_thread_on_cpu(config, cpu);
+
+
 	unsigned iter;
 	double timing;
 	struct timeval start;
@@ -369,9 +384,9 @@ static void load_bus_latency_file_content(void)
 	f = fopen(path, "r");
 	STARPU_ASSERT(f);
 
-	for (src = 0; src < MAXNODES; src++)
+	for (src = 0; src < STARPU_MAXNODES; src++)
 	{
-		for (dst = 0; dst < MAXNODES; dst++)
+		for (dst = 0; dst < STARPU_MAXNODES; dst++)
 		{
 			double latency;
 
@@ -405,9 +420,9 @@ static void write_bus_latency_file_content(void)
 		STARPU_ABORT();
 	}
 
-	for (src = 0; src < MAXNODES; src++)
+	for (src = 0; src < STARPU_MAXNODES; src++)
 	{
-		for (dst = 0; dst < MAXNODES; dst++)
+		for (dst = 0; dst < STARPU_MAXNODES; dst++)
 		{
 			double latency;
 
@@ -483,9 +498,9 @@ static void load_bus_bandwith_file_content(void)
 		STARPU_ABORT();
 	}
 
-	for (src = 0; src < MAXNODES; src++)
+	for (src = 0; src < STARPU_MAXNODES; src++)
 	{
-		for (dst = 0; dst < MAXNODES; dst++)
+		for (dst = 0; dst < STARPU_MAXNODES; dst++)
 		{
 			double bandwith;
 
@@ -515,9 +530,9 @@ static void write_bus_bandwith_file_content(void)
 	f = fopen(path, "w+");
 	STARPU_ASSERT(f);
 
-	for (src = 0; src < MAXNODES; src++)
+	for (src = 0; src < STARPU_MAXNODES; src++)
 	{
-		for (dst = 0; dst < MAXNODES; dst++)
+		for (dst = 0; dst < STARPU_MAXNODES; dst++)
 		{
 			double bandwith;
 			

+ 3 - 3
src/datawizard/coherency.h

@@ -32,8 +32,8 @@
 #include <common/timing.h>
 #include <common/fxt.h>
 #include <common/list.h>
+#include <common/config.h>
 
-#include <datawizard/data_parameters.h>
 #include <datawizard/data_request.h>
 #include <datawizard/interfaces/data_interface.h>
 #include <datawizard/progress.h>
@@ -87,10 +87,10 @@ struct starpu_data_state_t {
 	unsigned nchildren;
 
 	/* describe the state of the data in term of coherency */
-	local_data_state per_node[MAXNODES];
+	local_data_state per_node[STARPU_MAXNODES];
 
 	/* describe the actual data layout */
-	void *interface[MAXNODES];
+	void *interface[STARPU_MAXNODES];
 
 	struct data_interface_ops_t *ops;
 

+ 0 - 22
src/datawizard/data_parameters.h

@@ -1,22 +0,0 @@
-/*
- * StarPU
- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __DATA_PARAMETERS_H__
-#define __DATA_PARAMETERS_H__
-
-#define MAXNODES	6
-
-#endif // __DATA_PARAMETERS_H__

+ 8 - 8
src/datawizard/data_request.c

@@ -19,19 +19,19 @@
 #include <pthread.h>
 
 /* requests that have not been treated at all */
-static data_request_list_t data_requests[MAXNODES];
-static pthread_cond_t data_requests_list_cond[MAXNODES];
-static pthread_mutex_t data_requests_list_mutex[MAXNODES];
+static data_request_list_t data_requests[STARPU_MAXNODES];
+static pthread_cond_t data_requests_list_cond[STARPU_MAXNODES];
+static pthread_mutex_t data_requests_list_mutex[STARPU_MAXNODES];
 
 /* requests that are not terminated (eg. async transfers) */
-static data_request_list_t data_requests_pending[MAXNODES];
-static pthread_cond_t data_requests_pending_list_cond[MAXNODES];
-static pthread_mutex_t data_requests_pending_list_mutex[MAXNODES];
+static data_request_list_t data_requests_pending[STARPU_MAXNODES];
+static pthread_cond_t data_requests_pending_list_cond[STARPU_MAXNODES];
+static pthread_mutex_t data_requests_pending_list_mutex[STARPU_MAXNODES];
 
 void init_data_request_lists(void)
 {
 	unsigned i;
-	for (i = 0; i < MAXNODES; i++)
+	for (i = 0; i < STARPU_MAXNODES; i++)
 	{
 		data_requests[i] = data_request_list_new();
 		pthread_mutex_init(&data_requests_list_mutex[i], NULL);
@@ -46,7 +46,7 @@ void init_data_request_lists(void)
 void deinit_data_request_lists(void)
 {
 	unsigned i;
-	for (i = 0; i < MAXNODES; i++)
+	for (i = 0; i < STARPU_MAXNODES; i++)
 	{
 		pthread_cond_destroy(&data_requests_pending_list_cond[i]);
 		pthread_mutex_destroy(&data_requests_pending_list_mutex[i]);

+ 1 - 1
src/datawizard/data_request.h

@@ -48,7 +48,7 @@ LIST_TYPE(data_request,
 	int retval;
 
 	/* in case we have a chain of request (eg. for nvidia multi-GPU) */
-	struct data_request_s *next_req[MAXNODES];
+	struct data_request_s *next_req[STARPU_MAXNODES];
 	/* who should perform the next request ? */
 	unsigned next_req_count;
 

+ 12 - 13
src/datawizard/datastats.c

@@ -18,13 +18,12 @@
 #include <datawizard/datastats.h>
 #include <common/config.h>
 #include <starpu.h>
-#include <datawizard/data_parameters.h>
 
 /* measure the cache hit ratio for each node */
 
 #ifdef DATA_STATS
-static unsigned hit_cnt[MAXNODES];
-static unsigned miss_cnt[MAXNODES];
+static unsigned hit_cnt[STARPU_MAXNODES];
+static unsigned miss_cnt[STARPU_MAXNODES];
 #endif
 
 inline void msi_cache_hit(unsigned node __attribute__ ((unused)))
@@ -50,7 +49,7 @@ void display_msi_stats(void)
 
 	fprintf(stderr, "MSI cache stats :\n");
 
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		total_hit_cnt += hit_cnt[node];
 		total_miss_cnt += miss_cnt[node];
@@ -58,7 +57,7 @@ void display_msi_stats(void)
 
 	fprintf(stderr, "TOTAL MSI stats\thit %u (%2.2f \%%)\tmiss %u (%2.2f \%%)\n", total_hit_cnt, (100.0f*total_hit_cnt)/(total_hit_cnt+total_miss_cnt), total_miss_cnt, (100.0f*total_miss_cnt)/(total_hit_cnt+total_miss_cnt));
 
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		if (hit_cnt[node]+miss_cnt[node])
 		{
@@ -73,8 +72,8 @@ void display_msi_stats(void)
 /* measure the efficiency of our allocation cache */
 
 #ifdef DATA_STATS
-static unsigned alloc_cnt[MAXNODES];
-static unsigned alloc_cache_hit_cnt[MAXNODES];
+static unsigned alloc_cnt[STARPU_MAXNODES];
+static unsigned alloc_cache_hit_cnt[STARPU_MAXNODES];
 #endif
 
 inline void allocation_cache_hit(unsigned node __attribute__ ((unused)))
@@ -96,7 +95,7 @@ void display_alloc_cache_stats(void)
 #ifdef DATA_STATS
 	fprintf(stderr, "Allocation cache stats:\n");
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++) 
+	for (node = 0; node < STARPU_MAXNODES; node++) 
 	{
 		if (alloc_cnt[node]) 
 		{
@@ -112,7 +111,7 @@ void display_alloc_cache_stats(void)
 /* measure the amount of data transfers between each pair of nodes */
 #ifdef DATA_STATS
 
-static size_t comm_ammount[MAXNODES][MAXNODES];
+static size_t comm_ammount[STARPU_MAXNODES][STARPU_MAXNODES];
 
 void display_comm_ammounts(void)
 {
@@ -120,16 +119,16 @@ void display_comm_ammounts(void)
 
 	unsigned long sum = 0;
 
-	for (dst = 0; dst < MAXNODES; dst++)
-	for (src = 0; src < MAXNODES; src++)
+	for (dst = 0; dst < STARPU_MAXNODES; dst++)
+	for (src = 0; src < STARPU_MAXNODES; src++)
 	{
 		sum += (unsigned long)comm_ammount[src][dst];
 	}
 
 	fprintf(stderr, "\nData transfers stats:\nTOTAL transfers %ld MB\n", sum/(1024*1024));
 
-	for (dst = 0; dst < MAXNODES; dst++)
-	for (src = dst + 1; src < MAXNODES; src++)
+	for (dst = 0; dst < STARPU_MAXNODES; dst++)
+	for (src = dst + 1; src < STARPU_MAXNODES; src++)
 	{
 		if (comm_ammount[src][dst])
 			fprintf(stderr, "\t%d <-> %d\t%ld MB\n\t\t%d -> %d\t%ld MB\n\t\t%d -> %d\t%ld MB\n",

+ 10 - 10
src/datawizard/hierarchy.c

@@ -23,7 +23,7 @@
 static void starpu_data_liberate_interfaces(starpu_data_handle handle)
 {
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 		free(handle->interface[node]);
 }
 
@@ -33,7 +33,7 @@ void starpu_delete_data(starpu_data_handle handle)
 	unsigned node;
 
 	STARPU_ASSERT(handle);
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		local_data_state *local = &handle->per_node[node];
 
@@ -63,7 +63,7 @@ void register_new_data(starpu_data_handle handle, uint32_t home_node, uint32_t w
 	starpu_spin_lock(&handle->header_lock);
 
 	/* we assume that all nodes may use that data */
-	handle->nnodes = MAXNODES;
+	handle->nnodes = STARPU_MAXNODES;
 
 	/* there is no hierarchy yet */
 	handle->nchildren = 0;
@@ -75,7 +75,7 @@ void register_new_data(starpu_data_handle handle, uint32_t home_node, uint32_t w
 	/* that new data is invalid from all nodes perpective except for the
 	 * home node */
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		if (node == home_node) {
 			/* this is the home node with the only valid copy */
@@ -207,7 +207,7 @@ void starpu_partition_data(starpu_data_handle initial_handle, starpu_filter *f)
 		starpu_spin_init(&children->header_lock);
 
 		unsigned node;
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			children->per_node[node].state = 
 				initial_handle->per_node[node].state;
@@ -255,7 +255,7 @@ void starpu_unpartition_data(starpu_data_handle root_handle, uint32_t gathering_
 	 * for the gathering node, if we have some locally allocated data, we 
 	 * copy all the children (XXX this should not happen so we just do not
 	 * do anything since this is transparent ?) */
-	unsigned still_valid[MAXNODES];
+	unsigned still_valid[STARPU_MAXNODES];
 
 	/* we do 2 passes : the first pass determines wether the data is still
 	 * valid or not, the second pass is needed to choose between SHARED and
@@ -264,7 +264,7 @@ void starpu_unpartition_data(starpu_data_handle root_handle, uint32_t gathering_
 	unsigned nvalids = 0;
 
 	/* still valid ? */
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		/* until an issue is found the data is assumed to be valid */
 		unsigned isvalid = 1;
@@ -294,7 +294,7 @@ void starpu_unpartition_data(starpu_data_handle root_handle, uint32_t gathering_
 
 	cache_state newstate = (nvalids == 1)?OWNER:SHARED;
 
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		root_handle->per_node[node].state = 
 			still_valid[node]?newstate:INVALID;
@@ -340,7 +340,7 @@ starpu_data_handle starpu_data_state_create(struct data_interface_ops_t *interfa
 	size_t interfacesize = interface_ops->interface_size;
 
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		handle->interface[node] = calloc(1, interfacesize);
 		STARPU_ASSERT(handle->interface[node]);
@@ -368,7 +368,7 @@ void starpu_data_create_children(starpu_data_handle handle,
 
 		size_t interfacesize = children_interface_ops->interface_size;
 
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			handle_child->interface[node] = calloc(1, interfacesize);
 			STARPU_ASSERT(handle->children->interface[node]);

+ 1 - 1
src/datawizard/interfaces/bcsr_filters.c

@@ -54,7 +54,7 @@ void starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused))
 		uint32_t ptr_offset = c*r*chunk*elemsize;
 
 		unsigned node;
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			starpu_blas_interface_t *local =
 				starpu_data_get_interface_on_node(sub_handle, node);

+ 1 - 2
src/datawizard/interfaces/bcsr_interface.c

@@ -17,7 +17,6 @@
 #include <starpu.h>
 #include <common/config.h>
 
-#include <datawizard/data_parameters.h>
 #include <datawizard/coherency.h>
 #include <datawizard/copy-driver.h>
 #include <datawizard/hierarchy.h>
@@ -69,7 +68,7 @@ static void register_bcsr_handle(starpu_data_handle handle, uint32_t home_node,
 	starpu_bcsr_interface_t *bcsr_interface = interface;
 
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_bcsr_interface_t *local_interface =
 			starpu_data_get_interface_on_node(handle, node);

+ 2 - 2
src/datawizard/interfaces/blas_filters.c

@@ -54,7 +54,7 @@ void starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
 			starpu_data_get_child(root_handle, chunk);
 
 		unsigned node;
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			starpu_blas_interface_t *local = 
 				starpu_data_get_interface_on_node(chunk_handle, node);
@@ -105,7 +105,7 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 			starpu_data_get_child(root_handle, chunk);
 
 		unsigned node;
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			starpu_blas_interface_t *local =
 				starpu_data_get_interface_on_node(chunk_handle, node);

+ 2 - 4
src/datawizard/interfaces/blas_interface.c

@@ -14,16 +14,14 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+#include <starpu.h>
 #include <common/config.h>
-#include <datawizard/data_parameters.h>
 #include <datawizard/coherency.h>
 #include <datawizard/copy-driver.h>
 #include <datawizard/hierarchy.h>
 
 #include <common/hash.h>
 
-#include <starpu.h>
-
 #ifdef USE_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -101,7 +99,7 @@ static void register_blas_handle(starpu_data_handle handle, uint32_t home_node,
 	starpu_blas_interface_t *blas_interface = interface;
 
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_blas_interface_t *local_interface =
 			starpu_data_get_interface_on_node(handle, node);

+ 1 - 2
src/datawizard/interfaces/block_interface.c

@@ -16,7 +16,6 @@
 
 #include <starpu.h>
 #include <common/config.h>
-#include <datawizard/data_parameters.h>
 #include <datawizard/coherency.h>
 #include <datawizard/copy-driver.h>
 #include <datawizard/hierarchy.h>
@@ -88,7 +87,7 @@ static void register_block_handle(starpu_data_handle handle, uint32_t home_node,
 	starpu_block_interface_t *block_interface = interface;
 
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_block_interface_t *local_interface =
 			starpu_data_get_interface_on_node(handle, node);

+ 1 - 1
src/datawizard/interfaces/csr_filters.c

@@ -57,7 +57,7 @@ void starpu_vertical_block_filter_func_csr(starpu_filter *f, starpu_data_handle
 			starpu_data_get_child(root_handle, chunk);
 
 		unsigned node;
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			starpu_csr_interface_t *local = 
 				starpu_data_get_interface_on_node(chunk_handle, node);

+ 2 - 2
src/datawizard/interfaces/csr_interface.c

@@ -15,7 +15,7 @@
  */
 
 #include <starpu.h>
-#include <datawizard/data_parameters.h>
+#include <common/config.h>
 #include <datawizard/coherency.h>
 #include <datawizard/copy-driver.h>
 #include <datawizard/hierarchy.h>
@@ -65,7 +65,7 @@ static void register_csr_handle(starpu_data_handle handle, uint32_t home_node, v
 	starpu_csr_interface_t *csr_interface = interface;
 
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_csr_interface_t *local_interface =
 			starpu_data_get_interface_on_node(handle, node);

+ 0 - 1
src/datawizard/interfaces/data_interface.h

@@ -19,7 +19,6 @@
 
 #include <starpu.h>
 #include <common/config.h>
-#include <datawizard/data_parameters.h>
 
 #ifdef USE_GORDON
 /* to get the gordon_strideSize_t data structure from gordon */

+ 4 - 4
src/datawizard/interfaces/vector_filters.c

@@ -49,7 +49,7 @@ void starpu_block_filter_func_vector(starpu_filter *f, starpu_data_handle root_h
 			starpu_data_get_child(root_handle, chunk);
 
 		unsigned node;
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			starpu_vector_interface_t *local =
 				starpu_data_get_interface_on_node(chunk_handle, node);
@@ -87,7 +87,7 @@ void starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle
 		starpu_data_get_child(root_handle, 0);
 
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_vector_interface_t *local =
 			starpu_data_get_interface_on_node(chunk0_handle, node);
@@ -106,7 +106,7 @@ void starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle
 	starpu_data_handle chunk1_handle =
 		starpu_data_get_child(root_handle, 1);
 
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_vector_interface_t *local =
 			starpu_data_get_interface_on_node(chunk1_handle, node);
@@ -148,7 +148,7 @@ void starpu_list_filter_func_vector(starpu_filter *f, starpu_data_handle root_ha
 		uint32_t chunk_size = length_tab[chunk];
 
 		unsigned node;
-		for (node = 0; node < MAXNODES; node++)
+		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			starpu_vector_interface_t *local =
 				starpu_data_get_interface_on_node(chunk_handle, node);

+ 2 - 4
src/datawizard/interfaces/vector_interface.c

@@ -14,16 +14,14 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+#include <starpu.h>
 #include <common/config.h>
-#include <datawizard/data_parameters.h>
 #include <datawizard/coherency.h>
 #include <datawizard/copy-driver.h>
 #include <datawizard/hierarchy.h>
 
 #include <common/hash.h>
 
-#include <starpu.h>
-
 #ifdef USE_CUDA
 #include <cuda.h>
 #endif
@@ -82,7 +80,7 @@ static void register_vector_handle(starpu_data_handle handle, uint32_t home_node
 	starpu_vector_interface_t *vector_interface = interface;
 
 	unsigned node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		starpu_vector_interface_t *local_interface = 
 			starpu_data_get_interface_on_node(handle, node);

+ 6 - 6
src/datawizard/memalloc.c

@@ -17,16 +17,16 @@
 #include "memalloc.h"
 #include <datawizard/footprint.h>
 
-static pthread_rwlock_t mc_rwlock[MAXNODES]; 
-static mem_chunk_list_t mc_list[MAXNODES];
-static mem_chunk_list_t mc_list_to_free[MAXNODES];
+static pthread_rwlock_t mc_rwlock[STARPU_MAXNODES]; 
+static mem_chunk_list_t mc_list[STARPU_MAXNODES];
+static mem_chunk_list_t mc_list_to_free[STARPU_MAXNODES];
 
 static size_t liberate_memory_on_node(mem_chunk_t mc, uint32_t node);
 
 void _starpu_init_mem_chunk_lists(void)
 {
 	unsigned i;
-	for (i = 0; i < MAXNODES; i++)
+	for (i = 0; i < STARPU_MAXNODES; i++)
 	{
 		pthread_rwlock_init(&mc_rwlock[i], NULL);
 		mc_list[i] = mem_chunk_list_new();
@@ -37,7 +37,7 @@ void _starpu_init_mem_chunk_lists(void)
 void _starpu_deinit_mem_chunk_lists(void)
 {
 	unsigned i;
-	for (i = 0; i < MAXNODES; i++)
+	for (i = 0; i < STARPU_MAXNODES; i++)
 	{
 		mem_chunk_list_delete(mc_list[i]);
 		mem_chunk_list_delete(mc_list_to_free[i]);
@@ -157,7 +157,7 @@ static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_nod
 
 			/* count the number of copies */
 			cnt = 0;
-			for (i = 0; i < MAXNODES; i++)
+			for (i = 0; i < STARPU_MAXNODES; i++)
 			{
 				if (handle->per_node[i].state == SHARED) {
 					cnt++; 

+ 1 - 1
src/datawizard/memory_nodes.c

@@ -34,7 +34,7 @@ void init_memory_nodes(void)
 	pthread_key_create(&memory_node_key, NULL);
 
 	unsigned i;
-	for (i = 0; i < MAXNODES; i++) 
+	for (i = 0; i < STARPU_MAXNODES; i++) 
 		descr.nodes[i] = UNUSED; 
 
 	_starpu_init_mem_chunk_lists();

+ 4 - 4
src/datawizard/memory_nodes.h

@@ -33,17 +33,17 @@ typedef enum {
 
 typedef struct {
 	unsigned nnodes;
-	node_kind nodes[MAXNODES];
+	node_kind nodes[STARPU_MAXNODES];
 
 	/* the list of queues that are attached to a given node */
 	// XXX 32 is set randomly !
 	// TODO move this 2 lists outside mem_node_descr
 	pthread_rwlock_t attached_queues_rwlock;
-	struct jobq_s *attached_queues_per_node[MAXNODES][32];
-	struct jobq_s *attached_queues_all[MAXNODES*32];
+	struct jobq_s *attached_queues_per_node[STARPU_MAXNODES][32];
+	struct jobq_s *attached_queues_all[STARPU_MAXNODES*32];
 	/* the number of queues attached to each node */
 	unsigned total_queues_count;
-	unsigned queues_count[MAXNODES];
+	unsigned queues_count[STARPU_MAXNODES];
 } mem_node_descr;
 
 void init_memory_nodes(void);

+ 1 - 1
src/datawizard/write_back.c

@@ -30,7 +30,7 @@ void write_through_data(starpu_data_handle handle, uint32_t requesting_node,
 
 	/* first commit all changes onto the nodes specified by the mask */
 	uint32_t node;
-	for (node = 0; node < MAXNODES; node++)
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		if (write_through_mask & (1<<node)) {
 			/* we need to commit the buffer on that node */