Browse Source

merge trunk

Nathalie Furmento 7 years ago
parent
commit
c415b96e18

+ 3 - 0
doc/doxygen/chapters/210_check_list_performance.doxy

@@ -13,6 +13,9 @@ TODO: improve!
 To achieve good
 performance, we give below a list of features which should be checked.
 
+As a starting point, you can use \ref OfflinePerformanceTools to get a Gantt chart
+which shows roughly where time is spent, and focus your efforts accordingly.
+
 \section ConfigurationImprovePerformance Configuration That May Improve Performance
 
 The \ref enable-fast "--enable-fast" configuration option disables all

+ 3 - 0
doc/doxygen/chapters/380_offline_performance_tools.doxy

@@ -123,6 +123,9 @@ $ vite paje.trace
 To get names of tasks instead of "unknown", fill the optional
 starpu_codelet::name, or use a performance model for them.
 
+One can also introduce user-defined events in the diagram thanks to the
+starpu_fxt_trace_user_event_string() function.
+
 In the MPI execution case, \ref STARPU_GENERATE_TRACE will not work as expected
 (each node will try to generate paje.trace, thus mixing outputs...), you have to
 collect the trace files from the MPI nodes, and

+ 3 - 1
doc/doxygen/chapters/410_mpi_support.doxy

@@ -190,7 +190,9 @@ int main(int argc, char **argv)
 
 We have here replaced <c>MPI_Recv()</c> and <c>MPI_Send()</c> with starpu_mpi_irecv_detached()
 and starpu_mpi_isend_detached(), which just submit the communication to be
-performed. The only remaining synchronization with starpu_data_acquire() is at
+performed. The implicit sequential consistency dependencies provide
+synchronization between MPI receptions and emissions and the corresponding tasks.
+The only remaining synchronization with starpu_data_acquire() is at
 the beginning and the end.
 
 \section MPIInitialization How to Initialize StarPU-MPI

+ 1 - 1
mpi/examples/matrix_decomposition/mpi_decomposition_params.c

@@ -86,7 +86,7 @@ void parse_args(int argc, char **argv, int nodes)
 
                 if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
                 {
-                        printf("usage : %s [-display] [-size size] [-nblocks nblocks]\n", argv[0]);
+			printf("usage : %s [-size size] [-nblocks nblocks] [-no-prio] [-display]\n", argv[0]);
                 }
         }
 

+ 1 - 1
mpi/examples/mpi_lu/pxlu.c

@@ -555,8 +555,8 @@ static void create_task_21_recv(unsigned k, unsigned i)
 				tag_array[ndeps++] = TAG22(k-1, i, j);
 #else
 				tag_array[ndeps++] = TAG22(k-2, i, j);
-		}
 #endif
+		}
 	}
 
 	int source = get_block_rank(i, k);

+ 1 - 1
src/core/disk.c

@@ -243,7 +243,7 @@ int _starpu_disk_copy(unsigned node_src, void *obj_src, off_t offset_src, unsign
 	/* Something goes wrong with copy disk to disk... */
 	if (!event)
 	{
-		if (channel || (!channel && starpu_asynchronous_copy_disabled()))
+		if (channel || starpu_asynchronous_copy_disabled())
 			disk_register_list[node_src]->functions->copy = NULL;
 
 		/* perform a read, and after a write... */

+ 0 - 1
src/core/disk_ops/unistd/disk_unistd_global.c

@@ -949,7 +949,6 @@ int starpu_unistd_global_test_request(void *async_channel)
 		case STARPU_UNISTD_COPY :
 		{
 			return starpu_sem_trywait(&event->event.event_copy->finished) == 0;
-			break;
 		}
 #endif
 

+ 6 - 1
src/core/perfmodel/perfmodel_bus.c

@@ -626,7 +626,12 @@ static void measure_bandwidth_between_numa_nodes_and_dev(int dev, struct dev_tim
 #ifdef STARPU_HAVE_HWLOC
 		hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NODE, numa_id);
 
-		cpu_id = find_cpu_from_numa_node(obj);
+		if (obj)
+			cpu_id = find_cpu_from_numa_node(obj);
+		else
+                        /* No such NUMA node, probably hwloc 1.x with no NUMA
+                         * node, just take one CPU from the whole system */
+			cpu_id = find_cpu_from_numa_node(hwloc_get_root_obj(hwtopology));
 #endif
 
 #ifdef STARPU_USE_CUDA

+ 7 - 7
src/core/topology.c

@@ -1314,13 +1314,13 @@ _starpu_topology_count_ngpus(hwloc_obj_t obj)
 		n += _starpu_topology_count_ngpus(obj->children[i]);
 
 	data->ngpus = n;
-#ifdef STARPU_VERBOSE
-	{
-		char name[64];
-		hwloc_obj_type_snprintf(name, sizeof(name), obj, 0);
-		_STARPU_DEBUG("hwloc obj %s has %u GPUs below\n", name, n);
-	}
-#endif
+//#ifdef STARPU_VERBOSE
+//	{
+//		char name[64];
+//		hwloc_obj_type_snprintf(name, sizeof(name), obj, 0);
+//		_STARPU_DEBUG("hwloc obj %s has %u GPUs below\n", name, n);
+//	}
+//#endif
 	return n;
 }
 #endif

+ 1 - 1
src/datawizard/copy_driver.c

@@ -997,7 +997,7 @@ unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *as
 		break;
 	case STARPU_CPU_RAM:
 	default:
-		STARPU_ABORT_MSG("Memory is not recognized (kind %u) \n", kind);
+		STARPU_ABORT_MSG("Memory is not recognized (kind %d) \n", kind);
 	}
 
 	return success;

+ 4 - 4
src/debug/traces/starpu_fxt.c

@@ -741,7 +741,7 @@ static void worker_set_state(double time, const char *prefix, long unsigned int
 	worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid);
 	poti_SetState(time, container, "WS", name);
 #else
-	fprintf(out_paje_file, "10	%.9f	%sw%lu	WS	%s\n", time, prefix, workerid, name);
+	fprintf(out_paje_file, "10	%.9f	%sw%lu	WS	\"%s\"\n", time, prefix, workerid, name);
 #endif
 }
 
@@ -1474,7 +1474,7 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 			worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, ev->param[2]);
 			poti_SetState(start_codelet_time, container, ctx, name);
 #else
-			fprintf(out_paje_file, "10	%.9f	%sw%"PRIu64"	Ctx%d	%s\n", start_codelet_time, prefix, ev->param[2], sched_ctx, name);
+			fprintf(out_paje_file, "10	%.9f	%sw%"PRIu64"	Ctx%d	\"%s\"\n", start_codelet_time, prefix, ev->param[2], sched_ctx, name);
 #endif
 		}
 	}
@@ -1636,7 +1636,7 @@ static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 			poti_SetState(last_codelet_start[worker], container, typectx, name);
 #endif
 #else
-			fprintf(out_paje_file, "21	%.9f	%sw%d	Ctx%u	%s	%ld	%s	%08lx	%016lx	%s%lu	%s%lu\n", last_codelet_start[worker], prefix, worker, sched_ctx, _starpu_last_codelet_symbol[worker], ev->param[1], parameters,  ev->param[2], ev->param[4], prefix, job_id, prefix, task->submit_order);
+			fprintf(out_paje_file, "21	%.9f	%sw%d	Ctx%u	\"%s\"	%ld	%s	%08lx	%016lx	%s%lu	%s%lu\n", last_codelet_start[worker], prefix, worker, sched_ctx, _starpu_last_codelet_symbol[worker], ev->param[1], parameters,  ev->param[2], ev->param[4], prefix, job_id, prefix, task->submit_order);
 #endif
 		}
 	}
@@ -2995,7 +2995,7 @@ static void handle_string_event(struct fxt_ev_64 *ev, const char *event, struct
 		snprintf(container, sizeof(container), "%sp", options->file_prefix);
 		poti_NewEvent(get_event_time_stamp(ev, options), container, "prog_event", event);
 #else
-		fprintf(out_paje_file, "9	%.9f	prog_event	%sp	%s\n", get_event_time_stamp(ev, options), options->file_prefix, event);
+		fprintf(out_paje_file, "9	%.9f	prog_event	%sp	\"%s\"\n", get_event_time_stamp(ev, options), options->file_prefix, event);
 #endif
 	}