Browse Source

finish r2995

Samuel Thibault 14 years ago
parent
commit
2c3c6b45a5

+ 3 - 0
include/starpu_perfmodel.h

@@ -112,6 +112,9 @@ void starpu_perfmodel_debugfilepath(struct starpu_perfmodel_t *model,
 void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch,
 		char *archname, size_t maxlen);
 int starpu_list_models(void);
+double starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_archtype arch);
+double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtype);
+double starpu_data_expected_penalty(uint32_t memory_node, struct starpu_task *task);
 
 void starpu_force_bus_sampling(void);
 

+ 13 - 0
include/starpu_scheduler.h

@@ -119,6 +119,19 @@ void starpu_sched_set_max_priority(int max_prio);
 
 int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]);
 
+/* Return the current date */
+double starpu_timing_now(void);
+
+/* Check if the worker specified by workerid can execute the codelet. */
+int starpu_worker_may_execute_task(unsigned workerid, struct starpu_task *task);
+int starpu_combined_worker_may_execute_task(unsigned workerid, struct starpu_task *task);
+
+/* Whether STARPU_PREFETCH was set */
+int starpu_get_prefetch_flag(void);
+/* Prefetch data for a given task on a given node */
+int starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node);
+
+/* Initialize combined workers */
 void _starpu_sched_find_worker_combinations(struct starpu_machine_topology_s *topology);
 
 int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid);

+ 1 - 1
src/common/timing.c

@@ -185,7 +185,7 @@ double starpu_timing_timespec_to_us(struct timespec *ts)
 	return (1000000.0*ts->tv_sec) + (0.001*ts->tv_nsec);
 }
 
-double _starpu_timing_now(void)
+double starpu_timing_now(void)
 {
 	struct timespec now;
 	starpu_clock_gettime(&now);

+ 1 - 1
src/common/timing.h

@@ -30,7 +30,7 @@
 
 void _starpu_timing_init(void);
 void starpu_clock_gettime(struct timespec *ts);
-double _starpu_timing_now(void);
+double starpu_timing_now(void);
 
 #endif /* TIMING_H */
 

+ 4 - 4
src/core/perfmodel/perfmodel.c

@@ -91,7 +91,7 @@ static double per_arch_task_expected_length(struct starpu_perfmodel_t *model, en
  * Common model
  */
 
-double _starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtype)
+double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtype)
 {
 	if (perf_archtype < STARPU_CUDA_DEFAULT)
 	{
@@ -123,7 +123,7 @@ static double common_task_expected_length(struct starpu_perfmodel_t *model, enum
 
 	if (model->cost_model) {
 		exp = model->cost_model(task->buffers);
-		alpha = _starpu_worker_get_relative_speedup(arch);
+		alpha = starpu_worker_get_relative_speedup(arch);
 
 		STARPU_ASSERT(alpha != 0.0f);
 
@@ -133,7 +133,7 @@ static double common_task_expected_length(struct starpu_perfmodel_t *model, enum
 	return -1.0;
 }
 
-double _starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_archtype arch)
+double starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_archtype arch)
 {
 	starpu_job_t j = _starpu_get_job_associated_to_task(task);
 	struct starpu_perfmodel_t *model = task->cl->model;
@@ -162,7 +162,7 @@ double _starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_a
 }
 
 /* Data transfer performance modeling */
-double _starpu_data_expected_penalty(uint32_t memory_node, struct starpu_task *task)
+double starpu_data_expected_penalty(uint32_t memory_node, struct starpu_task *task)
 {
 	unsigned nbuffers = task->cl->nbuffers;
 	unsigned buffer;

+ 0 - 5
src/core/perfmodel/perfmodel.h

@@ -95,14 +95,11 @@ void _starpu_register_model(struct starpu_perfmodel_t *model);
 void _starpu_initialize_registered_performance_models(void);
 void _starpu_deinitialize_registered_performance_models(void);
 
-double _starpu_task_expected_length(struct starpu_task *task, enum starpu_perf_archtype arch);
 double _starpu_regression_based_job_expected_length(struct starpu_perfmodel_t *model,
 					enum starpu_perf_archtype arch, struct starpu_job_s *j);
 void _starpu_update_perfmodel_history(struct starpu_job_s *j, enum starpu_perf_archtype arch,
 				unsigned cpuid, double measured);
 
-double _starpu_data_expected_penalty(uint32_t memory_node, struct starpu_task *task);
-
 void _starpu_create_sampling_directory_if_needed(void);
 
 void _starpu_load_bus_performance_files(void);
@@ -111,8 +108,6 @@ double _starpu_predict_transfer_time(unsigned src_node, unsigned dst_node, size_
 void _starpu_set_calibrate_flag(unsigned val);
 unsigned _starpu_get_calibrate_flag(void);
 
-double _starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtype);
-
 enum starpu_perf_archtype starpu_worker_get_perf_archtype(int workerid);
 
 #if defined(STARPU_USE_CUDA)

+ 2 - 2
src/core/sched_policy.c

@@ -26,7 +26,7 @@ static struct starpu_sched_policy_s policy;
 
 static int use_prefetch = 0;
 
-int _starpu_get_prefetch_flag(void)
+int starpu_get_prefetch_flag(void)
 {
 	return use_prefetch;
 }
@@ -228,7 +228,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 	}
 
 	if (use_prefetch)
-		_starpu_prefetch_task_input_on_node(task, memory_node);
+		starpu_prefetch_task_input_on_node(task, memory_node);
 
 	if (is_basic_worker)
 	{

+ 0 - 2
src/core/sched_policy.h

@@ -28,8 +28,6 @@ struct starpu_sched_policy_s *_starpu_get_sched_policy(void);
 void _starpu_init_sched_policy(struct starpu_machine_config_s *config);
 void _starpu_deinit_sched_policy(struct starpu_machine_config_s *config);
 
-int _starpu_get_prefetch_flag(void);
-
 int _starpu_push_task(starpu_job_t task, unsigned job_is_already_locked);
 /* pop a task that can be executed on the worker */
 struct starpu_task *_starpu_pop_task(void);

+ 1 - 1
src/core/task.c

@@ -221,7 +221,7 @@ int starpu_task_submit(struct starpu_task *task)
 		/* In case we require that a task should be explicitly
 		 * executed on a specific worker, we make sure that the worker
 		 * is able to execute this task.  */
-		if (task->execute_on_a_specific_worker && !_starpu_combined_worker_may_execute_task(task->workerid, task)) {
+		if (task->execute_on_a_specific_worker && !starpu_combined_worker_may_execute_task(task->workerid, task)) {
                         _STARPU_LOG_OUT_TAG("ENODEV");
 			return -ENODEV;
                 }

+ 2 - 2
src/core/workers.c

@@ -66,7 +66,7 @@ inline uint32_t _starpu_may_submit_opencl_task(void)
 	return (STARPU_OPENCL & config.worker_mask);
 }
 
-int _starpu_worker_may_execute_task(unsigned workerid, struct starpu_task *task)
+int starpu_worker_may_execute_task(unsigned workerid, struct starpu_task *task)
 {
 	/* TODO: check that the task operand sizes will fit on that device */
 	/* TODO: call application-provided function for various cases like
@@ -76,7 +76,7 @@ int _starpu_worker_may_execute_task(unsigned workerid, struct starpu_task *task)
 
 
 
-int _starpu_combined_worker_may_execute_task(unsigned workerid, struct starpu_task *task)
+int starpu_combined_worker_may_execute_task(unsigned workerid, struct starpu_task *task)
 {
 	/* TODO: check that the task operand sizes will fit on that device */
 	/* TODO: call application-provided function for various cases like

+ 0 - 4
src/core/workers.h

@@ -172,10 +172,6 @@ uint32_t _starpu_may_submit_cpu_task(void);
 /* Is there a worker that can execute OpenCL code ? */
 uint32_t _starpu_may_submit_opencl_task(void);
 
-/* Check if the worker specified by workerid can execute the codelet. */
-int _starpu_worker_may_execute_task(unsigned workerid, struct starpu_task *task);
-int _starpu_combined_worker_may_execute_task(unsigned workerid, struct starpu_task *task);
-
 /* Check whether there is anything that the worker should do instead of
  * sleeping (waiting on something to happen). */
 unsigned _starpu_worker_can_block(unsigned memnode);

+ 1 - 1
src/datawizard/coherency.c

@@ -388,7 +388,7 @@ static void _starpu_set_data_requested_flag_if_needed(struct starpu_data_replica
 //	pthread_spin_unlock(&handle->header_lock);
 }
 
-int _starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
+int starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
 {
 	starpu_buffer_descr *descrs = task->buffers;
 	unsigned nbuffers = task->cl->nbuffers;

+ 0 - 2
src/datawizard/coherency.h

@@ -214,8 +214,6 @@ unsigned _starpu_is_data_present_or_requested(struct starpu_data_state_t *state,
 unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle handle, uint32_t memory_node);
 
 
-int _starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node);
-
 uint32_t _starpu_select_node_to_handle_request(uint32_t src_node, uint32_t dst_node);
 uint32_t _starpu_select_src_node(struct starpu_data_state_t *state);