|
@@ -23,6 +23,9 @@
|
|
#include <core/debug.h>
|
|
#include <core/debug.h>
|
|
#include <core/topology.h>
|
|
#include <core/topology.h>
|
|
#include <drivers/cuda/driver_cuda.h>
|
|
#include <drivers/cuda/driver_cuda.h>
|
|
|
|
+#include <drivers/mic/driver_mic_source.h>
|
|
|
|
+#include <drivers/scc/driver_scc_source.h>
|
|
|
|
+#include <drivers/mp_common/source_common.h>
|
|
#include <drivers/opencl/driver_opencl.h>
|
|
#include <drivers/opencl/driver_opencl.h>
|
|
#include <profiling/profiling.h>
|
|
#include <profiling/profiling.h>
|
|
#include <common/uthash.h>
|
|
#include <common/uthash.h>
|
|
@@ -45,7 +48,7 @@
|
|
|
|
|
|
static unsigned topology_is_initialized = 0;
|
|
static unsigned topology_is_initialized = 0;
|
|
|
|
|
|
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
|
|
|
|
|
|
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
|
|
|
|
|
|
struct handle_entry
|
|
struct handle_entry
|
|
{
|
|
{
|
|
@@ -67,9 +70,9 @@ static unsigned may_bind_automatically = 0;
|
|
* Discover the topology of the machine
|
|
* Discover the topology of the machine
|
|
*/
|
|
*/
|
|
|
|
|
|
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
|
|
|
|
|
|
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
|
|
static void
|
|
static void
|
|
-_starpu_initialize_workers_gpuid (int *explicit_workers_gpuid,
|
|
|
|
|
|
+_starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
|
|
int *current, int *workers_gpuid,
|
|
int *current, int *workers_gpuid,
|
|
const char *varname, unsigned nhwgpus)
|
|
const char *varname, unsigned nhwgpus)
|
|
{
|
|
{
|
|
@@ -144,7 +147,8 @@ _starpu_initialize_workers_gpuid (int *explicit_workers_gpuid,
|
|
workers_gpuid[i] = (unsigned)(i % nhwgpus);
|
|
workers_gpuid[i] = (unsigned)(i % nhwgpus);
|
|
|
|
|
|
/* StarPU can use sampling techniques to bind threads
|
|
/* StarPU can use sampling techniques to bind threads
|
|
- * correctly */
|
|
|
|
|
|
+ * correctly
|
|
|
|
+ * TODO: use a private value for each kind of device */
|
|
may_bind_automatically = 1;
|
|
may_bind_automatically = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -157,7 +161,7 @@ _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
|
|
struct starpu_machine_topology *topology = &config->topology;
|
|
struct starpu_machine_topology *topology = &config->topology;
|
|
struct starpu_conf *uconf = config->conf;
|
|
struct starpu_conf *uconf = config->conf;
|
|
|
|
|
|
- _starpu_initialize_workers_gpuid (
|
|
|
|
|
|
+ _starpu_initialize_workers_deviceid (
|
|
uconf->use_explicit_workers_cuda_gpuid == 0
|
|
uconf->use_explicit_workers_cuda_gpuid == 0
|
|
? NULL
|
|
? NULL
|
|
: (int *)uconf->workers_cuda_gpuid,
|
|
: (int *)uconf->workers_cuda_gpuid,
|
|
@@ -184,7 +188,7 @@ _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
|
|
struct starpu_machine_topology *topology = &config->topology;
|
|
struct starpu_machine_topology *topology = &config->topology;
|
|
struct starpu_conf *uconf = config->conf;
|
|
struct starpu_conf *uconf = config->conf;
|
|
|
|
|
|
- _starpu_initialize_workers_gpuid(
|
|
|
|
|
|
+ _starpu_initialize_workers_deviceid(
|
|
uconf->use_explicit_workers_opencl_gpuid == 0
|
|
uconf->use_explicit_workers_opencl_gpuid == 0
|
|
? NULL
|
|
? NULL
|
|
: (int *)uconf->workers_opencl_gpuid,
|
|
: (int *)uconf->workers_opencl_gpuid,
|
|
@@ -258,6 +262,147 @@ _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
|
|
}
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
+#if 0
+/* Compiled out: MIC counterpart of the CUDA/OpenCL/SCC device-id
+ * initializers.  It forwards the MIC-specific fields to
+ * _starpu_initialize_workers_deviceid() with "STARPU_WORKERS_MICID" as
+ * variable name.
+ *
+ * NOTE(review): `nhwmiccores' is indexed per MIC device elsewhere in this
+ * file, so passing it as a plain count below looks wrong -- presumably one
+ * of the reasons this code is disabled; confirm before re-enabling. */
+#if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
+static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
+{
+	struct starpu_machine_topology *topology = &config->topology;
+	struct starpu_conf *uconf = config->conf;
+
+	/* Read the explicit list through `uconf', like the flag tested just
+	 * before it and like the SCC variant of this function. */
+	_starpu_initialize_workers_deviceid(
+		uconf->use_explicit_workers_mic_deviceid == 0
+		? NULL
+		: (int *)uconf->workers_mic_deviceid,
+		&(config->current_mic_deviceid),
+		(int *)topology->workers_mic_deviceid,
+		"STARPU_WORKERS_MICID",
+		topology->nhwmiccores);
+}
+#endif
+#endif
|
|
|
|
+
|
|
|
|
+#ifdef STARPU_USE_SCC
|
|
|
|
+/* Set up the SCC device ids of CONFIG through the generic
+ * _starpu_initialize_workers_deviceid() helper.  The explicit id list of
+ * the user configuration is only handed over when
+ * `use_explicit_workers_scc_deviceid' is non-zero; otherwise NULL is
+ * passed.  "STARPU_WORKERS_SCCID" is given as the variable name. */
+static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
+{
+	struct starpu_conf *uconf = config->conf;
+	struct starpu_machine_topology *topology = &config->topology;
+	int *explicit_ids = NULL;
+
+	if (uconf->use_explicit_workers_scc_deviceid != 0)
+		explicit_ids = (int *) uconf->workers_scc_deviceid;
+
+	_starpu_initialize_workers_deviceid(explicit_ids,
+					    &(config->current_scc_deviceid),
+					    (int *)topology->workers_scc_deviceid,
+					    "STARPU_WORKERS_SCCID",
+					    topology->nhwscc);
+}
|
|
|
|
+#endif /* STARPU_USE_SCC */
|
|
|
|
+
|
|
|
|
+#if 0
+#ifdef STARPU_USE_MIC
+/* Compiled out.  Return the next entry of `workers_mic_deviceid', walking
+ * the table round-robin over `nmicdevices' slots and advancing
+ * `current_mic_deviceid' by one on every call. */
+static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
+{
+	unsigned slot = config->current_mic_deviceid % config->topology.nmicdevices;
+
+	config->current_mic_deviceid++;
+
+	return (int)config->topology.workers_mic_deviceid[slot];
+}
+#endif
+#endif
|
|
|
|
+
|
|
|
|
+#ifdef STARPU_USE_SCC
|
|
|
|
+/* Return the next entry of `workers_scc_deviceid', walking the table
+ * round-robin over `nsccdevices' slots and advancing
+ * `current_scc_deviceid' by one on every call.
+ * `nsccdevices' is used as a divisor, so it must be non-zero here. */
+static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
+{
+	unsigned slot = config->current_scc_deviceid % config->topology.nsccdevices;
+
+	config->current_scc_deviceid++;
+
+	return (int)config->topology.workers_scc_deviceid[slot];
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+#ifdef STARPU_USE_MIC
|
|
|
|
+/* Make this StarPU instance aware of the number of cores of the MIC node
+ * of index MIC_IDX: the count is obtained from
+ * _starpu_src_common_sink_nbcores() and stored in the `nhwmiccores'
+ * topology field of CONFIG for that node. */
+static void
+_starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
+{
+	int sink_cores;
+
+	_starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &sink_cores);
+
+	config->topology.nhwmiccores[mic_idx] = sink_cores;
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+/* Initialize the MIC node of index MIC_IDX.
+ *
+ * Locates the sink program, gets the COI engine handle of the device into
+ * *COI_HANDLE, launches the sink program on it (process stored in
+ * *COI_PROCESS) and creates the matching entry of `mic_nodes'.
+ *
+ * Returns 0 on success, -1 when no sink program could be located (a
+ * message is then printed on stderr).  COI failures are handed to
+ * STARPU_MIC_SRC_REPORT_COI_ERROR(). */
+static int
+_starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
+		       COIENGINE *coi_handle, COIPROCESS *coi_process)
+{
+	struct starpu_conf *user_conf = config->conf;
+
+	char ***argv = _starpu_get_argv();
+	const char *suffixes[] = {"-mic", "_mic", NULL};
+
+	/* Environment variables to send to the Sink, it informs it what kind
+	 * of node it is (architecture and type) as there is no way to discover
+	 * it itself */
+	char mic_idx_env[32];
+	snprintf(mic_idx_env, sizeof(mic_idx_env), "DEVID=%d", mic_idx);
+
+	/* XXX: this is currently necessary so that the remote process does not
+	 * segfault. */
+	char nb_mic_env[32];
+	snprintf(nb_mic_env, sizeof(nb_mic_env), "NB_MIC=%d", 2);
+
+	const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
+
+	char mic_sink_program_path[1024];
+	/* Let's get the helper program to run on the MIC device */
+	int mic_file_found =
+		_starpu_src_common_locate_file (mic_sink_program_path,
+						getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
+						getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
+						user_conf->mic_sink_program_path,
+						(argv ? (*argv)[0] : NULL),
+						suffixes);
+
+	if (0 != mic_file_found) {
+		/* Adjacent literals are concatenated as-is: each piece carries
+		 * its own separating space. */
+		fprintf(stderr, "No MIC program specified, use the environment "
+			"variable STARPU_MIC_SINK_PROGRAM_NAME "
+			"or the field 'starpu_conf.mic_sink_program_path' "
+			"to define it.\n");
+
+		return -1;
+	}
+
+	COIRESULT res;
+	/* Let's get the handle which let us manage the remote MIC device */
+	res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
+	if (STARPU_UNLIKELY(res != COI_SUCCESS))
+		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
+
+	/* We launch the helper on the MIC device, which will wait for us
+	 * to give it work to do.
+	 * As we will communicate further with the device through scif we
+	 * don't need to keep the process pointer */
+	res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
+				       mic_sink_env, 1, NULL, 0, NULL,
+				       coi_process);
+	if (STARPU_UNLIKELY(res != COI_SUCCESS))
+		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
+
+	/* Let's create the node structure, we'll communicate with the peer
+	 * through scif thanks to it */
+	mic_nodes[mic_idx] =
+		_starpu_mp_common_node_create(STARPU_MIC_SOURCE, mic_idx);
+
+	return 0;
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
|
|
static void
|
|
static void
|
|
_starpu_init_topology (struct _starpu_machine_config *config)
|
|
_starpu_init_topology (struct _starpu_machine_config *config)
|
|
@@ -284,6 +429,9 @@ _starpu_init_topology (struct _starpu_machine_config *config)
|
|
_starpu_cpu_discover_devices(config);
|
|
_starpu_cpu_discover_devices(config);
|
|
_starpu_cuda_discover_devices(config);
|
|
_starpu_cuda_discover_devices(config);
|
|
_starpu_opencl_discover_devices(config);
|
|
_starpu_opencl_discover_devices(config);
|
|
|
|
+#ifdef STARPU_USE_SCC
|
|
|
|
+ config->topology.nhwscc = _starpu_scc_src_get_device_count();
|
|
|
|
+#endif
|
|
|
|
|
|
topology_is_initialized = 1;
|
|
topology_is_initialized = 1;
|
|
}
|
|
}
|
|
@@ -434,8 +582,137 @@ _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
|
|
return config->topology.nhwcpus;
|
|
return config->topology.nhwcpus;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+#ifdef STARPU_USE_MIC
|
|
|
|
+/* Configure the MIC device of index MIC_IDX.
+ *
+ * Discovers the number of cores of the device, decides how many of them to
+ * use (STARPU_NMIC environment variable first, then USER_CONF->nmic when
+ * USER_CONF is non-NULL, else every detected core; a request above the
+ * detected count is clamped with a warning) and appends one MIC worker per
+ * used core to CONFIG->workers, updating `nworkers' accordingly. */
+static void
+_starpu_init_mic_config (struct _starpu_machine_config *config,
+			 struct starpu_conf *user_conf,
+			 unsigned mic_idx)
+{
+	struct starpu_machine_topology *topology = &config->topology;
+
+	topology->nhwmiccores[mic_idx] = 0;
+
+	_starpu_init_mic_topology (config, mic_idx);
+
+	int nmiccores;
+	nmiccores = starpu_get_env_number("STARPU_NMIC");
+
+	/* STARPU_NMIC is not set. Did the user specify anything ? */
+	if (nmiccores == -1 && user_conf)
+		nmiccores = user_conf->nmic;
+
+	if (nmiccores != 0)
+	{
+		if (nmiccores == -1)
+		{
+			/* Nothing was specified, so let's use the number of
+			 * detected mic cores. */
+			nmiccores = topology->nhwmiccores[mic_idx];
+		}
+		else
+		{
+			if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
+			{
+				/* The user requires more MIC cores than are available */
+				fprintf(stderr,
+					"# Warning: %d MIC devices requested. Only %d available.\n",
+					nmiccores, (int) topology->nhwmiccores[mic_idx]);
+				nmiccores = topology->nhwmiccores[mic_idx];
+			}
+		}
+	}
+
+	topology->nmiccores[mic_idx] = nmiccores;
+	STARPU_ASSERT(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS);
+
+	/* _starpu_initialize_workers_mic_deviceid (config); */
+
+	/* Every worker of this device shares the same performance-model
+	 * architecture, derived from the device index.  (The previous code
+	 * used `devid', which is not declared in this function.) */
+	enum starpu_perfmodel_archtype arch =
+		(enum starpu_perfmodel_archtype)((int)STARPU_MIC_DEFAULT + mic_idx);
+
+	unsigned miccore_id;
+	for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
+	{
+		int worker_idx = topology->nworkers + miccore_id;
+		config->workers[worker_idx].arch = STARPU_MIC_WORKER;
+		config->workers[worker_idx].perf_arch = arch;
+		config->workers[worker_idx].mp_nodeid = mic_idx;
+		config->workers[worker_idx].devid = miccore_id;
+		config->workers[worker_idx].worker_mask = STARPU_MIC;
+		config->worker_mask |= STARPU_MIC;
+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
+	}
+
+	topology->nworkers += topology->nmiccores[mic_idx];
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+#ifdef STARPU_USE_MIC
|
|
|
|
+/* COI engine handle and sink process of each MIC device, indexed by MIC
+ * node index.  Sized like the other per-MIC-device tables of this file
+ * rather than with a hard-coded 2. */
+static COIENGINE handles[STARPU_MAXMICDEVS];
+static COIPROCESS process[STARPU_MAXMICDEVS];
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+/* Discover and configure the mp topology. That means:
+ * - discover the number of mp nodes;
+ * - initialize each discovered node;
+ * - discover the local topology (number of PUs/devices) of each node;
+ * - configure the workers accordingly.
+ *
+ * The number of MIC devices used is the smallest of the detected count,
+ * the STARPU_NMICDEVS request (when set) and the capacity of the
+ * `handles'/`process' tables. */
+static void
+_starpu_init_mp_config (struct _starpu_machine_config *config,
+			struct starpu_conf *user_conf)
+{
+	struct starpu_machine_topology *topology = &config->topology;
+
+	// We currently only support MIC at this level.
+#ifdef STARPU_USE_MIC
+
+	/* Discover and initialize the number of MIC nodes through the mp
+	 * infrastructure. */
+	unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
+
+	int reqmicdevices = starpu_get_env_number("STARPU_NMICDEVS");
+	if (-1 == reqmicdevices)
+		reqmicdevices = nhwmicdevices;
+
+	unsigned wanted = STARPU_MIN (nhwmicdevices, (unsigned) reqmicdevices);
+
+	/* handles[] and process[] are indexed by device below: never walk
+	 * past their end. */
+	unsigned capacity = (unsigned) (sizeof(handles) / sizeof(handles[0]));
+	if (wanted > capacity)
+	{
+		fprintf(stderr,
+			"# Warning: %u MIC devices requested. Only %u supported.\n",
+			wanted, capacity);
+		wanted = capacity;
+	}
+
+	topology->nmicdevices = 0;
+	unsigned i;
+	for (i = 0; i < wanted; i++)
+	{
+		/* The nodes 0 .. nmicdevices-1 are configured below and torn
+		 * down later by index, so they must all be initialized: stop
+		 * at the first node which fails instead of skipping it. */
+		if (0 != _starpu_init_mic_node (config, i, &handles[i], &process[i]))
+			break;
+		topology->nmicdevices++;
+	}
+
+	for (i = 0; i < topology->nmicdevices; i++)
+		_starpu_init_mic_config (config, user_conf, i);
+#endif
+}
|
|
|
|
+
|
|
|
|
+/* Shut down the MIC node of index MIC_IDX: send it the STARPU_EXIT
+ * command, destroy its COI process, then destroy its `mic_nodes' entry.
+ * The three steps are performed in that order. */
+static void
+_starpu_deinit_mic_node (unsigned mic_idx)
+{
+	COIPROCESS sink_process = process[mic_idx];
+
+	_starpu_mp_common_send_command(mic_nodes[mic_idx], STARPU_EXIT, NULL, 0);
+
+	COIProcessDestroy(sink_process, -1, 0, NULL, NULL);
+
+	_starpu_mp_common_node_destroy(mic_nodes[mic_idx]);
+}
|
|
|
|
+
|
|
|
|
+/* Tear down the mp configuration of CONFIG: deinitialize every MIC node
+ * from index 0 up to `nmicdevices' (excluded), then call
+ * _starpu_mic_clear_kernels(). */
+static void
+_starpu_deinit_mp_config (struct _starpu_machine_config *config)
+{
+	unsigned mic_idx = 0;
+
+	while (mic_idx < config->topology.nmicdevices)
+	{
+		_starpu_deinit_mic_node (mic_idx);
+		mic_idx++;
+	}
+
+	_starpu_mic_clear_kernels();
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
static int
|
|
static int
|
|
-_starpu_init_machine_config (struct _starpu_machine_config *config)
|
|
|
|
|
|
+_starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_config)
|
|
{
|
|
{
|
|
int i;
|
|
int i;
|
|
for (i = 0; i < STARPU_NMAXWORKERS; i++)
|
|
for (i = 0; i < STARPU_NMAXWORKERS; i++)
|
|
@@ -498,6 +775,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
|
|
int devid = _starpu_get_next_cuda_gpuid(config);
|
|
int devid = _starpu_get_next_cuda_gpuid(config);
|
|
enum starpu_perfmodel_archtype arch =
|
|
enum starpu_perfmodel_archtype arch =
|
|
(enum starpu_perfmodel_archtype)((int)STARPU_CUDA_DEFAULT + devid);
|
|
(enum starpu_perfmodel_archtype)((int)STARPU_CUDA_DEFAULT + devid);
|
|
|
|
+ config->workers[worker_idx].mp_nodeid = -1;
|
|
config->workers[worker_idx].devid = devid;
|
|
config->workers[worker_idx].devid = devid;
|
|
config->workers[worker_idx].perf_arch = arch;
|
|
config->workers[worker_idx].perf_arch = arch;
|
|
config->workers[worker_idx].worker_mask = STARPU_CUDA;
|
|
config->workers[worker_idx].worker_mask = STARPU_CUDA;
|
|
@@ -572,6 +850,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
|
|
config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
|
|
config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
|
|
enum starpu_perfmodel_archtype arch =
|
|
enum starpu_perfmodel_archtype arch =
|
|
(enum starpu_perfmodel_archtype)((int)STARPU_OPENCL_DEFAULT + devid);
|
|
(enum starpu_perfmodel_archtype)((int)STARPU_OPENCL_DEFAULT + devid);
|
|
|
|
+ config->workers[worker_idx].mp_nodeid = -1;
|
|
config->workers[worker_idx].devid = devid;
|
|
config->workers[worker_idx].devid = devid;
|
|
config->workers[worker_idx].perf_arch = arch;
|
|
config->workers[worker_idx].perf_arch = arch;
|
|
config->workers[worker_idx].worker_mask = STARPU_OPENCL;
|
|
config->workers[worker_idx].worker_mask = STARPU_OPENCL;
|
|
@@ -582,6 +861,78 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
|
|
topology->nworkers += topology->nopenclgpus;
|
|
topology->nworkers += topology->nopenclgpus;
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
+#ifdef STARPU_USE_SCC
|
|
|
|
+ int nscc = config->conf->nscc;
|
|
|
|
+
|
|
|
|
+ unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
|
|
|
|
+
|
|
|
|
+ if (nscc != 0)
|
|
|
|
+ {
|
|
|
|
+ /* The user did not disable SCC. We need to count
|
|
|
|
+ * the number of devices */
|
|
|
|
+ int nb_devices = nb_scc_nodes;
|
|
|
|
+
|
|
|
|
+ if (nscc == -1)
|
|
|
|
+ {
|
|
|
|
+ /* Nothing was specified, so let's choose ! */
|
|
|
|
+ nscc = nb_devices;
|
|
|
|
+ if (nscc > STARPU_MAXSCCDEVS)
|
|
|
|
+ {
|
|
|
|
+ _STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configuration option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
|
|
|
|
+ nscc = STARPU_MAXSCCDEVS;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ /* Let's make sure this value is OK. */
|
|
|
|
+ if (nscc > nb_devices)
|
|
|
|
+ {
|
|
|
|
+ /* The user requires more SCC devices than there is available */
|
|
|
|
+ _STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
|
|
|
|
+ nscc = nb_devices;
|
|
|
|
+ }
|
|
|
|
+ /* Let's make sure this value is OK. */
|
|
|
|
+ if (nscc > STARPU_MAXSCCDEVS)
|
|
|
|
+ {
|
|
|
|
+ _STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
|
|
|
|
+ nscc = STARPU_MAXSCCDEVS;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* Now we know how many SCC devices will be used */
|
|
|
|
+ topology->nsccdevices = nscc;
|
|
|
|
+ STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
|
|
|
|
+
|
|
|
|
+ _starpu_initialize_workers_scc_deviceid(config);
|
|
|
|
+
|
|
|
|
+ unsigned sccdev;
|
|
|
|
+ for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
|
|
|
|
+ {
|
|
|
|
+ config->workers[topology->nworkers + sccdev].arch = STARPU_SCC_WORKER;
|
|
|
|
+ int devid = _starpu_get_next_scc_deviceid(config);
|
|
|
|
+ enum starpu_perfmodel_archtype arch = (enum starpu_perfmodel_archtype)((int)STARPU_SCC_DEFAULT + devid);
|
|
|
|
+ config->workers[topology->nworkers + sccdev].mp_nodeid = -1;
|
|
|
|
+ config->workers[topology->nworkers + sccdev].devid = devid;
|
|
|
|
+ config->workers[topology->nworkers + sccdev].perf_arch = arch;
|
|
|
|
+ config->workers[topology->nworkers + sccdev].worker_mask = STARPU_SCC;
|
|
|
|
+ config->worker_mask |= STARPU_SCC;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for (; sccdev < nb_scc_nodes; ++sccdev)
|
|
|
|
+ _starpu_scc_exit_useless_node(sccdev);
|
|
|
|
+
|
|
|
|
+ topology->nworkers += topology->nsccdevices;
|
|
|
|
+#endif /* STARPU_USE_SCC */
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ /* Unless not requested, we need to complete configuration with the
|
|
|
|
+ * ones of the mp nodes. */
|
|
|
|
+#ifdef STARPU_USE_MIC
|
|
|
|
+ if (! no_mp_config)
|
|
|
|
+ _starpu_init_mp_config (config, config->conf);
|
|
|
|
+#endif
|
|
|
|
+
|
|
/* we put the CPU section after the accelerator : in case there was an
|
|
/* we put the CPU section after the accelerator : in case there was an
|
|
* accelerator found, we devote one cpu */
|
|
* accelerator found, we devote one cpu */
|
|
#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
|
|
#if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
|
|
@@ -591,8 +942,15 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
|
|
{
|
|
{
|
|
if (ncpu == -1)
|
|
if (ncpu == -1)
|
|
{
|
|
{
|
|
- unsigned already_busy_cpus = topology->ncudagpus + topology->nopenclgpus;
|
|
|
|
- long avail_cpus = topology->nhwcpus - already_busy_cpus;
|
|
|
|
|
|
+ unsigned mic_busy_cpus = 0;
|
|
|
|
+ unsigned i = 0;
|
|
|
|
+ for (i = 0; i < STARPU_MAXMICDEVS; i++)
|
|
|
|
+ mic_busy_cpus += (topology->nmiccores[i] ? 1 : 0);
|
|
|
|
+
|
|
|
|
+ unsigned already_busy_cpus = mic_busy_cpus + topology->ncudagpus
|
|
|
|
+ + topology->nopenclgpus + topology->nsccdevices;
|
|
|
|
+
|
|
|
|
+ long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
|
|
if (avail_cpus < 0)
|
|
if (avail_cpus < 0)
|
|
avail_cpus = 0;
|
|
avail_cpus = 0;
|
|
ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
|
|
ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
|
|
@@ -617,6 +975,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
|
|
int worker_idx = topology->nworkers + cpu;
|
|
int worker_idx = topology->nworkers + cpu;
|
|
config->workers[worker_idx].arch = STARPU_CPU_WORKER;
|
|
config->workers[worker_idx].arch = STARPU_CPU_WORKER;
|
|
config->workers[worker_idx].perf_arch = STARPU_CPU_DEFAULT;
|
|
config->workers[worker_idx].perf_arch = STARPU_CPU_DEFAULT;
|
|
|
|
+ config->workers[worker_idx].mp_nodeid = -1;
|
|
config->workers[worker_idx].devid = cpu;
|
|
config->workers[worker_idx].devid = cpu;
|
|
config->workers[worker_idx].worker_mask = STARPU_CPU;
|
|
config->workers[worker_idx].worker_mask = STARPU_CPU;
|
|
config->worker_mask |= STARPU_CPU;
|
|
config->worker_mask |= STARPU_CPU;
|
|
@@ -745,7 +1104,7 @@ _starpu_bind_thread_on_cpus (
|
|
|
|
|
|
|
|
|
|
static void
|
|
static void
|
|
-_starpu_init_workers_binding (struct _starpu_machine_config *config)
|
|
|
|
|
|
+_starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_config)
|
|
{
|
|
{
|
|
/* launch one thread per CPU */
|
|
/* launch one thread per CPU */
|
|
unsigned ram_memory_node;
|
|
unsigned ram_memory_node;
|
|
@@ -770,6 +1129,21 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
|
|
* combinations in a matrix which we initialize here. */
|
|
* combinations in a matrix which we initialize here. */
|
|
_starpu_initialize_busid_matrix();
|
|
_starpu_initialize_busid_matrix();
|
|
|
|
|
|
|
|
+#ifdef STARPU_USE_MIC
|
|
|
|
+ /* Each MIC device has its own memory node. */
|
|
|
|
+ unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
|
|
|
|
+
|
|
|
|
+ // Register the memory nodes for the MIC devices.
|
|
|
|
+ if (! no_mp_config) {
|
|
|
|
+ unsigned i = 0;
|
|
|
|
+ for (i = 0; i < config->topology.nmicdevices; i++) {
|
|
|
|
+ mic_memory_nodes[i] = _starpu_memory_node_register (STARPU_MIC_RAM, i);
|
|
|
|
+ _starpu_register_bus(0, mic_memory_nodes[i]);
|
|
|
|
+ _starpu_register_bus(mic_memory_nodes[i], 0);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+#endif
|
|
|
|
+
|
|
unsigned worker;
|
|
unsigned worker;
|
|
for (worker = 0; worker < config->topology.nworkers; worker++)
|
|
for (worker = 0; worker < config->topology.nworkers; worker++)
|
|
{
|
|
{
|
|
@@ -852,6 +1226,38 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
|
|
break;
|
|
break;
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
+#ifdef STARPU_USE_MIC
|
|
|
|
+ case STARPU_MIC_WORKER:
|
|
|
|
+ //if (may_bind_automatically)
|
|
|
|
+ //{
|
|
|
|
+ // /* StarPU is allowed to bind threads automatically */
|
|
|
|
+ // preferred_binding = _starpu_get_mic_affinity_vector(workerarg->devid);
|
|
|
|
+ // npreferred = config->topology.nhwcpus;
|
|
|
|
+ //}
|
|
|
|
+ is_a_set_of_accelerators = 1;
|
|
|
|
+ memory_node = mic_memory_nodes[workerarg->mp_nodeid];
|
|
|
|
+ _starpu_memory_node_add_nworkers(memory_node);
|
|
|
|
+ /* memory_node = _starpu_memory_node_register(STARPU_MIC_RAM, workerarg->devid);*/
|
|
|
|
+
|
|
|
|
+ /* _starpu_register_bus(0, memory_node);
|
|
|
|
+ * _starpu_register_bus(memory_node, 0); */
|
|
|
|
+ break;
|
|
|
|
+#endif /* STARPU_USE_MIC */
|
|
|
|
+
|
|
|
|
+#ifdef STARPU_USE_SCC
|
|
|
|
+ case STARPU_SCC_WORKER:
|
|
|
|
+ {
|
|
|
|
+ /* Node 0 represents the SCC shared memory when we're on SCC. */
|
|
|
|
+ struct _starpu_memory_node_descr *descr = _starpu_memory_node_get_description();
|
|
|
|
+ descr->nodes[ram_memory_node] = STARPU_SCC_SHM;
|
|
|
|
+
|
|
|
|
+ is_a_set_of_accelerators = 0;
|
|
|
|
+ memory_node = ram_memory_node;
|
|
|
|
+ _starpu_memory_node_add_nworkers(memory_node);
|
|
|
|
+ }
|
|
|
|
+ break;
|
|
|
|
+#endif
|
|
|
|
+
|
|
default:
|
|
default:
|
|
STARPU_ABORT();
|
|
STARPU_ABORT();
|
|
}
|
|
}
|
|
@@ -902,18 +1308,18 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
|
|
|
|
|
|
|
|
|
|
int
|
|
int
|
|
-_starpu_build_topology (struct _starpu_machine_config *config)
|
|
|
|
|
|
+_starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
|
|
{
|
|
{
|
|
int ret;
|
|
int ret;
|
|
|
|
|
|
- ret = _starpu_init_machine_config(config);
|
|
|
|
|
|
+ ret = _starpu_init_machine_config(config, no_mp_config);
|
|
if (ret)
|
|
if (ret)
|
|
return ret;
|
|
return ret;
|
|
|
|
|
|
/* for the data management library */
|
|
/* for the data management library */
|
|
_starpu_memory_nodes_init();
|
|
_starpu_memory_nodes_init();
|
|
|
|
|
|
- _starpu_init_workers_binding(config);
|
|
|
|
|
|
+ _starpu_init_workers_binding(config, no_mp_config);
|
|
|
|
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
@@ -922,6 +1328,10 @@ void
|
|
_starpu_destroy_topology (
|
|
_starpu_destroy_topology (
|
|
struct _starpu_machine_config *config __attribute__ ((unused)))
|
|
struct _starpu_machine_config *config __attribute__ ((unused)))
|
|
{
|
|
{
|
|
|
|
+#ifdef STARPU_USE_MIC
|
|
|
|
+ _starpu_deinit_mp_config(config);
|
|
|
|
+#endif
|
|
|
|
+
|
|
/* cleanup StarPU internal data structures */
|
|
/* cleanup StarPU internal data structures */
|
|
_starpu_memory_nodes_deinit();
|
|
_starpu_memory_nodes_deinit();
|
|
|
|
|