|
@@ -44,8 +44,10 @@
|
|
|
|
|
|
//#define STARPU_MAXFPGADEVS 4
|
|
|
/* the number of FPGA devices */
|
|
|
-static unsigned nfpgafpgas = -1;
|
|
|
-static size_t global_mem[STARPU_MAXFPGADEVS] = { 128ULL*1024*1024*1024 };
|
|
|
+static unsigned nfpgafpgas; /* assigned in _starpu_fpga_discover_devices() */
|
|
|
+static size_t global_mem[STARPU_MAXFPGADEVS]; /* per-device memory size, assigned in init_device_context(); upper bound checked by _starpu_fpga_allocate_memory() */
|
|
|
+static max_engine_t *engines[STARPU_MAXFPGADEVS]; /* per-device handle returned by max_load() in init_device_context(); read by starpu_fpga_get_local_engine() */
|
|
|
+static fpga_mem current_address[STARPU_MAXFPGADEVS]; /* per-device next address handed out by _starpu_fpga_allocate_memory(); initialised in init_device_context() */
|
|
|
|
|
|
static void _starpu_fpga_limit_global_mem(unsigned );
|
|
|
static size_t _starpu_fpga_get_global_mem_size(unsigned devid);
|
|
@@ -55,39 +57,84 @@ void fpga_msg(char *msg)
|
|
|
printf(FPGA_OK "%s\n" NORMAL, msg);
|
|
|
}
|
|
|
|
|
|
-void _starpu_init_fpga()
+/* Return the engine handle recorded in engines[] for the device driven by
+ * the calling worker.  The worker id is obtained with
+ * starpu_worker_get_id_check() and mapped to a device id with
+ * starpu_worker_get_devid(). */
+max_engine_t *starpu_fpga_get_local_engine(void)
{
- nfpgafpgas = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
- if(nfpgafpgas == -1)
- nfpgafpgas =1;
- STARPU_ASSERT( nfpgafpgas <= STARPU_MAXFPGADEVS);
+	int worker = starpu_worker_get_id_check();
+	int devid = starpu_worker_get_devid(worker);

- //LMemInterface addLMemInterface()
- //// pour récupérer l'accès à la LMem
+	/* engines[] has STARPU_MAXFPGADEVS entries: refuse to index outside of it */
+	STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXFPGADEVS);
+	return engines[devid];
}
|
|
|
|
|
|
-#if 0
-int fpga_allocate_memory(fpga_mem *ptr, size_t size)
+/* Intentionally empty: the device count that used to be computed here is
+ * now assigned in _starpu_fpga_discover_devices(). */
+void _starpu_init_fpga(void)
{
- //This allocates BYTES
- char *msg1="You asked to allocate ";
- //printf(KCYN "%s%d*%d\n" KBLU, msg1,size,sizeof(unsigned));
- printf(FPGA_OK "%s%lu bytes\n" NORMAL, msg1,size);
-
- *ptr =(fpga_mem) malloc(size);
-
- if (*ptr == NULL)
- return 0;
- else
- return 1;
}
-#endif
|
|
|
|
|
|
void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
|
|
|
{
|
|
|
//TODO: This is statically assigned, in the next round of integration
|
|
|
// I will have to read from the struct fpga in fpga
|
|
|
- config->topology.nhwfpgafpgas = nfpgafpgas;
|
|
|
+ struct starpu_max_load *load = _starpu_config.conf.fpga_load;
|
|
|
+ const char *sim_socket = max_config_get_string(MAX_CONFIG_USE_SIMULATION);
|
|
|
+ int n;
|
|
|
+
|
|
|
+ n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
|
|
|
+ if (n != -1)
|
|
|
+ {
|
|
|
+ config->topology.nhwfpgafpgas = nfpgafpgas = n;
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!load)
|
|
|
+ {
|
|
|
+ /* Nothing specified, single-FPGA execution with basic static
|
|
|
+ * interface, file will be auto-loaded by SLiC. */
|
|
|
+ n = 1;
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ struct starpu_max_load *cur, *star = NULL;
|
|
|
+ size_t nstar = 0;
|
|
|
+
|
|
|
+ /* First check if we have a star, we will want to subtract non-star loads from it */
|
|
|
+ for (cur = load; cur->engine_id_pattern; cur++)
|
|
|
+ if (!strcmp(cur->engine_id_pattern, "*"))
|
|
|
+ {
|
|
|
+ STARPU_ASSERT_MSG(!cur[1].file, "in starpu_max_load array, * pattern must be last");
|
|
|
+ star = cur;
|
|
|
+
|
|
|
+ if (sim_socket)
|
|
|
+ /* not specified, assume 1 */
|
|
|
+ nstar = 1;
|
|
|
+ else
|
|
|
+ nstar = max_count_engines_free(cur->file, cur->engine_id_pattern);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ n = 0;
|
|
|
+ /* Now check the non-star loads */
|
|
|
+ for (cur = load; cur != star && cur->engine_id_pattern; cur++)
|
|
|
+ {
|
|
|
+ size_t size;
|
|
|
+
|
|
|
+ size = max_count_engines_free(load->file, load->engine_id_pattern);
|
|
|
+ STARPU_ASSERT_MSG(size > 0, "cannot load starpu_max_load element %d on %s", (unsigned) (cur - load), load->engine_id_pattern);
|
|
|
+ /* One FPGA more to be used */
|
|
|
+ n++;
|
|
|
+
|
|
|
+ if (nstar)
|
|
|
+ {
|
|
|
+ size = max_count_engines_free(load->file, "*");
|
|
|
+ if (size > 1)
|
|
|
+ /* One of the star devices will be used to load this file */
|
|
|
+ nstar--;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ n += nstar;
|
|
|
+ }
|
|
|
+
|
|
|
+ //LMemInterface addLMemInterface()
|
|
|
+ //// pour récupérer l'accès à la LMem
|
|
|
+ config->topology.nhwfpgafpgas = nfpgafpgas = n;
|
|
|
}
|
|
|
|
|
|
unsigned _starpu_fpga_get_device_count(void)
|
|
@@ -112,22 +159,47 @@ static size_t _starpu_fpga_get_global_mem_size(unsigned devid)
|
|
|
|
|
|
static void init_fpga_worker_context(unsigned workerid)
|
|
|
{
|
|
|
- //starpu_fpgaStreamCreate(&streams[devid][i]);
|
|
|
}
|
|
|
|
|
|
static void init_device_context(unsigned devid)
|
|
|
{
|
|
|
- unsigned i;
|
|
|
- //TODO: starpu_fpga_set_device
|
|
|
- starpu_fpga_set_device(devid);
|
|
|
-
|
|
|
- //TODO: Do we need the streams? I think no
|
|
|
- //cures = starpu_fpgaStreamCreate(&in_transfer_streams[devid]);
|
|
|
- //cures = starpu_fpgaStreamCreate(&out_transfer_streams[devid]);
|
|
|
- for (i = 0; i < nfpgafpgas; i++)
|
|
|
- {
|
|
|
- //starpu_fpgaStreamCreate(&in_peer_transfer_streams[i][devid]);
|
|
|
- //starpu_fpgaStreamCreate(&out_peer_transfer_streams[devid][i]);
|
|
|
+ struct starpu_max_load *load = _starpu_config.conf.fpga_load;
|
|
|
+
|
|
|
+ /* 0 would be seen as NULL, i.e. allocation failed... */
|
|
|
+ // FIXME: Maxeler FPGAs want 192-byte alignment
|
|
|
+ current_address[devid] = (fpga_mem) (8192*192);
|
|
|
+ global_mem[devid] = 128ULL*1024*1024*1024;
|
|
|
+
|
|
|
+ _starpu_fpga_limit_global_mem(devid);
|
|
|
+
|
|
|
+ if (!load) {
|
|
|
+ /* Nothing specified, single-FPGA execution with basic static
|
|
|
+ * interface, file will be auto-loaded by SLiC. */
|
|
|
+ return;
|
|
|
+ } else {
|
|
|
+ unsigned n;
|
|
|
+
|
|
|
+ /* Which load we shall use */
|
|
|
+ for (n = 0; load->file; load++)
|
|
|
+ {
|
|
|
+ if (!strcmp(load->engine_id_pattern, "*"))
|
|
|
+ break;
|
|
|
+ if (n == devid)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ STARPU_ASSERT(load->file);
|
|
|
+
|
|
|
+ if (!strcmp(load->engine_id_pattern, "*"))
|
|
|
+ {
|
|
|
+ char s[32];
|
|
|
+ snprintf(s, sizeof(s), "local:%u", (unsigned) devid);
|
|
|
+ /* FIXME: this assumes that the loads are in-order.
|
|
|
+ * Ideally we'd detect which ones had an explicit load */
|
|
|
+ engines[n] = max_load(load->file, load->engine_id_pattern);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ engines[n] = max_load(load->file, load->engine_id_pattern);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -141,6 +213,9 @@ int _starpu_fpga_driver_init(struct _starpu_worker *worker)
|
|
|
if (worker->memory_node != STARPU_MAIN_RAM)
|
|
|
_starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_fpga_get_global_mem_size(worker->devid));
|
|
|
|
|
|
+ // TODO: multiple fpga in same thread
|
|
|
+ init_device_context(devid);
|
|
|
+
|
|
|
snprintf(worker->name, sizeof(worker->name), "FPGA %d", devid);
|
|
|
snprintf(worker->short_name, sizeof(worker->short_name), "FPGA %d", devid);
|
|
|
starpu_pthread_setname(worker->short_name);
|
|
@@ -351,20 +426,16 @@ uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags
|
|
|
{
|
|
|
(void) flags;
|
|
|
unsigned devid = starpu_memory_node_get_devid(dst_node);
|
|
|
- STARPU_ASSERT(devid == 0); // For now
|
|
|
|
|
|
- /* 0 would be seen as NULL, i.e. allocation failed... */
|
|
|
- // FIXME: Maxeler FPGAs want 192-byte alignment
|
|
|
- static fpga_mem current_address = 8192*192;
|
|
|
fpga_mem addr, next_addr;
|
|
|
- addr = current_address;
|
|
|
- next_addr = current_address + size;
|
|
|
- if (next_addr >= global_mem[0])
|
|
|
+ addr = current_address[devid];
|
|
|
+ next_addr = current_address[devid] + size;
|
|
|
+ if (next_addr >= (fpga_mem) global_mem[devid])
|
|
|
{
|
|
|
- printf("Memory overflow\n");
|
|
|
+ printf("Memory overflow on %d\n", devid);
|
|
|
return 0;
|
|
|
}
|
|
|
- current_address = next_addr;
|
|
|
+ current_address[devid] = next_addr;
|
|
|
printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size);
|
|
|
return (uintptr_t) addr;
|
|
|
}
|
|
@@ -567,19 +638,19 @@ struct _starpu_driver_ops _starpu_driver_fpga_ops =
|
|
|
.deinit = _starpu_fpga_driver_deinit
|
|
|
};
|
|
|
|
|
|
-// TODO: structure node_ops, comme dans driver_cuda.c, avec starpu_fpga_allocate_memory, etc.
|
|
|
+// TODO: transfers
|
|
|
struct _starpu_node_ops _starpu_driver_fpga_node_ops =
|
|
|
{
|
|
|
.copy_data_to[STARPU_UNUSED] = NULL,
|
|
|
- .copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
|
|
|
- .copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
|
|
|
+ //.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
|
|
|
+ //.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
|
|
|
.copy_data_to[STARPU_OPENCL_RAM] = NULL,
|
|
|
.copy_data_to[STARPU_DISK_RAM] = NULL,
|
|
|
.copy_data_to[STARPU_MIC_RAM] = NULL,
|
|
|
.copy_data_to[STARPU_MPI_MS_RAM] = NULL,
|
|
|
|
|
|
.copy_interface_to[STARPU_UNUSED] = NULL,
|
|
|
- .copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
|
|
|
+ //.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
|
|
|
.copy_interface_to[STARPU_FPGA_RAM] = NULL,
|
|
|
.copy_interface_to[STARPU_OPENCL_RAM] = NULL,
|
|
|
.copy_interface_to[STARPU_DISK_RAM] = NULL,
|