|
@@ -24,275 +24,36 @@ const char * __attribute__ ((aligned (16))) SOCL_DRIVER_VERSION = "0.1";
|
|
|
|
|
|
const cl_uint __attribute__ ((aligned (16))) SOCL_DEVICE_VENDOR_ID = 666;
|
|
|
|
|
|
-const struct _cl_device_id socl_devices[] = {
|
|
|
- {
|
|
|
- .dispatch = &socl_master_dispatch,
|
|
|
- .type = CL_DEVICE_TYPE_CPU,
|
|
|
- .max_compute_units = 1,
|
|
|
- .max_work_item_dimensions = 3,
|
|
|
- .max_work_item_sizes = {1,1,1},
|
|
|
- .max_work_group_size = 1,
|
|
|
- .preferred_vector_widths = {16,8,4,2,4,2},
|
|
|
- .max_clock_frequency = 3000,
|
|
|
- .address_bits = 64,
|
|
|
- .max_mem_alloc_size = 1024*1024*1024,
|
|
|
- .image_support = CL_FALSE,
|
|
|
- .max_parameter_size = 256,
|
|
|
- .mem_base_addr_align = 0,
|
|
|
- .min_data_type_align_size = 0,
|
|
|
- .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
|
|
|
- .global_mem_cache_type = CL_READ_WRITE_CACHE,
|
|
|
- .global_mem_cacheline_size = 128,
|
|
|
- .global_mem_cache_size = 16*1024,
|
|
|
- .global_mem_size = (cl_ulong)4*1024*1024*1024,
|
|
|
- .max_constant_args = 8,
|
|
|
- .local_mem_type = CL_GLOBAL,
|
|
|
- .local_mem_size = 16*1024,
|
|
|
- .error_correction_support = CL_FALSE,
|
|
|
- .profiling_timer_resolution = 100,
|
|
|
- .endian_little = CL_TRUE,
|
|
|
- .available = CL_TRUE,
|
|
|
- .compiler_available = CL_TRUE,
|
|
|
- .execution_capabilities = CL_EXEC_KERNEL,
|
|
|
- .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
|
|
|
- .name = "StarPU virtual CPU 1",
|
|
|
- .extensions = ""
|
|
|
- },
|
|
|
- {
|
|
|
- .dispatch = &socl_master_dispatch,
|
|
|
- .type = CL_DEVICE_TYPE_GPU,
|
|
|
- .max_compute_units = 12,
|
|
|
- .max_work_item_dimensions = 3,
|
|
|
- .max_work_item_sizes = {512,512,64},
|
|
|
- .max_work_group_size = 512,
|
|
|
- .preferred_vector_widths = {16,8,4,2,4,2},
|
|
|
- .max_clock_frequency = 1600,
|
|
|
- .address_bits = 32,
|
|
|
- .max_mem_alloc_size = 100*1024*1024,
|
|
|
- .image_support = CL_FALSE,
|
|
|
- .max_parameter_size = 256,
|
|
|
- .mem_base_addr_align = 0,
|
|
|
- .min_data_type_align_size = 0,
|
|
|
- .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
|
|
|
- .global_mem_cache_type = CL_NONE,
|
|
|
- .global_mem_cacheline_size = 0,
|
|
|
- .global_mem_cache_size = 0,
|
|
|
- .global_mem_size = (cl_ulong)500*1024*1024,
|
|
|
- .max_constant_args = 8,
|
|
|
- .local_mem_type = CL_LOCAL,
|
|
|
- .local_mem_size = 16*1024,
|
|
|
- .error_correction_support = CL_FALSE,
|
|
|
- .profiling_timer_resolution = 10,
|
|
|
- .endian_little = CL_TRUE,
|
|
|
- .available = CL_TRUE,
|
|
|
- .compiler_available = CL_TRUE,
|
|
|
- .execution_capabilities = CL_EXEC_KERNEL,
|
|
|
- .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
|
|
|
- .name = "StarPU virtual GPU 1",
|
|
|
- .extensions = ""
|
|
|
- },
|
|
|
- {
|
|
|
- .dispatch = &socl_master_dispatch,
|
|
|
- .type = CL_DEVICE_TYPE_GPU,
|
|
|
- .max_compute_units = 12,
|
|
|
- .max_work_item_dimensions = 3,
|
|
|
- .max_work_item_sizes = {512,512,64},
|
|
|
- .max_work_group_size = 512,
|
|
|
- .preferred_vector_widths = {16,8,4,2,4,2},
|
|
|
- .max_clock_frequency = 1600,
|
|
|
- .address_bits = 32,
|
|
|
- .max_mem_alloc_size = 100*1024*1024,
|
|
|
- .image_support = CL_FALSE,
|
|
|
- .max_parameter_size = 256,
|
|
|
- .mem_base_addr_align = 0,
|
|
|
- .min_data_type_align_size = 0,
|
|
|
- .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
|
|
|
- .global_mem_cache_type = CL_NONE,
|
|
|
- .global_mem_cacheline_size = 0,
|
|
|
- .global_mem_cache_size = 0,
|
|
|
- .global_mem_size = (cl_ulong)500*1024*1024,
|
|
|
- .max_constant_args = 8,
|
|
|
- .local_mem_type = CL_LOCAL,
|
|
|
- .local_mem_size = 16*1024,
|
|
|
- .error_correction_support = CL_FALSE,
|
|
|
- .profiling_timer_resolution = 10,
|
|
|
- .endian_little = CL_TRUE,
|
|
|
- .available = CL_TRUE,
|
|
|
- .compiler_available = CL_TRUE,
|
|
|
- .execution_capabilities = CL_EXEC_KERNEL,
|
|
|
- .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
|
|
|
- .name = "StarPU virtual GPU 2",
|
|
|
- .extensions = ""
|
|
|
- },
|
|
|
- {
|
|
|
- .dispatch = &socl_master_dispatch,
|
|
|
- .type = CL_DEVICE_TYPE_GPU,
|
|
|
- .max_compute_units = 12,
|
|
|
- .max_work_item_dimensions = 3,
|
|
|
- .max_work_item_sizes = {512,512,64},
|
|
|
- .max_work_group_size = 512,
|
|
|
- .preferred_vector_widths = {16,8,4,2,4,2},
|
|
|
- .max_clock_frequency = 1600,
|
|
|
- .address_bits = 32,
|
|
|
- .max_mem_alloc_size = 100*1024*1024,
|
|
|
- .image_support = CL_FALSE,
|
|
|
- .max_parameter_size = 256,
|
|
|
- .mem_base_addr_align = 0,
|
|
|
- .min_data_type_align_size = 0,
|
|
|
- .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
|
|
|
- .global_mem_cache_type = CL_NONE,
|
|
|
- .global_mem_cacheline_size = 0,
|
|
|
- .global_mem_cache_size = 0,
|
|
|
- .global_mem_size = (cl_ulong)500*1024*1024,
|
|
|
- .max_constant_args = 8,
|
|
|
- .local_mem_type = CL_LOCAL,
|
|
|
- .local_mem_size = 16*1024,
|
|
|
- .error_correction_support = CL_FALSE,
|
|
|
- .profiling_timer_resolution = 10,
|
|
|
- .endian_little = CL_TRUE,
|
|
|
- .available = CL_TRUE,
|
|
|
- .compiler_available = CL_TRUE,
|
|
|
- .execution_capabilities = CL_EXEC_KERNEL,
|
|
|
- .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
|
|
|
- .name = "StarPU virtual GPU 3",
|
|
|
- .extensions = ""
|
|
|
- },
|
|
|
- {
|
|
|
- .dispatch = &socl_master_dispatch,
|
|
|
- .type = CL_DEVICE_TYPE_GPU,
|
|
|
- .max_compute_units = 12,
|
|
|
- .max_work_item_dimensions = 3,
|
|
|
- .max_work_item_sizes = {512,512,64},
|
|
|
- .max_work_group_size = 512,
|
|
|
- .preferred_vector_widths = {16,8,4,2,4,2},
|
|
|
- .max_clock_frequency = 1600,
|
|
|
- .address_bits = 32,
|
|
|
- .max_mem_alloc_size = 100*1024*1024,
|
|
|
- .image_support = CL_FALSE,
|
|
|
- .max_parameter_size = 256,
|
|
|
- .mem_base_addr_align = 0,
|
|
|
- .min_data_type_align_size = 0,
|
|
|
- .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
|
|
|
- .global_mem_cache_type = CL_NONE,
|
|
|
- .global_mem_cacheline_size = 0,
|
|
|
- .global_mem_cache_size = 0,
|
|
|
- .global_mem_size = (cl_ulong)500*1024*1024,
|
|
|
- .max_constant_args = 8,
|
|
|
- .local_mem_type = CL_LOCAL,
|
|
|
- .local_mem_size = 16*1024,
|
|
|
- .error_correction_support = CL_FALSE,
|
|
|
- .profiling_timer_resolution = 10,
|
|
|
- .endian_little = CL_TRUE,
|
|
|
- .available = CL_TRUE,
|
|
|
- .compiler_available = CL_TRUE,
|
|
|
- .execution_capabilities = CL_EXEC_KERNEL,
|
|
|
- .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
|
|
|
- .name = "StarPU virtual GPU 4",
|
|
|
- .extensions = ""
|
|
|
- },
|
|
|
- {
|
|
|
- .dispatch = &socl_master_dispatch,
|
|
|
- .type = CL_DEVICE_TYPE_GPU,
|
|
|
- .max_compute_units = 12,
|
|
|
- .max_work_item_dimensions = 3,
|
|
|
- .max_work_item_sizes = {512,512,64},
|
|
|
- .max_work_group_size = 512,
|
|
|
- .preferred_vector_widths = {16,8,4,2,4,2},
|
|
|
- .max_clock_frequency = 1600,
|
|
|
- .address_bits = 32,
|
|
|
- .max_mem_alloc_size = 100*1024*1024,
|
|
|
- .image_support = CL_FALSE,
|
|
|
- .max_parameter_size = 256,
|
|
|
- .mem_base_addr_align = 0,
|
|
|
- .min_data_type_align_size = 0,
|
|
|
- .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
|
|
|
- .global_mem_cache_type = CL_NONE,
|
|
|
- .global_mem_cacheline_size = 0,
|
|
|
- .global_mem_cache_size = 0,
|
|
|
- .global_mem_size = (cl_ulong)500*1024*1024,
|
|
|
- .max_constant_args = 8,
|
|
|
- .local_mem_type = CL_LOCAL,
|
|
|
- .local_mem_size = 16*1024,
|
|
|
- .error_correction_support = CL_FALSE,
|
|
|
- .profiling_timer_resolution = 10,
|
|
|
- .endian_little = CL_TRUE,
|
|
|
- .available = CL_TRUE,
|
|
|
- .compiler_available = CL_TRUE,
|
|
|
- .execution_capabilities = CL_EXEC_KERNEL,
|
|
|
- .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
|
|
|
- .name = "StarPU virtual GPU 5",
|
|
|
- .extensions = ""
|
|
|
- },
|
|
|
- {
|
|
|
- .dispatch = &socl_master_dispatch,
|
|
|
- .type = CL_DEVICE_TYPE_GPU,
|
|
|
- .max_compute_units = 12,
|
|
|
- .max_work_item_dimensions = 3,
|
|
|
- .max_work_item_sizes = {512,512,64},
|
|
|
- .max_work_group_size = 512,
|
|
|
- .preferred_vector_widths = {16,8,4,2,4,2},
|
|
|
- .max_clock_frequency = 1600,
|
|
|
- .address_bits = 32,
|
|
|
- .max_mem_alloc_size = 100*1024*1024,
|
|
|
- .image_support = CL_FALSE,
|
|
|
- .max_parameter_size = 256,
|
|
|
- .mem_base_addr_align = 0,
|
|
|
- .min_data_type_align_size = 0,
|
|
|
- .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
|
|
|
- .global_mem_cache_type = CL_NONE,
|
|
|
- .global_mem_cacheline_size = 0,
|
|
|
- .global_mem_cache_size = 0,
|
|
|
- .global_mem_size = (cl_ulong)500*1024*1024,
|
|
|
- .max_constant_args = 8,
|
|
|
- .local_mem_type = CL_LOCAL,
|
|
|
- .local_mem_size = 16*1024,
|
|
|
- .error_correction_support = CL_FALSE,
|
|
|
- .profiling_timer_resolution = 10,
|
|
|
- .endian_little = CL_TRUE,
|
|
|
- .available = CL_TRUE,
|
|
|
- .compiler_available = CL_TRUE,
|
|
|
- .execution_capabilities = CL_EXEC_KERNEL,
|
|
|
- .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
|
|
|
- .name = "StarPU virtual GPU 6",
|
|
|
- .extensions = ""
|
|
|
- },
|
|
|
- {
|
|
|
- .dispatch = &socl_master_dispatch,
|
|
|
- .type = CL_DEVICE_TYPE_GPU,
|
|
|
- .max_compute_units = 12,
|
|
|
- .max_work_item_dimensions = 3,
|
|
|
- .max_work_item_sizes = {512,512,64},
|
|
|
- .max_work_group_size = 512,
|
|
|
- .preferred_vector_widths = {16,8,4,2,4,2},
|
|
|
- .max_clock_frequency = 1600,
|
|
|
- .address_bits = 32,
|
|
|
- .max_mem_alloc_size = 100*1024*1024,
|
|
|
- .image_support = CL_FALSE,
|
|
|
- .max_parameter_size = 256,
|
|
|
- .mem_base_addr_align = 0,
|
|
|
- .min_data_type_align_size = 0,
|
|
|
- .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
|
|
|
- .global_mem_cache_type = CL_NONE,
|
|
|
- .global_mem_cacheline_size = 0,
|
|
|
- .global_mem_cache_size = 0,
|
|
|
- .global_mem_size = (cl_ulong)500*1024*1024,
|
|
|
- .max_constant_args = 8,
|
|
|
- .local_mem_type = CL_LOCAL,
|
|
|
- .local_mem_size = 16*1024,
|
|
|
- .error_correction_support = CL_FALSE,
|
|
|
- .profiling_timer_resolution = 10,
|
|
|
- .endian_little = CL_TRUE,
|
|
|
- .available = CL_TRUE,
|
|
|
- .compiler_available = CL_TRUE,
|
|
|
- .execution_capabilities = CL_EXEC_KERNEL,
|
|
|
- .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
|
|
|
- .name = "StarPU virtual GPU 7",
|
|
|
- .extensions = ""
|
|
|
- }
|
|
|
-
|
|
|
+const struct _cl_device_id socl_virtual_device = { /* Constant descriptor of the SOCL virtual device; every field below is a compile-time constant. */
|
|
|
+ .dispatch = &socl_master_dispatch, /* address of socl_master_dispatch, which is defined outside this hunk */
|
|
|
+ .type = CL_DEVICE_TYPE_ACCELERATOR,
|
|
|
+ .max_compute_units = 1,
|
|
|
+ .max_work_item_dimensions = 3,
|
|
|
+ .max_work_item_sizes = {1,1,1}, /* one entry per dimension, matching max_work_item_dimensions = 3 */
|
|
|
+ .max_work_group_size = 1,
|
|
|
+ .preferred_vector_widths = {16,8,4,2,4,2}, /* six widths; presumably in OpenCL's char, short, int, long, float, double order - TODO confirm against struct _cl_device_id */
|
|
|
+ .max_clock_frequency = 3000, /* presumably MHz, as in CL_DEVICE_MAX_CLOCK_FREQUENCY - TODO confirm */
|
|
|
+ .address_bits = 64,
|
|
|
+ .max_mem_alloc_size = 1024*1024*1024, /* 2^30 (1 GiB if the unit is bytes); the product is evaluated in int and still fits a 32-bit int */
|
|
|
+ .image_support = CL_FALSE,
|
|
|
+ .max_parameter_size = 256,
|
|
|
+ .mem_base_addr_align = 0, /* NOTE(review): alignment given as 0, as is min_data_type_align_size below; confirm that zero is what consumers of these fields expect */
|
|
|
+ .min_data_type_align_size = 0,
|
|
|
+ .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN, /* bitwise OR of the two capability flags */
|
|
|
+ .global_mem_cache_type = CL_READ_WRITE_CACHE,
|
|
|
+ .global_mem_cacheline_size = 128,
|
|
|
+ .global_mem_cache_size = 16*1024, /* 16384 */
|
|
|
+ .global_mem_size = (cl_ulong)4*1024*1024*1024, /* 2^32; the cast binds to the literal 4 first, so the product is computed in cl_ulong rather than int */
|
|
|
+ .max_constant_args = 8,
|
|
|
+ .local_mem_type = CL_GLOBAL, /* CL_GLOBAL rather than CL_LOCAL */
|
|
|
+ .local_mem_size = 16*1024, /* 16384 */
|
|
|
+ .error_correction_support = CL_FALSE,
|
|
|
+ .profiling_timer_resolution = 100, /* presumably nanoseconds, as in CL_DEVICE_PROFILING_TIMER_RESOLUTION - TODO confirm */
|
|
|
+ .endian_little = CL_TRUE,
|
|
|
+ .available = CL_TRUE,
|
|
|
+ .compiler_available = CL_TRUE,
|
|
|
+ .execution_capabilities = CL_EXEC_KERNEL, /* only the CL_EXEC_KERNEL flag is set */
|
|
|
+ .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, /* both the out-of-order and the profiling flags are set */
|
|
|
+ .name = "SOCL Virtual Device",
|
|
|
+ .extensions = "" /* empty string: no extension names are listed */
|
|
|
};
|
|
|
-
|
|
|
-const int socl_device_count = sizeof(socl_devices) / sizeof(struct _cl_device_id);
|
|
|
-
|
|
|
-
|
|
|
-
|