Browse Source

use the dynamic code loading facilities for the Gordon/mult example

Cédric Augonnet 16 years ago
parent
commit
3f3c7940ec

+ 26 - 1
examples/Makefile.am

@@ -21,6 +21,8 @@ TESTS = $(check_PROGRAMS)
 
 check_PROGRAMS =
 
+BUILT_SOURCES =
+
 EXTRA_DIST = 					\
 	cuda/incrementer_cuda.cu		\
 	cuda/spmv_cuda.cu
@@ -44,12 +46,28 @@ NVCC ?= nvcc
 	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I${includedir}
 
 
-BUILT_SOURCES =					\
+BUILT_SOURCES +=				\
 	cuda/incrementer_cuda.cubin		\
 	cuda/spmv_cuda.cubin			
 
 endif
 
+if USE_GORDON
+# SPU toolchain used when Gordon is enabled; "?=" keeps any value already set for these variables.
+SPU_CC ?= spu-gcc
+SPU_LD ?= spu-ld
+# Extra linker flags (none by default) and libraries given to $(SPU_LD) below.
+SPULDFLAGS = 
+SPULIBS = -lblas
+# Compile a C file into a position-independent SPU object.
+.c.spuo:
+	$(SPU_CC) -c -fpic $< -o $@
+# Link one SPU object into an SPU ELF image; libraries come last on the command line.
+.spuo.spuelf:
+	$(SPU_LD) $(SPULDFLAGS) $< -o $@ $(SPULIBS)
+
+endif
+
 examplebindir = $(libdir)/starpu/examples/
 
 examplebin_PROGRAMS =
@@ -110,6 +128,13 @@ mult_dw_mult_no_stride_no_tag_SOURCES =		\
 	common/blas.c				\
 	common/blas_model.c
 
+if USE_GORDON
+
+BUILT_SOURCES +=				\
+	mult/gordon/func_sgemm_ibm.spuelf
+
+endif
+
 endif
 
 ####################

+ 7 - 4
examples/mult/dw_mult.h

@@ -27,7 +27,6 @@
 #include <common/blas.h>
 #include <common/blas_model.h>
 
-#include <starpu_config.h>
 #include <starpu.h>
 
 #ifdef USE_CUDA
@@ -35,9 +34,13 @@
 #include <cublas.h>
 #endif
 
-#define MAXSLICESX	32
-#define MAXSLICESY	32
-#define MAXSLICESZ	32
+#ifdef USE_GORDON
+#include <cell/gordon/spu/functions.h> 
+#endif
+
+#define MAXSLICESX	64
+#define MAXSLICESY	64
+#define MAXSLICESZ	64
 /*
  * Expands to 2*n1*n2*n3 with every operand converted to uint64_t before
  * the multiplication (presumably the flop count of an n1 x n2 x n3 matrix
  * product -- TODO confirm against the callers).
  * NOTE(review): the arguments are not parenthesized before the cast, so
  * pass plain identifiers or constants rather than compound expressions.
  */
 #define BLAS3_FLOP(n1,n2,n3)	\
 	(2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))

+ 32 - 6
examples/mult/dw_mult_no_stride.c

@@ -15,6 +15,9 @@
  */
 
 #include "dw_mult.h"
+#ifdef USE_GORDON
+#include "gordon/func_sgemm_ibm.h"
+#endif
 
 static pthread_mutex_t mutex;
 static pthread_cond_t cond;
@@ -323,16 +326,35 @@ static starpu_codelet cl = {
 	.cublas_func = cublas_mult,
 #endif
 #ifdef USE_GORDON
-#ifdef SPU_FUNC_SGEMM
-	.gordon_func = SPU_FUNC_SGEMM,
-#else
-#warning SPU_FUNC_SGEMM is not available
-#endif
+	/* .gordon_func will be set by load_elf_sgemm */
 #endif
+
+	.model = &sgemm_model,
 	.where = CORE|CUBLAS|GORDON,
 	.nbuffers = 3
 };
 
+#ifdef USE_GORDON
+static const char *spu_func_sgemm_elf_file = "./gordon/func_sgemm_ibm.spuelf";
+static unsigned spu_func_sgemm_elf_id;
+static unsigned spu_func_sgemm_ibm_id;
+/*
+ * Registers the ELF plugin named by spu_func_sgemm_elf_file with Gordon,
+ * registers the kernel called "func_sgemm_ibm" against that plugin, asks
+ * Gordon to load both on all SPUs, and finally stores the kernel id in
+ * cl.gordon_func.
+ *
+ * The plugin and kernel ids are kept in spu_func_sgemm_elf_id and
+ * spu_func_sgemm_ibm_id.  The plugin path is relative, so it depends on
+ * the current working directory of the process.
+ *
+ * NOTE(review): the ids returned by the registration calls are used
+ * without validation and the load calls' results (if any) are ignored;
+ * confirm how Gordon reports a missing or unloadable plugin.
+ */
+static void load_elf_sgemm(void)
+{
+	spu_func_sgemm_elf_id =
+		gordon_register_elf_plugin(spu_func_sgemm_elf_file);
+
+	spu_func_sgemm_ibm_id = gordon_register_kernel(spu_func_sgemm_elf_id, "func_sgemm_ibm");
+
+	/* load the plugin first, then the kernel registered against it */
+	gordon_load_plugin_on_all_spu(spu_func_sgemm_elf_id);
+	gordon_load_kernel_on_all_spu(spu_func_sgemm_ibm_id);
+	/* fill in the codelet field that cl's static initializer leaves unset */
+	cl.gordon_func = spu_func_sgemm_ibm_id;
+}
+
+#endif // USE_GORDON
+
 static struct starpu_task *construct_task(unsigned x, unsigned y, unsigned z, unsigned iter)
 {
 	struct starpu_task *task = starpu_task_create();
@@ -340,7 +362,7 @@ static struct starpu_task *construct_task(unsigned x, unsigned y, unsigned z, un
 	task->cl = &cl;
 
 	task->cl_arg = &conf;
-	task->cl_arg_size = sizeof(struct block_conf);
+	task->cl_arg_size = sizeof(struct ibm_sgemm_block_conf);
 
 	task->use_tag = 1;
 	task->tag_id = TAG(z, y, x, iter);
@@ -464,6 +486,10 @@ int main(__attribute__ ((unused)) int argc,
 	/* start the runtime */
 	starpu_init(NULL);
 
+#ifdef USE_GORDON
+	load_elf_sgemm();
+#endif
+
 	pthread_mutex_init(&mutex, NULL);
 	pthread_cond_init(&cond, NULL);
 

+ 32 - 6
examples/mult/dw_mult_no_stride_no_tag.c

@@ -15,6 +15,10 @@
  */
 
 #include "dw_mult.h"
+#ifdef USE_GORDON
+#include "gordon/func_sgemm_ibm.h"
+#endif
+
 
 static pthread_mutex_t mutex;
 static pthread_cond_t cond;
@@ -269,6 +273,8 @@ static void init_problem_data(void)
 	conf.m = BLOCKSIZEY;
 	conf.n = BLOCKSIZEX;
 
+	fprintf(stderr, "block size : x %d y %d z %d\n", BLOCKSIZEX, BLOCKSIZEY, BLOCKSIZEZ);
+
 	display_memory_consumption();
 }
 
@@ -356,16 +362,32 @@ static starpu_codelet cl = {
 	.cublas_func = cublas_mult,
 #endif
 #ifdef USE_GORDON
-#ifdef SPU_FUNC_SGEMM
-	.gordon_func = SPU_FUNC_SGEMM,
-#else
-#warning SPU_FUNC_SGEMM is not available
-#endif
+	/* .gordon_func will be set by load_elf_sgemm */
 #endif
 	.nbuffers = 3
 };
 
 
+#ifdef USE_GORDON
+static const char *spu_func_sgemm_elf_file = "./gordon/func_sgemm_ibm.spuelf";
+static unsigned spu_func_sgemm_elf_id;
+static unsigned spu_func_sgemm_ibm_id;
+/*
+ * Registers the ELF plugin named by spu_func_sgemm_elf_file with Gordon,
+ * registers the kernel called "func_sgemm_ibm" against that plugin, asks
+ * Gordon to load both on all SPUs, and finally stores the kernel id in
+ * cl.gordon_func.
+ *
+ * The plugin and kernel ids are kept in spu_func_sgemm_elf_id and
+ * spu_func_sgemm_ibm_id.  The plugin path is relative, so it depends on
+ * the current working directory of the process.
+ *
+ * NOTE(review): the ids returned by the registration calls are used
+ * without validation and the load calls' results (if any) are ignored;
+ * confirm how Gordon reports a missing or unloadable plugin.
+ */
+static void load_elf_sgemm(void)
+{
+	spu_func_sgemm_elf_id =
+		gordon_register_elf_plugin(spu_func_sgemm_elf_file);
+
+	spu_func_sgemm_ibm_id = gordon_register_kernel(spu_func_sgemm_elf_id, "func_sgemm_ibm");
+	/* load the plugin first, then the kernel registered against it */
+	gordon_load_plugin_on_all_spu(spu_func_sgemm_elf_id);
+	gordon_load_kernel_on_all_spu(spu_func_sgemm_ibm_id);
+	/* fill in the codelet field that cl's static initializer leaves unset */
+	cl.gordon_func = spu_func_sgemm_ibm_id;
+}
+#endif
+
+
 static void construct_task(unsigned x, unsigned y, unsigned z, unsigned iter, struct pos *posp)
 {
 	struct starpu_task *task;
@@ -384,7 +406,7 @@ static void construct_task(unsigned x, unsigned y, unsigned z, unsigned iter, st
 	task->callback_arg = posp;
 
 	task->cl_arg = &conf;
-	task->cl_arg_size = sizeof(struct block_conf);
+	task->cl_arg_size = sizeof(struct ibm_sgemm_block_conf);
 
 	posp->z = z;
 	posp->iter = iter;
@@ -470,6 +492,10 @@ int main(__attribute__ ((unused)) int argc,
 	/* start the runtime */
 	starpu_init(NULL);
 
+#ifdef USE_GORDON
+	load_elf_sgemm();
+#endif
+
 	pthread_mutex_init(&mutex, NULL);
 	pthread_cond_init(&cond, NULL);
 

+ 26 - 0
examples/mult/gordon/func_sgemm_ibm.c

@@ -0,0 +1,26 @@
+#include "func_sgemm_ibm.h"
+
+#include <blas_s.h>
+/*
+ * Gordon kernel performing one SGEMM block update: it reads the block
+ * dimensions from in[0] and the matrices from in[1], in[2] and inout[0],
+ * then hands them to sgemm_spu().
+ *
+ * alloc and out are not used.
+ *
+ * NOTE(review): in and inout also carry __attribute__((unused)) although
+ * both are read below; the attribute is harmless but misleading on those
+ * two parameters.
+ */
+void func_sgemm_ibm(__attribute__ ((unused)) void **alloc,
+		__attribute__ ((unused)) void **in,
+		__attribute__ ((unused)) void **inout,
+		__attribute__ ((unused)) void **out)
+{
+	/*
+	 * We assume the data is accessed in A:R, B:R, C:RW mode:
+	 *  -> in[0]    : description of the problem (struct ibm_sgemm_block_conf)
+	 *  -> in[1]    : A
+	 *  -> in[2]    : B
+	 *  -> inout[0] : C
+	 *
+	 * The goal is C = AB + C but, the data being in Fortran ordering,
+	 * we compute t(C) = t(B)t(A) + t(C) instead, which is why B is
+	 * passed before A to sgemm_spu() below.
+	 */
+	struct ibm_sgemm_block_conf *conf = in[0];
+	float *A = in[1];
+	float *B = in[2];
+	float *C = inout[0];
+
+	sgemm_spu(conf->m, conf->n, conf->k, B, A, C);
+}

+ 13 - 0
examples/mult/gordon/func_sgemm_ibm.h

@@ -0,0 +1,13 @@
+#ifndef __FUNC_SGEMM_IBM_H__
+#define __FUNC_SGEMM_IBM_H__
+
+#include <stdint.h>
+/*
+ * Problem description read by the func_sgemm_ibm kernel from its first
+ * input buffer.  m, n and k are forwarded, in that order, as the first
+ * three arguments of sgemm_spu().  The struct is made of four uint32_t
+ * fields, i.e. 16 bytes.
+ */
+struct ibm_sgemm_block_conf {
+	uint32_t m;
+	uint32_t n;
+	uint32_t k;
+	uint32_t pad;	/* not read by func_sgemm_ibm; presumably only rounds the struct up to 16 bytes -- TODO confirm */
+};
+
+#endif // __FUNC_SGEMM_IBM_H__