| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 | /* * StarPU * Copyright (C) INRIA 2008-2009 (see AUTHORS file) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */#ifndef __STRASSEN_H__#define __STRASSEN_H__#include <semaphore.h>#include <sys/time.h>#include <string.h>#include <math.h>#include <sys/types.h>#include <pthread.h>#include <signal.h>#include <cblas.h>#include <starpu_config.h>#ifdef STARPU_USE_CUDA#include <cuda.h>#include <cublas.h>#endif#include <starpu.h>typedef enum {	ADD,	SUB,	MULT,	SELFADD,	SELFSUB,	NONE} operation;typedef struct {	/* monitor the progress of the algorithm */	unsigned Ei12[7]; // Ei12[k] is 0, 1 or 2 (2 = finished Ei1 and Ei2)	unsigned Ei[7];	unsigned Ei_remaining_use[7];	unsigned Cij[4];	starpu_data_handle A, B, C;	starpu_data_handle A11, A12, A21, A22;	starpu_data_handle B11, B12, B21, B22;	starpu_data_handle C11, C12, C21, C22;	starpu_data_handle E1, E2, E3, E4, E5, E6, E7;	starpu_data_handle E11, E12, E21, E22, E31, E32, E41, E52, E62, E71;	starpu_data_handle E42, E51, E61, E72;	unsigned reclevel;		/* */	unsigned counter;	/* called at the end of the iteration */	void (*strassen_iter_callback)(void *);	void *argcb;} strassen_iter_state_t;typedef struct {	strassen_iter_state_t *iter;	/* phase 1 computes Ei1 or Ei2 with i in 0-6 */	unsigned i;} phase1_t;typedef struct {	strassen_iter_state_t *iter;	/* phase 2 computes Ei with i in 0-6 */	unsigned i;} phase2_t;typedef struct {	strassen_iter_state_t *iter;	/* phase 2 computes Ei with i in 0-6 */	unsigned i;} phase3_t;void mult_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);void sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);void add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);void self_add_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);void self_sub_cpu_codelet(void *descr[], __attribute__((unused))  void *arg);#ifdef STARPU_USE_CUDAvoid mult_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);void sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);void add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);void self_add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);void self_sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);#endifvoid strassen(starpu_data_handle A, starpu_data_handle B, starpu_data_handle C, void (*callback)(void *), void *argcb, unsigned reclevel);extern struct starpu_perfmodel_t strassen_model_mult;extern struct starpu_perfmodel_t strassen_model_add_sub;extern struct starpu_perfmodel_t strassen_model_self_add_sub;#endif // __STRASSEN_H__
 |