Procházet zdrojové kódy

Separate out kernels, so we avoid having to use bins

Samuel Thibault před 4 roky
rodič
revize
50d532a6af
Změněny 3 soubory: 119 přidání a 145 odebrání
  1. 0 36
      tests/fpga/StreamFMAKernel.maxj
  2. 112 22
      tests/fpga/StreamFMAManager.maxj
  3. 7 87
      tests/fpga/max_fpga.c

+ 0 - 36
tests/fpga/StreamFMAKernel.maxj

@@ -1,36 +0,0 @@
-package fpga;
-
-import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel;
-import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters;
-import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType;
-import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar;
-
-class StreamFMAKernel extends Kernel
-{
-	private static final DFEType type = dfeInt(32);
-
-	protected StreamFMAKernel(KernelParameters parameters)
-	{
-		super(parameters);
-
-                DFEVar inAT1 = io.input("inAT1", type);
-                DFEVar inBT1 = io.input("inBT1", type);
-                DFEVar oDataT1;
-
-                DFEVar inAT2 = io.input("inAT2", type);
-                DFEVar inBT2 = io.input("inBT2", type);
-                DFEVar oDataT2;
-
-                DFEVar inAT3 = io.input("inAT3", type);
-                DFEVar inBT3 = io.input("inBT3", type);
-                DFEVar oDataT3;
-
-                oDataT1 = inAT1+inBT1;
-                oDataT2 = inAT2*inBT2;
-                oDataT3 = inAT3+inBT3;
-
-                io.output("oDataT1", oDataT1, type);
-                io.output("oDataT2", oDataT2, type);
-                io.output("oDataT3", oDataT3, type);
-	}
-}

+ 112 - 22
tests/fpga/StreamFMAManager.maxj

@@ -6,51 +6,56 @@ import com.maxeler.maxcompiler.v2.managers.custom.DFELink;
 import com.maxeler.maxcompiler.v2.managers.custom.blocks.KernelBlock;
 import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemCommandGroup;
 import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemInterface;
-//import com.maxeler.maxcompiler.v2.managers.engine_interfaces.CPUTypes;
-//import com.maxeler.maxcompiler.v2.managers.engine_interfaces.EngineInterface;
-//import com.maxeler.maxcompiler.v2.managers.engine_interfaces.EngineInterface.Direction;
-//import com.maxeler.maxcompiler.v2.managers.engine_interfaces.InterfaceParam;
+import com.maxeler.maxcompiler.v2.managers.engine_interfaces.CPUTypes;
+import com.maxeler.maxcompiler.v2.managers.engine_interfaces.EngineInterface;
+import com.maxeler.maxcompiler.v2.managers.engine_interfaces.EngineInterface.Direction;
+import com.maxeler.maxcompiler.v2.managers.engine_interfaces.InterfaceParam;
 import com.maxeler.platform.max5.manager.MAX5CManager;
 
 public class StreamFMAManager extends MAX5CManager
 {
-        //private static final CPUTypes TYPE = CPUTypes.INT32;
-        private static final String kernel_name = "StreamFMAKernel";
+        private static final CPUTypes TYPE = CPUTypes.INT32;
 
         public StreamFMAManager(EngineParameters params)
 	{
                 super(params);
-                KernelBlock kernel = addKernel(new StreamFMAKernel(makeKernelParameters(kernel_name)));
+                KernelBlock kernel1 = addKernel(new Task1(makeKernelParameters("Task1")));
+                KernelBlock kernel2 = addKernel(new Task2(makeKernelParameters("Task2")));
+                KernelBlock kernel3 = addKernel(new Task3(makeKernelParameters("Task3")));
 
                 LMemInterface iface = addLMemInterface();
 
-                kernel.getInput("inAT1") <== addStreamFromCPU("inAT1");
-                kernel.getInput("inBT1") <== addStreamFromCPU("inBT1");
 
-                //addStreamToCPU("oDataT1") <== kernel.getOutput("oDataT1");
+                kernel1.getInput("inAT1") <== addStreamFromCPU("inAT1");
+
+                kernel1.getInput("inBT1") <== addStreamFromCPU("inBT1");
+
+                DFELink oDataT1 = iface.addStreamToLMem("oDataT1", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
+                oDataT1 <== kernel1.getOutput("oDataT1");
+
 
                 DFELink inAT2 = iface.addStreamFromLMem("inAT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
-                kernel.getInput("inAT2") <== inAT2;
+                kernel2.getInput("inAT2") <== inAT2;
 
                 DFELink inBT2 = iface.addStreamFromLMem("inBT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
-                kernel.getInput("inBT2") <== inBT2;
+                kernel2.getInput("inBT2") <== inBT2;
+
+                DFELink oDataT2 = iface.addStreamToLMem("oDataT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
+                oDataT2 <== kernel2.getOutput("oDataT2");
+
 
                 DFELink inAT3 = iface.addStreamFromLMem("inAT3", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
-                kernel.getInput("inAT3") <== inAT3;
+                kernel3.getInput("inAT3") <== inAT3;
 
                 DFELink inBT3 = iface.addStreamFromLMem("inBT3", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
-                kernel.getInput("inBT3") <== inBT3;
+                kernel3.getInput("inBT3") <== inBT3;
 
-                DFELink oDataT1 = iface.addStreamToLMem("oDataT1", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
-                oDataT1 <== kernel.getOutput("oDataT1");
-
-                DFELink oDataT2 = iface.addStreamToLMem("oDataT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
-                oDataT2 <== kernel.getOutput("oDataT2");
+                addStreamToCPU("oDataT3") <== kernel3.getOutput("oDataT3");
 
-                //DFELink oDataT3 = iface.addStreamToLMem("oDataT3", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D);
-                //oDataT3 <== kernel.getOutput("oDataT3");
 
-                addStreamToCPU("oDataT3") <== kernel.getOutput("oDataT3");
+                createSlicInterface(interfaceT1("interfaceT1"));
+                createSlicInterface(interfaceT2("interfaceT2"));
+                createSlicInterface(interfaceT3("interfaceT3"));
         }
 
         public static void main(String[] args)
@@ -59,4 +64,89 @@ public class StreamFMAManager extends MAX5CManager
 
                 manager.build();
         }
+
+        private static EngineInterface interfaceT1(String name)
+	{
+                EngineInterface ei = new EngineInterface(name);
+
+                InterfaceParam ptrC1;
+                InterfaceParam N;
+                InterfaceParam sizeInBytes;
+
+                N    = ei.addParam("N", TYPE);
+                sizeInBytes = N * TYPE.sizeInBytes();
+                ptrC1    = ei.addParam("ptrC1", TYPE);
+
+                ei.setTicks("Task1", N);
+
+                ei.setStream("inAT1", TYPE, sizeInBytes);
+                ei.setStream("inBT1", TYPE, sizeInBytes);
+                ei.setLMemLinear("oDataT1", ptrC1, sizeInBytes);
+
+                ei.ignoreAll(Direction.IN_OUT);
+
+		ei.ignoreKernel("Task2");
+		ei.ignoreKernel("Task3");
+
+                return ei;
+        }
+
+
+        private static EngineInterface interfaceT2(String name)
+	{
+                EngineInterface ei = new EngineInterface(name);
+
+                InterfaceParam ptrA2;
+                InterfaceParam ptrB2;
+                InterfaceParam ptrC2;
+                InterfaceParam N;
+                InterfaceParam sizeInBytes;
+
+                N    = ei.addParam("N", TYPE);
+                sizeInBytes = N * TYPE.sizeInBytes();
+                ptrA2    = ei.addParam("ptrA2", TYPE);
+                ptrB2    = ei.addParam("ptrB2", TYPE);
+                ptrC2    = ei.addParam("ptrC2", TYPE);
+
+                ei.setTicks("Task2", N);
+
+                ei.setLMemLinear("inAT2", ptrA2, sizeInBytes);
+                ei.setLMemLinear("inBT2", ptrB2, sizeInBytes);
+                ei.setLMemLinear("oDataT2", ptrC2, sizeInBytes);
+
+                ei.ignoreAll(Direction.IN_OUT);
+
+		ei.ignoreKernel("Task1");
+		ei.ignoreKernel("Task3");
+
+                return ei;
+        }
+
+       private static EngineInterface interfaceT3(String name)
+       {
+                EngineInterface ei = new EngineInterface(name);
+
+                InterfaceParam ptrA3;
+                InterfaceParam ptrB3;
+                InterfaceParam N;
+                InterfaceParam sizeInBytes;
+
+                N    = ei.addParam("N", TYPE);
+                sizeInBytes = N * TYPE.sizeInBytes();
+                ptrA3    = ei.addParam("ptrA3", TYPE);
+                ptrB3    = ei.addParam("ptrB3", TYPE);
+
+                ei.setTicks("Task3", N);
+
+                ei.setLMemLinear("inAT3", ptrA3, sizeInBytes);
+                ei.setLMemLinear("inBT3", ptrB3, sizeInBytes);
+                ei.setStream("oDataT3", TYPE, sizeInBytes);
+
+                ei.ignoreAll(Direction.IN_OUT);
+
+		ei.ignoreKernel("Task1");
+		ei.ignoreKernel("Task2");
+
+                return ei;
+        }
 }

+ 7 - 87
tests/fpga/max_fpga.c

@@ -31,14 +31,9 @@ void fpga_impl(void *buffers[], void *cl_arg)
 	int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]);
 	int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]);
 
-	int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
-	int sizeBytes=SIZE *sizeof(int32_t);
-	size_t LMemsize= SIZE *sizeof(int32_t);
-
-	size_t poubelle = 0xc0000;
-
+	// XXX: would rather use a scratch buffer
 	size_t ptrCT1 = 0x00000000000000c0;
 
 	size_t ptrAT2 = ptrCT1;
@@ -51,33 +46,15 @@ void fpga_impl(void *buffers[], void *cl_arg)
 	printf("Loading DFE memory.\n");
 
 	/* C = A+B */
-	StreamFMA(SIZE, ptrA, sizeBytes, ptrB, sizeBytes, poubelle_cpu, sizeBytes,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize,
-		  ptrCT1, LMemsize,
-		  poubelle, LMemsize);
+	StreamFMA_interfaceT1(size, ptrCT1, ptrA, ptrB);
 	printf("T1 finished\n");
 
 	/* C = A*B */
-	StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes,
-		  ptrAT2, LMemsize,
-		  poubelle, LMemsize,
-		  ptrBT2, LMemsize,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize,
-		  ptrCT2, LMemsize);
+	StreamFMA_interfaceT2(size, ptrAT2, ptrBT2, ptrCT2);
 	printf("T2 finished\n");
 
 	/* C = A+B */
-	StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, ptrC, sizeBytes,
-		  poubelle, LMemsize,
-		  ptrAT3, LMemsize,
-		  poubelle, LMemsize,
-		  ptrBT3, LMemsize,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize);
+	StreamFMA_interfaceT3(size, ptrAT3, ptrBT3, ptrC);
 	printf("T3 finished\n");
 
 	printf("Running DFE.\n");
@@ -100,29 +77,11 @@ void fpga_impl1(void *buffers[], void *cl_arg)
 	int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]);
 	size_t   ptrC = (size_t)   STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */
 
-	int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
-	int sizeBytes=SIZE *sizeof(int32_t);
-	size_t LMemsize= SIZE *sizeof(int32_t);
-
-	size_t poubelle = 0xc0000;
-
-#if 0
-	printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC);
-	//XXX
-	ptrC = 0x00000000000000c0;
-#endif
-
 	printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC);
 	/* C = A+B */
-	StreamFMA(SIZE, ptrA, sizeBytes, ptrB, sizeBytes, poubelle_cpu, sizeBytes,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize,
-		  ptrC, LMemsize,
-		  poubelle, LMemsize);
+	StreamFMA_interfaceT1(size, ptrC, ptrA, ptrB);
 	printf("T1 finished\n");
 }
 
@@ -143,31 +102,11 @@ void fpga_impl2(void *buffers[], void *cl_arg)
 	size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */
 	size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */
 
-	int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
-	int sizeBytes=SIZE *sizeof(int32_t);
-	size_t LMemsize= SIZE *sizeof(int32_t);
-
-	size_t poubelle = 0xc0000;
-
-#if 0
-	printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC);
-	//XXX
-	ptrA = 0x00000000000000c0;
-	ptrB = 0x00000000000000c0;
-	ptrC = 0x0000000000000180;
-#endif
-
 	printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC);
 	/* C = A*B */
-	StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes,
-		  ptrA, LMemsize,
-		  poubelle, LMemsize,
-		  ptrB, LMemsize,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize,
-		  ptrC, LMemsize);
+	StreamFMA_interfaceT2(size, ptrA, ptrB, ptrC);
 	printf("T2 finished\n");
 }
 
@@ -187,30 +126,11 @@ void fpga_impl3(void *buffers[], void *cl_arg)
 	size_t   ptrB = (size_t)   STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */
 	int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]);
 
-	int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t));
 	int size = STARPU_VECTOR_GET_NX(buffers[0]);
 
-	int sizeBytes=SIZE *sizeof(int32_t);
-	size_t LMemsize= SIZE *sizeof(int32_t);
-
-	size_t poubelle = 0xc0000;
-
-#if 0
-	printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC);
-	//XXX
-	ptrA = 0x0000000000000180;
-	ptrB = 0x0000000000000180;
-#endif
-
 	printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC);
 	/* C = A+B */
-	StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, ptrC, sizeBytes,
-		  poubelle, LMemsize,
-		  ptrA, LMemsize,
-		  poubelle, LMemsize,
-		  ptrB, LMemsize,
-		  poubelle, LMemsize,
-		  poubelle, LMemsize);
+	StreamFMA_interfaceT3(size, ptrA, ptrB, ptrC);
 	printf("T3 finished\n");
 }