| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612 | /* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010, 2011, 2014-2015  Université de Bordeaux * Copyright (C) 2010, 2011, 2012, 2013  CNRS * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */#include <starpu.h>#include <math.h>#include <limits.h>#ifdef STARPU_HAVE_X11#include <X11/Xlib.h>#include <X11/Xutil.h>int use_x11 = 1;#endifint demo = 0;static double demozoom = 0.05;/* NB: The X11 code is inspired from the http://locklessinc.com/articles/mandelbrot/ article */static int nblocks = 20;static int height = 400;static int width = 640;static int maxIt = 20000; /* max number of iteration in the Mandelbrot function */static int niter = -1; /* number of loops in case we don't use X11, -1 means infinite */static int use_spmd = 0;static double leftX = -0.745;static double rightX = -0.74375;static double topY = .15;static double bottomY = .14875;/* *	X11 window management */#ifdef STARPU_HAVE_X11/* X11 data */static Display *dpy;static Window win;static XImage *bitmap;static GC gc;static KeySym Left=-1, Right, Down, Up, Alt ;static void exit_x11(void){	XDestroyImage(bitmap);	XDestroyWindow(dpy, win);	XCloseDisplay(dpy);}static void init_x11(int width, int height, unsigned *buffer){	/* Attempt to open the display */	dpy = XOpenDisplay(NULL);	/* Failure */	if (!dpy)		exit(0);	unsigned long white = WhitePixel(dpy,DefaultScreen(dpy));	unsigned long black = BlackPixel(dpy,DefaultScreen(dpy));	win = XCreateSimpleWindow(dpy, DefaultRootWindow(dpy), 0, 0,					width, height, 0, black, white);	/* We want to be notified when the window appears */	XSelectInput(dpy, win, StructureNotifyMask);	/* Make it appear */	XMapWindow(dpy, win);	XTextProperty tp;	char name[128] = "Mandelbrot - StarPU";	char *n = name;	Status st = XStringListToTextProperty(&n, 1, &tp);	if (st)		XSetWMName(dpy, win, &tp);	/* Wait for the MapNotify event */	XFlush(dpy);	int depth = DefaultDepth(dpy, DefaultScreen(dpy));	Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy));	/* Make bitmap */	bitmap = XCreateImage(dpy, visual, depth,		ZPixmap, 0, (char *)buffer,		width, height, 32, 0);	/* Init GC */	gc = XCreateGC(dpy, win, 0, NULL);	XSetForeground(dpy, gc, black);	XSelectInput(dpy, win, ExposureMask | KeyPressMask | StructureNotifyMask);	Atom wmDeleteMessage;	wmDeleteMessage = XInternAtom(dpy, "WM_DELETE_WINDOW", False);	XSetWMProtocols(dpy, win, &wmDeleteMessage, 1);        Left = XStringToKeysym ("Left");        Right = XStringToKeysym ("Right");        Up = XStringToKeysym ("Up");        Down = XStringToKeysym ("Down");        Alt = XStringToKeysym ("Alt");}static int handle_events(void){	XEvent event;	XNextEvent(dpy, &event);	KeySym key;	char text[255];	if (event.type == KeyPress)	{		XLookupString(&event.xkey,text,255,&key,0);		if (key == Left)		{			double widthX = rightX - leftX;			leftX -= 0.25*widthX;			rightX -= 0.25*widthX;		}		else if (key == Right)		{			double widthX = rightX - leftX;			leftX += 0.25*widthX;			rightX += 0.25*widthX;		}		else if (key == Up)		{			double heightY = topY - bottomY;			topY += 0.25*heightY;			bottomY += 0.25*heightY;		}		else if (key == Down)		{			double heightY = topY - bottomY;			topY -= 0.25*heightY;			bottomY -= 0.25*heightY;		}		else		{			double widthX = rightX - leftX;			double heightY = topY - bottomY;			if (text[0] == '-')			{				/* Zoom out */				leftX -= 0.125*widthX;				rightX += 0.125*widthX;				topY += 0.125*heightY;				bottomY -= 0.125*heightY;			}			else if (text[0] == '+')			{				/* Zoom in */				leftX += 0.125*widthX;				rightX -= 0.125*widthX;				topY -= 0.125*heightY;				bottomY += 0.125*heightY;			}		}		if (text[0]=='q')		{			return -1;		}	}	if (event.type==ButtonPress)	{		/* tell where the mouse Button was Pressed */		printf("You pressed a button at (%i,%i)\n",			event.xbutton.x,event.xbutton.y);	}	return 0;}#endif/* *	OpenCL kernel */#ifdef STARPU_USE_OPENCLchar *mandelbrot_opencl_src = "\#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\#define MIN(a,b) (((a)<(b))? (a) : (b))					\n\__kernel void mandelbrot_kernel(__global unsigned* a,			\n\          double leftX, double topY,					\n\          double stepX, double stepY,					\n\          int maxIt, int iby, int block_size, int width)		\n\{									\n\    size_t id_x = get_global_id(0);	\n\    size_t id_y = get_global_id(1);	\n\    if ((id_x < width) && (id_y < block_size))				\n\    {									\n\        double xc = leftX + id_x * stepX;				\n\        double yc = topY - (id_y + iby*block_size) * stepY;		\n\        int it;								\n\        double x,y;							\n\        x = y = (double)0.0;						\n\        for (it=0;it<maxIt;it++)					\n\        {								\n\          double x2 = x*x;						\n\          double y2 = y*y;						\n\          if (x2+y2 > 4.0) break;					\n\          double twoxy = (double)2.0*x*y;				\n\          x = x2 - y2 + xc;						\n\          y = twoxy + yc;						\n\        }								\n\       unsigned int v = MIN((1024*((float)(it)/(2000))), 256);		\n\       a[id_x + width * id_y] = (v<<16|(255-v)<<8);			\n\   }									\n\}";static struct starpu_opencl_program opencl_programs;static void compute_block_opencl(void *descr[], void *cl_arg){	int iby, block_size;	double stepX, stepY;	int *pcnt; /* unused for CUDA tasks */	starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);	cl_mem data = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]);	cl_kernel kernel;	cl_command_queue queue;	cl_event event;	int id = starpu_worker_get_id();	int devid = starpu_worker_get_devid(id);	starpu_opencl_load_kernel(&kernel, &queue, &opencl_programs, "mandelbrot_kernel", devid);	clSetKernelArg(kernel, 0, sizeof(data), &data);	clSetKernelArg(kernel, 1, sizeof(leftX), &leftX);	clSetKernelArg(kernel, 2, sizeof(topY), &topY);	clSetKernelArg(kernel, 3, sizeof(stepX), &stepX);	clSetKernelArg(kernel, 4, sizeof(stepY), &stepY);	clSetKernelArg(kernel, 5, sizeof(maxIt), &maxIt);	clSetKernelArg(kernel, 6, sizeof(iby), &iby);	clSetKernelArg(kernel, 7, sizeof(block_size), &block_size);	clSetKernelArg(kernel, 8, sizeof(width), &width);	unsigned dim = 16;	size_t local[2] = {dim, 1};	size_t global[2] = {width, block_size};	clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, NULL);	starpu_opencl_release_kernel(kernel);}#endif/* *	CPU kernel */static void compute_block(void *descr[], void *cl_arg){	int ix, iy;	int iby, block_size;	double stepX, stepY;	int *pcnt; /* unused for sequential tasks */	starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);	unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);	int local_iy;	for (local_iy = 0; local_iy < block_size; local_iy++)	{		iy = iby*block_size + local_iy;		for (ix = 0; ix < width; ix++)		{			double cx = leftX + ix * stepX;			double cy = topY - iy * stepY;			/* Z = X+I*Y */			double x = 0;			double y = 0;			int it;			for (it = 0; it < maxIt; it++)			{				double x2 = x*x;				double y2 = y*y;				/* Stop iterations when |Z| > 2 */				if (x2 + y2 > 4.0)					break;				double twoxy = 2.0*x*y;				/* Z = Z^2 + C */				x = x2 - y2 + cx;				y = twoxy + cy;			}			unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256);			data[ix + local_iy*width] = (v<<16|(255-v)<<8);		}	}}static void compute_block_spmd(void *descr[], void *cl_arg){	int iby, block_size;	double stepX, stepY;	int *pcnt;	starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);	unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);	int ix, iy; /* global coordinates */	int local_iy; /* current line */	while (1)	{		local_iy = STARPU_ATOMIC_ADD((unsigned int *)pcnt, 1) - 1;		if (local_iy >= block_size)			break;		iy = iby*block_size + local_iy;		for (ix = 0; ix < width; ix++)		{			double cx = leftX + ix * stepX;			double cy = topY - iy * stepY;			/* Z = X+I*Y */			double x = 0;			double y = 0;			int it;			for (it = 0; it < maxIt; it++)			{				double x2 = x*x;				double y2 = y*y;				/* Stop iterations when |Z| > 2 */				if (x2 + y2 > 4.0)					break;				double twoxy = 2.0*x*y;				/* Z = Z^2 + C */				x = x2 - y2 + cx;				y = twoxy + cy;			}			unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256);			data[ix + local_iy*width] = (v<<16|(255-v)<<8);		}	}}static struct starpu_codelet spmd_mandelbrot_cl ={	.type = STARPU_SPMD,	.max_parallelism = INT_MAX,	.cpu_funcs = {compute_block_spmd},#ifdef STARPU_USE_OPENCL	.opencl_funcs = {compute_block_opencl},	.opencl_flags = {STARPU_OPENCL_ASYNC},#endif	.nbuffers = 1};static struct starpu_codelet mandelbrot_cl ={	.type = STARPU_SEQ,	.cpu_funcs = {compute_block},#ifdef STARPU_USE_OPENCL	.opencl_funcs = {compute_block_opencl},	.opencl_flags = {STARPU_OPENCL_ASYNC},#endif	.nbuffers = 1};static void parse_args(int argc, char **argv){	int i;	for (i = 1; i < argc; i++)	{		if (strcmp(argv[i], "-h") == 0)		{			fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]);			exit(-1);		}		if (strcmp(argv[i], "-width") == 0)		{			char *argptr;			width = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-height") == 0)		{			char *argptr;			height = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-nblocks") == 0)		{			char *argptr;			nblocks = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-niter") == 0)		{			char *argptr;			niter = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-pos") == 0)		{			int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);			assert(ret == 4);		}		if (strcmp(argv[i], "-demo") == 0)		{			demo = 1;			leftX = -50.22749575062760;			rightX = 48.73874621262927;			topY = -49.35016705749115;			bottomY = 49.64891691946615;		}		if (strcmp(argv[i], "-demozoom") == 0)		{			char *argptr;			demozoom = strtof(argv[++i], &argptr);		}		if (strcmp(argv[i], "-no-x11") == 0)		{#ifdef STARPU_HAVE_X11			use_x11 = 0;#endif		}		if (strcmp(argv[i], "-spmd") == 0)		{			use_spmd = 1;		}	}}int main(int argc, char **argv){	int ret;	parse_args(argc, argv);	/* We don't use CUDA in that example */	struct starpu_conf conf;	starpu_conf_init(&conf);	conf.ncuda = 0;	if (use_spmd)		conf.sched_policy_name = "peager";	ret = starpu_init(&conf);	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");	unsigned *buffer;	starpu_malloc((void **)&buffer, height*width*sizeof(unsigned));#ifdef STARPU_HAVE_X11	if (use_x11)		init_x11(width, height, buffer);#endif	int block_size = height/nblocks;	STARPU_ASSERT((height % nblocks) == 0);#ifdef STARPU_USE_OPENCL	starpu_opencl_load_opencl_from_string(mandelbrot_opencl_src, &opencl_programs, NULL);#endif	starpu_data_handle_t block_handles[nblocks];	int iby;	for (iby = 0; iby < nblocks; iby++)	{		unsigned *data = &buffer[iby*block_size*width];		starpu_vector_data_register(&block_handles[iby], STARPU_MAIN_RAM,                        (uintptr_t)data, block_size*width, sizeof(unsigned));	}	unsigned iter = 0;	double start, end;	start = starpu_timing_now();	while (niter-- != 0)	{		double stepX = (rightX - leftX)/width;		double stepY = (topY - bottomY)/height;		/* In case we have a SPMD task, each worker will grab tasks in		 * a greedy and select which piece of image to compute by		 * incrementing a counter shared by all the workers within the		 * parallel task. */		int per_block_cnt[nblocks];		for (iby = 0; iby < nblocks; iby++)		{			per_block_cnt[iby] = 0;			int *pcnt = &per_block_cnt[iby];			ret = starpu_task_insert(use_spmd?&spmd_mandelbrot_cl:&mandelbrot_cl,						 STARPU_VALUE, &iby, sizeof(iby),						 STARPU_VALUE, &block_size, sizeof(block_size),						 STARPU_VALUE, &stepX, sizeof(stepX),						 STARPU_VALUE, &stepY, sizeof(stepY),						 STARPU_W, block_handles[iby],						 STARPU_VALUE, &pcnt, sizeof(int *),						 STARPU_TAG_ONLY, (starpu_tag_t) (niter*nblocks + iby),						 0);			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");		}		for (iby = 0; iby < nblocks; iby++)		{#ifdef STARPU_HAVE_X11			if (use_x11)			{				starpu_data_acquire(block_handles[iby], STARPU_R);				XPutImage(dpy, win, gc, bitmap,					0, iby*block_size,					0, iby*block_size,					width, block_size);				starpu_data_release(block_handles[iby]);			}#endif		}		if (demo)		{			/* Zoom in */			double zoom_factor = demozoom;			double widthX = rightX - leftX;			double heightY = topY - bottomY;			iter++;			/* If the window is too small, we reset the demo and display some statistics */			if ((fabs(widthX) < 1e-12) || (fabs(heightY) < 1e-12))			{				leftX = -50.22749575062760;				rightX = 48.73874621262927;				topY = -49.35016705749115;				bottomY = 49.64891691946615;				end = starpu_timing_now();				double timing = end - start;				fprintf(stderr, "Time to generate %u frames : %f s\n", iter, timing/1000000.0);				fprintf(stderr, "Average FPS: %f\n", ((double)iter*1e+6)/timing);				/* Reset counters */				iter = 0;				start = starpu_timing_now();			}			else			{				leftX += (zoom_factor/2)*widthX;				rightX -= (zoom_factor/2)*widthX;				topY -= (zoom_factor/2)*heightY;				bottomY += (zoom_factor/2)*heightY;			}		}#ifdef STARPU_HAVE_X11		else if (use_x11 && handle_events())			break;#endif	}#ifdef STARPU_HAVE_X11	if (use_x11)		exit_x11();#endif	for (iby = 0; iby < nblocks; iby++)		starpu_data_unregister(block_handles[iby]);/*	starpu_data_free_pinned_if_possible(buffer); */	starpu_shutdown();	return 0;}
 |