/* Original version in Hoard Memory Allocator v2.1.2d * * This is a UNIX port of the latest version of the benchmark described * by Larson & Krishnan in "Memory Allocation for Long-Running Server * Applications", ISMM 1998. * * To see how it scales, try the following parameters, where P = 1 and * then the number of processors on your system, for larson and * larson_hoard: * * Multi-threaded test driver * C++ version (new and delete) * runtime (sec): 30 * chunk size (min,max): 8 16 * threads (min, max): P P * chunks/thread: 10000 * no of rounds: 10 * random seed: 1 */ #include #include #include #include #include #include #include #include #include #include "lran2.h" #define MAX_THREADS 100 #define MAX_BLOCKS 1000000 #ifndef BOOLEAN #define BOOLEAN enum BOOLEAN { FALSE, TRUE }; #endif /* BOOLEAN */ typedef void * LPVOID; typedef unsigned long ULONG; typedef long long _int64; typedef void * VoidFunction (void *); typedef struct thr_data { int threadno; int NumBlocks; long seed; int min_size; int max_size; char **array; long *blksize; int asize; int cAllocs; int cFrees; int cThreads; int cBytesAlloced; volatile int finished; struct lran2_st rgen; } thread_data; int volatile stopflag = FALSE; int min_size = 10, max_size = 500; struct lran2_st rgen; char *blkp[MAX_BLOCKS]; long blksize[MAX_BLOCKS]; static void QueryPerformanceFrequency(long *x) { *x = 1000000L; } static void QueryPerformanceCounter (long *x) { struct timeval tv; gettimeofday(&tv, NULL); *x = tv.tv_sec * 1000000L + tv.tv_usec; } static void Sleep(long x) { // printf ("sleeping for %ld seconds.\n", x/1000); sleep((unsigned int) (x/1000)); } static void _beginthread(VoidFunction x, void * z) { pthread_t pt; pthread_attr_t pa; pthread_attr_init (&pa); // printf ("creating a thread.\n"); pthread_create(&pt, &pa, x, z); } static void warmup(char **blkp, int num_chunks) { int cblks; long victim; long blk_size; LPVOID tmp; for(cblks = 0; cblks < num_chunks; cblks++) { blk_size = min_size + lran2(&rgen) % (max_size - min_size); blkp[cblks] = (char *) malloc((size_t) blk_size); blksize[cblks] = blk_size; assert(blkp[cblks] != NULL); } /* generate a random permutation of the chunks */ for(cblks = num_chunks; cblks > 0 ; cblks--) { victim = lran2(&rgen) % cblks; tmp = blkp[victim]; blkp[victim] = blkp[cblks-1]; blkp[cblks-1] = (char *) tmp; } for(cblks=0; cblks < 4 * num_chunks; cblks++) { victim = lran2(&rgen) % num_chunks; free(blkp[victim]); blk_size = min_size + lran2(&rgen) % (max_size - min_size); blkp[victim] = (char *) malloc((size_t) blk_size); blksize[victim] = blk_size; assert(blkp[victim] != NULL); } } static void * exercise_heap( void *pinput) { thread_data *pdea; int cblks = 0; long victim; long blk_size; int range; if( stopflag ) return 0; pdea = (thread_data *) pinput; pdea->finished = FALSE; pdea->cThreads++; range = pdea->max_size - pdea->min_size; /* allocate NumBlocks chunks of random size */ for(cblks=0; cblks < pdea->NumBlocks; cblks++) { victim = lran2(&pdea->rgen)%pdea->asize; free(pdea->array[victim]); pdea->cFrees++; blk_size = pdea->min_size+lran2(&pdea->rgen)%range; pdea->array[victim] = (char *) malloc((size_t) blk_size); pdea->blksize[victim] = blk_size; assert(pdea->array[victim] != NULL); pdea->cAllocs++; /* Write something! */ volatile char * chptr = ((char *) pdea->array[victim]); *chptr++ = 'a'; volatile char ch = *((char *) pdea->array[victim]); *chptr = 'b'; if( stopflag ) break; } // printf("Thread %u terminating: %d allocs, %d frees\n", // pdea->threadno, pdea->cAllocs, pdea->cFrees) ; pdea->finished = TRUE; if( !stopflag ) { _beginthread(exercise_heap, pdea); } return 0; } static void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds) { thread_data de_area[MAX_THREADS]; thread_data *pdea; long ticks_per_sec; int prevthreads; int num_threads; int nperthread; int sum_threads; int sum_allocs; int sum_frees; int i; long start_cnt, end_cnt; _int64 ticks; double duration ; double rate_1 = 0, rate_n; size_t reqd_space; size_t used_space; QueryPerformanceFrequency( &ticks_per_sec ); pdea = &de_area[0]; memset(&de_area[0], 0, sizeof(thread_data)); prevthreads = 0 ; for(num_threads=min_threads; num_threads <= max_threads; num_threads++) { warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread ); nperthread = chperthread ; stopflag = FALSE ; for(i = 0; i < num_threads; i++) { de_area[i].threadno = i+1 ; de_area[i].NumBlocks = num_rounds*nperthread; de_area[i].array = &blkp[i*nperthread]; de_area[i].blksize = &blksize[i*nperthread]; de_area[i].asize = nperthread; de_area[i].min_size = min_size; de_area[i].max_size = max_size; de_area[i].seed = lran2(&rgen); de_area[i].finished = 0; de_area[i].cAllocs = 0; de_area[i].cFrees = 0; de_area[i].cThreads = 0; de_area[i].finished = FALSE; lran2_init(&de_area[i].rgen, de_area[i].seed); _beginthread(exercise_heap, &de_area[i]); } QueryPerformanceCounter( &start_cnt ); printf ("Sleeping for %ld seconds.\n", sleep_cnt); Sleep(sleep_cnt * 1000L) ; stopflag = TRUE ; for(i = 0; i < num_threads; i++) { while( !de_area[i].finished ) { sched_yield(); } } QueryPerformanceCounter( &end_cnt ); sum_frees = sum_allocs =0 ; sum_threads = 0 ; for(i=0;i< num_threads; i++){ sum_allocs += de_area[i].cAllocs ; sum_frees += de_area[i].cFrees ; sum_threads += de_area[i].cThreads ; de_area[i].cAllocs = de_area[i].cFrees = 0; } ticks = end_cnt - start_cnt ; duration = (double)(ticks/ticks_per_sec); for(i = 0; i < num_threads; i++) { if( !de_area[i].finished ) { printf("Thread at %d not finished\n", i); } } rate_n = sum_allocs/duration ; if( rate_1 == 0){ rate_1 = rate_n ; } //reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ; //used_space = CountReservedSpace() - init_space; // FIXME Currently only one heap is used in the example used_space = get_allocated_space(&systemallocator.heaps[0]); reqd_space = get_used_space(&systemallocator.heaps[0]); //used_space = 0; printf(" Used space: %zu\n Requested space: %zu\n", used_space, reqd_space); printf("%2d ", num_threads ) ; printf("%6.3f", duration ) ; printf("%6.3f", rate_n/rate_1 ); printf("%8.0f", sum_allocs/duration); printf(" %6.3f %.3f", (double)(used_space/(1024*1024)), (used_space/reqd_space)); printf("\n") ; Sleep(5000L) ; // wait 5 sec for old threads to die prevthreads = num_threads; } } int main(void) { long sleep_cnt; int min_threads, max_threads; int num_chunks = 10000; int num_rounds; int chperthread; printf("Larson benchmark\n"); printf("runtime (sec): ") ; //scanf ("%ld", &sleep_cnt); sleep_cnt = 30; printf("%ld\n", sleep_cnt); printf("chunk size (min,max): ") ; //scanf("%d %d", &min_size, &max_size ) ; min_size = 32; max_size = 768; printf("%d %d\n", min_size, max_size); printf("threads (min, max): ") ; //scanf("%d %d", &min_threads, &max_threads) ; min_threads = 1; max_threads = 4; printf("%d %d\n", min_threads, max_threads); pthread_setconcurrency(max_threads); printf("chunks/thread: "); //scanf("%d", &chperthread ); chperthread = 10000; printf("%d\n", chperthread); num_chunks = max_threads * chperthread ; if( num_chunks > MAX_BLOCKS ){ printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ; return 1; } printf("no of rounds: "); //scanf("%d", &num_rounds ); num_rounds = 10; printf("%d\n", num_rounds); runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ; return 0; }