| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340 | 
/* Original version in Hoard Memory Allocator v2.1.2d * * This is a UNIX port of the latest version of the benchmark described * by Larson & Krishnan in "Memory Allocation for Long-Running Server * Applications", ISMM 1998. *  * To see how it scales, try the following parameters, where P = 1 and * then the number of processors on your system, for larson and * larson_hoard: *  * Multi-threaded test driver  * C++ version (new and delete) * runtime (sec): 30 * chunk size (min,max): 8 16  * threads (min, max):   P P * chunks/thread:  10000 * no of rounds:   10 * random seed:    1 */#include <pthread.h>#include <stdio.h>#include <sys/time.h>#include <string.h>#include <assert.h>#include <unistd.h>#include <dmmlib/dmmlib.h>#include "lran2.h"#define MAX_THREADS     100#define MAX_BLOCKS  1000000#ifndef BOOLEAN#define BOOLEANenum BOOLEAN { FALSE, TRUE };#endif /* BOOLEAN */typedef void * LPVOID;typedef unsigned long ULONG;typedef long long _int64;typedef void * VoidFunction (void *);typedef struct thr_data {    int threadno;    int NumBlocks;    long seed;    int min_size;    int max_size;    char **array;    long *blksize;    int asize;    int cAllocs;    int cFrees;    int cThreads;    int cBytesAlloced;    volatile int finished;    struct lran2_st rgen;} thread_data;int volatile stopflag = FALSE;int min_size = 10, max_size = 500;struct lran2_st rgen;char *blkp[MAX_BLOCKS];long blksize[MAX_BLOCKS];static void QueryPerformanceFrequency(long *x) {    *x = 1000000L;}static void QueryPerformanceCounter (long *x) {    struct timeval tv;    gettimeofday(&tv, NULL);    *x = tv.tv_sec * 1000000L + tv.tv_usec;}static void Sleep(long x) {    //  printf ("sleeping for %ld seconds.\n", x/1000);    sleep((unsigned int) (x/1000));}static void _beginthread(VoidFunction x, void * z) {    pthread_t pt;    pthread_attr_t pa;    pthread_attr_init (&pa);    //  printf ("creating a thread.\n");    pthread_create(&pt, &pa, x, z);}static void warmup(char **blkp, int num_chunks) {    int cblks;    long victim;    long blk_size;    LPVOID tmp;    for(cblks = 0; cblks < num_chunks; cblks++) {        blk_size = min_size + lran2(&rgen) % (max_size - min_size);        blkp[cblks] = (char *) malloc((size_t) blk_size);        blksize[cblks] = blk_size;        assert(blkp[cblks] != NULL);    }    /* generate a random permutation of the chunks */    for(cblks = num_chunks; cblks > 0 ; cblks--) {        victim = lran2(&rgen) % cblks;        tmp = blkp[victim];        blkp[victim]  = blkp[cblks-1];        blkp[cblks-1] = (char *) tmp;    }    for(cblks=0; cblks < 4 * num_chunks; cblks++) {        victim = lran2(&rgen) % num_chunks;        free(blkp[victim]);        blk_size = min_size + lran2(&rgen) % (max_size - min_size);        blkp[victim] = (char *) malloc((size_t) blk_size);        blksize[victim] = blk_size;        assert(blkp[victim] != NULL);    }}static void * exercise_heap( void *pinput) {    thread_data  *pdea;    int           cblks = 0;    long          victim;    long          blk_size;    int           range;    if( stopflag ) return 0;    pdea = (thread_data *) pinput;    pdea->finished = FALSE;    pdea->cThreads++;    range = pdea->max_size - pdea->min_size;    /* allocate NumBlocks chunks of random size */    for(cblks=0; cblks < pdea->NumBlocks; cblks++) {        victim = lran2(&pdea->rgen)%pdea->asize;        free(pdea->array[victim]);        pdea->cFrees++;        blk_size = pdea->min_size+lran2(&pdea->rgen)%range;        pdea->array[victim] = (char *) malloc((size_t) blk_size);        pdea->blksize[victim] = blk_size;        assert(pdea->array[victim] != NULL);        pdea->cAllocs++;        /* Write something! */        volatile char * chptr = ((char *) pdea->array[victim]);        *chptr++ = 'a';        volatile char ch = *((char *) pdea->array[victim]);        *chptr = 'b';        if( stopflag ) break;    }    //  	printf("Thread %u terminating: %d allocs, %d frees\n",    //		      pdea->threadno, pdea->cAllocs, pdea->cFrees) ;    pdea->finished = TRUE;    if( !stopflag ) {        _beginthread(exercise_heap, pdea);    }    return 0;}static void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds) {    thread_data de_area[MAX_THREADS];    thread_data *pdea;    long ticks_per_sec;    int prevthreads;    int num_threads;    int nperthread;    int sum_threads;    int sum_allocs;    int sum_frees;    int i;    long start_cnt, end_cnt;    _int64 ticks;    double duration ;    double rate_1 = 0, rate_n;    size_t reqd_space;    size_t used_space;	    QueryPerformanceFrequency( &ticks_per_sec );    pdea = &de_area[0];    memset(&de_area[0], 0, sizeof(thread_data));	    prevthreads = 0 ;    for(num_threads=min_threads; num_threads <= max_threads; num_threads++) {        warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );        nperthread = chperthread ;        stopflag   = FALSE ;        for(i = 0; i < num_threads; i++) {            de_area[i].threadno    = i+1 ;            de_area[i].NumBlocks   = num_rounds*nperthread;            de_area[i].array       = &blkp[i*nperthread];            de_area[i].blksize     = &blksize[i*nperthread];            de_area[i].asize       = nperthread;            de_area[i].min_size    = min_size;            de_area[i].max_size    = max_size;            de_area[i].seed        = lran2(&rgen);            de_area[i].finished    = 0;            de_area[i].cAllocs     = 0;            de_area[i].cFrees      = 0;            de_area[i].cThreads    = 0;            de_area[i].finished    = FALSE;            lran2_init(&de_area[i].rgen, de_area[i].seed);            _beginthread(exercise_heap, &de_area[i]);        }        QueryPerformanceCounter( &start_cnt );        printf ("Sleeping for %ld seconds.\n", sleep_cnt);        Sleep(sleep_cnt * 1000L) ;        stopflag = TRUE ;        for(i = 0; i < num_threads; i++) {            while( !de_area[i].finished ) {                sched_yield();            }        }        QueryPerformanceCounter( &end_cnt );        sum_frees = sum_allocs =0  ;        sum_threads = 0 ;        for(i=0;i< num_threads; i++){            sum_allocs    += de_area[i].cAllocs ;            sum_frees     += de_area[i].cFrees ;            sum_threads   += de_area[i].cThreads ;            de_area[i].cAllocs = de_area[i].cFrees = 0;        }        ticks = end_cnt - start_cnt ;        duration = (double)(ticks/ticks_per_sec);        for(i = 0; i < num_threads; i++) {            if( !de_area[i].finished ) {                printf("Thread at %d not finished\n", i);            }        }        rate_n = sum_allocs/duration ;        if( rate_1 == 0){            rate_1 = rate_n ;        }        //reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;        //used_space = CountReservedSpace() - init_space;        // FIXME Currently only one heap is used in the example        /* used_space = get_allocated_space(&systemallocator.heaps[0]); */        /* reqd_space = get_used_space(&systemallocator.heaps[0]); */        //used_space = 0;        printf(" Used space: %zu\n Requested space: %zu\n", used_space, reqd_space);        printf("%2d ", num_threads ) ;        printf("%6.3f", duration  ) ;        printf("%6.3f", rate_n/rate_1 );        printf("%8.0f", sum_allocs/duration);        /* printf(" %6.3f %.3f", (double)(used_space/(1024*1024)), (used_space/reqd_space)); */        printf("\n") ;        Sleep(5000L) ; // wait 5 sec for old threads to die        prevthreads = num_threads;    }}int main(void) {    long sleep_cnt;    int min_threads, max_threads;    int num_chunks = 10000;    int num_rounds;    int chperthread;    printf("Larson benchmark\n");    printf("runtime (sec): ") ;    //scanf ("%ld", &sleep_cnt);    sleep_cnt = 30;    printf("%ld\n", sleep_cnt);    printf("chunk size (min,max): ") ;    //scanf("%d %d", &min_size, &max_size ) ;    min_size = 32;    max_size = 64;    printf("%d %d\n", min_size, max_size);    printf("threads (min, max):   ") ;     //scanf("%d %d", &min_threads, &max_threads) ;    min_threads = 2;    max_threads = 2;    printf("%d %d\n", min_threads, max_threads);    pthread_setconcurrency(max_threads);    printf("chunks/thread:  ");    //scanf("%d", &chperthread );    chperthread = 10000;    printf("%d\n", chperthread);    num_chunks = max_threads * chperthread ;    if( num_chunks > MAX_BLOCKS ){        printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ;        return 1;    }    printf("no of rounds:   ");    //scanf("%d", &num_rounds );    num_rounds = 10;    printf("%d\n", num_rounds);    runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ;    return 0;}
 |