123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342 |
- /* Original version in Hoard Memory Allocator v2.1.2d
- *
- * This is a UNIX port of the latest version of the benchmark described
- * by Larson & Krishnan in "Memory Allocation for Long-Running Server
- * Applications", ISMM 1998.
- *
- * To see how it scales, try the following parameters, where P = 1 and
- * then the number of processors on your system, for larson and
- * larson_hoard:
- *
- * Multi-threaded test driver
- * C++ version (new and delete)
- * runtime (sec): 30
- * chunk size (min,max): 8 16
- * threads (min, max): P P
- * chunks/thread: 10000
- * no of rounds: 10
- * random seed: 1
- */
- #include <pthread.h>
- #include <stdio.h>
- #include <sys/time.h>
- #include <string.h>
- #include <assert.h>
- #include <unistd.h>
- /* #include <stdlib.h> */
- #include <dmmlib/dmmlib.h>
- /* #include <dmmlib/print_stats.h> */
- #include "lran2.h"
/* Capacity of the statically sized tables used by the benchmark. */
#define MAX_THREADS 100      /* worker slots available in runthreads() */
#define MAX_BLOCKS  1000000  /* entries in the global block / size tables */

/*
 * FALSE / TRUE constants.  The guard skips the definition when something
 * else has already defined BOOLEAN.  Because BOOLEAN is defined as an empty
 * macro just before the enum, the enum itself ends up without a tag.
 */
#ifndef BOOLEAN
#define BOOLEAN
enum BOOLEAN {
    FALSE,
    TRUE
};
#endif /* BOOLEAN */

/* Type aliases used by the rest of the benchmark. */
typedef void *LPVOID;
typedef unsigned long ULONG;
typedef long long _int64;

/* Function type of a thread entry point: takes a void * and returns one. */
typedef void *VoidFunction(void *);
- typedef struct thr_data {
- int threadno;
- int NumBlocks;
- long seed;
- int min_size;
- int max_size;
- char **array;
- long *blksize;
- int asize;
- int cAllocs;
- int cFrees;
- int cThreads;
- int cBytesAlloced;
- volatile int finished;
- struct lran2_st rgen;
- } thread_data;
- int volatile stopflag = FALSE;
- int min_size = 10, max_size = 500;
- struct lran2_st rgen;
- char *blkp[MAX_BLOCKS];
- long blksize[MAX_BLOCKS];
/*
 * Report how many QueryPerformanceCounter() ticks make up one second.
 * The counter is expressed in microseconds, so the answer is always
 * one million.
 *
 * x: receives the tick rate; must not be NULL.
 */
static void QueryPerformanceFrequency(long *x)
{
    enum { TICKS_PER_SECOND = 1000000 };

    *x = (long) TICKS_PER_SECOND;
}
/*
 * Store the current gettimeofday() reading in *x, expressed as a single
 * count of microseconds (seconds * 1000000 + microseconds).
 *
 * x: receives the reading; must not be NULL.
 */
static void QueryPerformanceCounter(long *x)
{
    const long usec_per_sec = 1000000L;
    struct timeval now;

    gettimeofday(&now, NULL);
    *x = now.tv_sec * usec_per_sec + now.tv_usec;
}
/*
 * Suspend the calling thread for x milliseconds, at whole-second
 * granularity: the fractional second is dropped, so any x below 1000
 * returns immediately.
 *
 * x <= 0 is a no-op.  Without this guard a value of -1000 or lower would
 * be converted to a huge unsigned second count and sleep effectively
 * forever.
 *
 * sleep() returns the unslept seconds when a signal handler interrupts
 * it, so the call is repeated until the full duration has elapsed.
 */
static void Sleep(long x)
{
    unsigned int remaining;

    if (x <= 0) {
        return;
    }

    remaining = (unsigned int) (x / 1000);
    while (remaining > 0) {
        remaining = sleep(remaining);
    }
}
- static void _beginthread(VoidFunction x, void * z) {
- pthread_t pt;
- pthread_attr_t pa;
- pthread_attr_init (&pa);
- // printf ("creating a thread.\n");
- pthread_create(&pt, &pa, x, z);
- }
- static void warmup(char **blkp, int num_chunks) {
- int cblks;
- long victim;
- long blk_size;
- LPVOID tmp;
- for(cblks = 0; cblks < num_chunks; cblks++) {
- blk_size = min_size + lran2(&rgen) % (max_size - min_size);
- blkp[cblks] = (char *) dmmlib_malloc((size_t) blk_size);
- blksize[cblks] = blk_size;
- assert(blkp[cblks] != NULL);
- }
- /* generate a random permutation of the chunks */
- for(cblks = num_chunks; cblks > 0 ; cblks--) {
- victim = lran2(&rgen) % cblks;
- tmp = blkp[victim];
- blkp[victim] = blkp[cblks-1];
- blkp[cblks-1] = (char *) tmp;
- }
- for(cblks=0; cblks < 4 * num_chunks; cblks++) {
- victim = lran2(&rgen) % num_chunks;
- dmmlib_free(blkp[victim]);
- blk_size = min_size + lran2(&rgen) % (max_size - min_size);
- blkp[victim] = (char *) dmmlib_malloc((size_t) blk_size);
- blksize[victim] = blk_size;
- assert(blkp[victim] != NULL);
- }
- }
- static void * exercise_heap( void *pinput) {
- thread_data *pdea;
- int cblks = 0;
- long victim;
- long blk_size;
- int range;
- if( stopflag ) return 0;
- pdea = (thread_data *) pinput;
- pdea->finished = FALSE;
- pdea->cThreads++;
- range = pdea->max_size - pdea->min_size;
- /* allocate NumBlocks chunks of random size */
- for(cblks=0; cblks < pdea->NumBlocks; cblks++) {
- victim = lran2(&pdea->rgen)%pdea->asize;
- dmmlib_free(pdea->array[victim]);
- pdea->cFrees++;
- blk_size = pdea->min_size+lran2(&pdea->rgen)%range;
- pdea->array[victim] = (char *) dmmlib_malloc((size_t) blk_size);
- pdea->blksize[victim] = blk_size;
- assert(pdea->array[victim] != NULL);
- pdea->cAllocs++;
- /* Write something! */
- volatile char * chptr = ((char *) pdea->array[victim]);
- *chptr++ = 'a';
- volatile char ch = *((char *) pdea->array[victim]);
- *chptr = 'b';
- if( stopflag ) break;
- }
- // printf("Thread %u terminating: %d allocs, %d frees\n",
- // pdea->threadno, pdea->cAllocs, pdea->cFrees) ;
- pdea->finished = TRUE;
- if( !stopflag ) {
- _beginthread(exercise_heap, pdea);
- }
- return 0;
- }
- static void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds) {
- thread_data de_area[MAX_THREADS];
- thread_data *pdea;
- long ticks_per_sec;
- int prevthreads;
- int num_threads;
- int nperthread;
- int sum_threads;
- int sum_allocs;
- int sum_frees;
- int i;
- long start_cnt, end_cnt;
- _int64 ticks;
- double duration ;
- double rate_1 = 0, rate_n;
- size_t reqd_space;
- size_t used_space;
- QueryPerformanceFrequency( &ticks_per_sec );
- pdea = &de_area[0];
- memset(&de_area[0], 0, sizeof(thread_data));
- prevthreads = 0 ;
- for(num_threads=min_threads; num_threads <= max_threads; num_threads++) {
- warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );
- nperthread = chperthread ;
- stopflag = FALSE ;
- for(i = 0; i < num_threads; i++) {
- de_area[i].threadno = i+1 ;
- de_area[i].NumBlocks = num_rounds*nperthread;
- de_area[i].array = &blkp[i*nperthread];
- de_area[i].blksize = &blksize[i*nperthread];
- de_area[i].asize = nperthread;
- de_area[i].min_size = min_size;
- de_area[i].max_size = max_size;
- de_area[i].seed = lran2(&rgen);
- de_area[i].finished = 0;
- de_area[i].cAllocs = 0;
- de_area[i].cFrees = 0;
- de_area[i].cThreads = 0;
- de_area[i].finished = FALSE;
- lran2_init(&de_area[i].rgen, de_area[i].seed);
- _beginthread(exercise_heap, &de_area[i]);
- }
- QueryPerformanceCounter( &start_cnt );
- printf ("Sleeping for %ld seconds.\n", sleep_cnt);
- Sleep(sleep_cnt * 1000L) ;
- stopflag = TRUE ;
- for(i = 0; i < num_threads; i++) {
- while( !de_area[i].finished ) {
- sched_yield();
- }
- }
- QueryPerformanceCounter( &end_cnt );
- sum_frees = sum_allocs =0 ;
- sum_threads = 0 ;
- for(i=0;i< num_threads; i++){
- sum_allocs += de_area[i].cAllocs ;
- sum_frees += de_area[i].cFrees ;
- sum_threads += de_area[i].cThreads ;
- de_area[i].cAllocs = de_area[i].cFrees = 0;
- }
- ticks = end_cnt - start_cnt ;
- duration = (double)(ticks/ticks_per_sec);
- for(i = 0; i < num_threads; i++) {
- if( !de_area[i].finished ) {
- printf("Thread at %d not finished\n", i);
- }
- }
- rate_n = sum_allocs/duration ;
- if( rate_1 == 0){
- rate_1 = rate_n ;
- }
- //reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;
- //used_space = CountReservedSpace() - init_space;
- // FIXME Currently only one heap is used in the example
- /* used_space = get_allocated_space(&systemallocator.heaps[0]); */
- /* reqd_space = get_used_space(&systemallocator.heaps[0]); */
- //used_space = 0;
- printf(" Used space: %zu\n Requested space: %zu\n", used_space, reqd_space);
- printf("%2d ", num_threads ) ;
- printf("%6.3f", duration ) ;
- printf("%6.3f", rate_n/rate_1 );
- printf("%8.0f", sum_allocs/duration);
- /* printf(" %6.3f %.3f", (double)(used_space/(1024*1024)), (used_space/reqd_space)); */
- printf("\n") ;
- Sleep(5000L) ; // wait 5 sec for old threads to die
- prevthreads = num_threads;
- }
- }
- int main(void) {
- long sleep_cnt;
- int min_threads, max_threads;
- int num_chunks = 10000;
- int num_rounds;
- int chperthread;
- printf("Larson benchmark\n");
- printf("runtime (sec): ") ;
- //scanf ("%ld", &sleep_cnt);
- sleep_cnt = 30;
- printf("%ld\n", sleep_cnt);
- printf("chunk size (min,max): ") ;
- //scanf("%d %d", &min_size, &max_size ) ;
- min_size = 32;
- max_size = 64;
- printf("%d %d\n", min_size, max_size);
- printf("threads (min, max): ") ;
- //scanf("%d %d", &min_threads, &max_threads) ;
- min_threads = 2;
- max_threads = 2;
- printf("%d %d\n", min_threads, max_threads);
- pthread_setconcurrency(max_threads);
- printf("chunks/thread: ");
- //scanf("%d", &chperthread );
- chperthread = 10000;
- printf("%d\n", chperthread);
- num_chunks = max_threads * chperthread ;
- if( num_chunks > MAX_BLOCKS ){
- printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ;
- return 1;
- }
- printf("no of rounds: ");
- //scanf("%d", &num_rounds );
- num_rounds = 10;
- printf("%d\n", num_rounds);
- runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ;
- return 0;
- }
|