|
@@ -0,0 +1,329 @@
|
|
|
+#include <pthread.h>
|
|
|
+#include <stdio.h>
|
|
|
+#include <sys/time.h>
|
|
|
+#include <string.h>
|
|
|
+#include <assert.h>
|
|
|
+#include <unistd.h>
|
|
|
+#include "custom_malloc.h"
|
|
|
+#include "custom_free.h"
|
|
|
+#include "other.h"
|
|
|
+#include "lran2.h"
|
|
|
+#include "dmm_init.h"
|
|
|
+
|
|
|
/* Capacity of the per-thread descriptor array declared in runthreads(). */
#define MAX_THREADS 100

/* Capacity of the global blkp[] / blksize[] chunk tables. */
#define MAX_BLOCKS  1000000
|
|
|
+
|
|
|
/*
 * Minimal boolean constants (FALSE == 0, TRUE == 1), emitted only when no
 * BOOLEAN macro has been defined yet.  Because BOOLEAN is defined as an
 * empty macro just before the enum, the tag expands to nothing and the
 * enumeration is effectively anonymous.
 */
#ifndef BOOLEAN
#define BOOLEAN
enum BOOLEAN {
    FALSE,
    TRUE
};
#endif /* BOOLEAN */
|
|
|
+
|
|
|
/* Windows-flavoured aliases kept so the benchmark body reads like its Win32 original. */
typedef void *LPVOID;            /* untyped pointer */
typedef unsigned long ULONG;     /* unsigned counter / size value */
typedef long long _int64;        /* wide signed integer used for tick deltas */

/* Signature of a thread entry point: takes and returns an untyped pointer. */
typedef void *VoidFunction(void *);
|
|
|
+
|
|
|
+typedef struct thr_data {
|
|
|
+
|
|
|
+ int threadno;
|
|
|
+ int NumBlocks;
|
|
|
+ long seed;
|
|
|
+
|
|
|
+ int min_size;
|
|
|
+ int max_size;
|
|
|
+
|
|
|
+ char **array;
|
|
|
+ int *blksize;
|
|
|
+ int asize;
|
|
|
+
|
|
|
+ int cAllocs;
|
|
|
+ int cFrees;
|
|
|
+ int cThreads;
|
|
|
+ int cBytesAlloced;
|
|
|
+
|
|
|
+ volatile int finished;
|
|
|
+ struct lran2_st rgen;
|
|
|
+
|
|
|
+} thread_data;
|
|
|
+
|
|
|
+allocator_t *myallocator;
|
|
|
+
|
|
|
+int volatile stopflag = FALSE;
|
|
|
+int min_size = 10, max_size = 500;
|
|
|
+struct lran2_st rgen;
|
|
|
+char *blkp[MAX_BLOCKS];
|
|
|
+int blksize[MAX_BLOCKS];
|
|
|
+
|
|
|
/*
 * Stores the number of counter ticks per second in *x.
 * The value is a fixed one million, i.e. one tick per microsecond.
 */
static void QueryPerformanceFrequency(long *x) {
    enum { TICKS_PER_SECOND = 1000000 };

    *x = (long) TICKS_PER_SECOND;
}
|
|
|
+
|
|
|
/*
 * Stores the current wall-clock time in *x, expressed in microseconds
 * (seconds * 1,000,000 + microseconds) as reported by gettimeofday().
 *
 * The timezone argument of gettimeofday() is obsolete, so NULL is passed;
 * this also keeps the function compilable where struct timezone is not
 * exposed.  If the call fails, *x is set to 0 instead of being computed
 * from an uninitialised timeval.
 */
static void QueryPerformanceCounter (long *x) {
    struct timeval tv;

    if (gettimeofday(&tv, NULL) != 0) {
        *x = 0;
        return;
    }

    *x = (long) tv.tv_sec * 1000000L + (long) tv.tv_usec;
}
|
|
|
+
|
|
|
/*
 * Suspends the caller for x milliseconds, rounded down to whole seconds:
 * the value is divided by 1000 and handed to sleep(), so any sub-second
 * remainder is dropped.
 */
static void Sleep(long x) {
    const unsigned int whole_seconds = (unsigned int) (x / 1000);

    sleep(whole_seconds);
}
|
|
|
+
|
|
|
+static void _beginthread(VoidFunction x, void * z) {
|
|
|
+ pthread_t pt;
|
|
|
+ pthread_attr_t pa;
|
|
|
+ pthread_attr_init (&pa);
|
|
|
+
|
|
|
+ // printf ("creating a thread.\n");
|
|
|
+ pthread_create(&pt, &pa, x, z);
|
|
|
+}
|
|
|
+
|
|
|
+static void warmup(char **blkp, int num_chunks) {
|
|
|
+ int cblks;
|
|
|
+ int victim;
|
|
|
+ int blk_size;
|
|
|
+ LPVOID tmp;
|
|
|
+
|
|
|
+ heap_t *myheap;
|
|
|
+ int heap_id;
|
|
|
+
|
|
|
+ heap_id = map_thread_heap();
|
|
|
+ myheap = &myallocator->heaps[heap_id];
|
|
|
+
|
|
|
+ for(cblks = 0; cblks < num_chunks; cblks++) {
|
|
|
+ blk_size = min_size + lran2(&rgen) % (max_size - min_size);
|
|
|
+ blkp[cblks] = (char *) custom_malloc(myheap, (size_t) blk_size);
|
|
|
+ blksize[cblks] = blk_size;
|
|
|
+ assert(blkp[cblks] != NULL);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* generate a random permutation of the chunks */
|
|
|
+ for(cblks = num_chunks; cblks > 0 ; cblks--) {
|
|
|
+ victim = lran2(&rgen) % cblks;
|
|
|
+ tmp = blkp[victim];
|
|
|
+ blkp[victim] = blkp[cblks-1];
|
|
|
+ blkp[cblks-1] = (char *) tmp;
|
|
|
+ }
|
|
|
+
|
|
|
+ for(cblks=0; cblks < 4 * num_chunks; cblks++) {
|
|
|
+ victim = lran2(&rgen) % num_chunks;
|
|
|
+ custom_free(myheap, blkp[victim]);
|
|
|
+
|
|
|
+ blk_size = min_size + lran2(&rgen) % (max_size - min_size);
|
|
|
+ blkp[victim] = (char *) custom_malloc(myheap, (size_t) blk_size);
|
|
|
+ blksize[victim] = blk_size;
|
|
|
+ assert(blkp[victim] != NULL);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static void * exercise_heap( void *pinput) {
|
|
|
+ thread_data *pdea;
|
|
|
+ int cblks = 0;
|
|
|
+ int victim;
|
|
|
+ long blk_size;
|
|
|
+ int range;
|
|
|
+
|
|
|
+ heap_t *myheap;
|
|
|
+ int heap_id;
|
|
|
+
|
|
|
+ heap_id = map_thread_heap();
|
|
|
+ myheap = &myallocator->heaps[heap_id];
|
|
|
+
|
|
|
+ if( stopflag ) return 0;
|
|
|
+
|
|
|
+ pdea = (thread_data *) pinput;
|
|
|
+ pdea->finished = FALSE;
|
|
|
+ pdea->cThreads++;
|
|
|
+ range = pdea->max_size - pdea->min_size;
|
|
|
+
|
|
|
+ /* allocate NumBlocks chunks of random size */
|
|
|
+ for(cblks=0; cblks < pdea->NumBlocks; cblks++) {
|
|
|
+ victim = lran2(&pdea->rgen)%pdea->asize;
|
|
|
+ custom_free(myheap, pdea->array[victim]);
|
|
|
+ pdea->cFrees++;
|
|
|
+
|
|
|
+ blk_size = pdea->min_size+lran2(&pdea->rgen)%range;
|
|
|
+ pdea->array[victim] = (char *) custom_malloc(myheap, (size_t) blk_size);
|
|
|
+
|
|
|
+ pdea->blksize[victim] = blk_size;
|
|
|
+ assert(pdea->array[victim] != NULL);
|
|
|
+
|
|
|
+ pdea->cAllocs++;
|
|
|
+
|
|
|
+ /* Write something! */
|
|
|
+
|
|
|
+ volatile char * chptr = ((char *) pdea->array[victim]);
|
|
|
+ *chptr++ = 'a';
|
|
|
+ volatile char ch = *((char *) pdea->array[victim]);
|
|
|
+ *chptr = 'b';
|
|
|
+
|
|
|
+
|
|
|
+ if( stopflag ) break;
|
|
|
+ }
|
|
|
+
|
|
|
+ // printf("Thread %u terminating: %d allocs, %d frees\n",
|
|
|
+ // pdea->threadno, pdea->cAllocs, pdea->cFrees) ;
|
|
|
+ pdea->finished = TRUE;
|
|
|
+
|
|
|
+ if( !stopflag ) {
|
|
|
+ _beginthread(exercise_heap, pdea);
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+static void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds) {
|
|
|
+ thread_data de_area[MAX_THREADS];
|
|
|
+ thread_data *pdea;
|
|
|
+ long ticks_per_sec;
|
|
|
+ int prevthreads;
|
|
|
+ int num_threads;
|
|
|
+ int nperthread;
|
|
|
+ int sum_threads;
|
|
|
+ int sum_allocs;
|
|
|
+ int sum_frees;
|
|
|
+
|
|
|
+ int i;
|
|
|
+
|
|
|
+ long start_cnt, end_cnt;
|
|
|
+ _int64 ticks;
|
|
|
+ double duration ;
|
|
|
+
|
|
|
+ double rate_1 = 0, rate_n;
|
|
|
+ double reqd_space;
|
|
|
+ ULONG used_space;
|
|
|
+
|
|
|
+ QueryPerformanceFrequency( &ticks_per_sec );
|
|
|
+
|
|
|
+ pdea = &de_area[0];
|
|
|
+ memset(&de_area[0], 0, sizeof(thread_data));
|
|
|
+
|
|
|
+ prevthreads = 0 ;
|
|
|
+ for(num_threads=min_threads; num_threads <= max_threads; num_threads++) {
|
|
|
+
|
|
|
+ warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );
|
|
|
+
|
|
|
+ nperthread = chperthread ;
|
|
|
+ stopflag = FALSE ;
|
|
|
+
|
|
|
+ for(i = 0; i < num_threads; i++) {
|
|
|
+ de_area[i].threadno = i+1 ;
|
|
|
+ de_area[i].NumBlocks = num_rounds*nperthread;
|
|
|
+ de_area[i].array = &blkp[i*nperthread];
|
|
|
+ de_area[i].blksize = &blksize[i*nperthread];
|
|
|
+ de_area[i].asize = nperthread;
|
|
|
+ de_area[i].min_size = min_size;
|
|
|
+ de_area[i].max_size = max_size;
|
|
|
+ de_area[i].seed = lran2(&rgen);
|
|
|
+ de_area[i].finished = 0;
|
|
|
+ de_area[i].cAllocs = 0;
|
|
|
+ de_area[i].cFrees = 0;
|
|
|
+ de_area[i].cThreads = 0;
|
|
|
+ de_area[i].finished = FALSE;
|
|
|
+ lran2_init(&de_area[i].rgen, de_area[i].seed);
|
|
|
+ _beginthread(exercise_heap, &de_area[i]);
|
|
|
+ }
|
|
|
+
|
|
|
+ QueryPerformanceCounter( &start_cnt );
|
|
|
+
|
|
|
+ printf ("Sleeping for %ld seconds.\n", sleep_cnt);
|
|
|
+ Sleep(sleep_cnt * 1000L) ;
|
|
|
+
|
|
|
+ stopflag = TRUE ;
|
|
|
+
|
|
|
+ for(i = 0; i < num_threads; i++) {
|
|
|
+ while( !de_area[i].finished ) {
|
|
|
+ sched_yield();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ QueryPerformanceCounter( &end_cnt );
|
|
|
+
|
|
|
+ sum_frees = sum_allocs =0 ;
|
|
|
+ sum_threads = 0 ;
|
|
|
+ for(i=0;i< num_threads; i++){
|
|
|
+ sum_allocs += de_area[i].cAllocs ;
|
|
|
+ sum_frees += de_area[i].cFrees ;
|
|
|
+ sum_threads += de_area[i].cThreads ;
|
|
|
+ de_area[i].cAllocs = de_area[i].cFrees = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ ticks = end_cnt - start_cnt ;
|
|
|
+ duration = (double)ticks/ticks_per_sec ;
|
|
|
+
|
|
|
+ for(i = 0; i < num_threads; i++) {
|
|
|
+ if( !de_area[i].finished ) {
|
|
|
+ printf("Thread at %d not finished\n", i);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ rate_n = sum_allocs/duration ;
|
|
|
+ if( rate_1 == 0){
|
|
|
+ rate_1 = rate_n ;
|
|
|
+ }
|
|
|
+ reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;
|
|
|
+ // used_space = CountReservedSpace() - init_space;
|
|
|
+ used_space = 0;
|
|
|
+
|
|
|
+ printf("%2d ", num_threads ) ;
|
|
|
+ printf("%6.3f", duration ) ;
|
|
|
+ printf("%6.3f", rate_n/rate_1 ) ;
|
|
|
+ printf("%8.0f", sum_allocs/duration ) ;
|
|
|
+ printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
|
|
|
+ printf("\n") ;
|
|
|
+
|
|
|
+ Sleep(5000L) ; // wait 5 sec for old threads to die
|
|
|
+
|
|
|
+ prevthreads = num_threads;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+int main(void) {
|
|
|
+ long sleep_cnt;
|
|
|
+ int min_threads, max_threads;
|
|
|
+ int num_chunks = 10000;
|
|
|
+ int num_rounds;
|
|
|
+ int chperthread;
|
|
|
+
|
|
|
+ myallocator = dmm_init();
|
|
|
+
|
|
|
+ printf("Larson benchmark\n");
|
|
|
+
|
|
|
+ printf("runtime (sec): ") ;
|
|
|
+ //scanf ("%ld", &sleep_cnt);
|
|
|
+ sleep_cnt = 10;
|
|
|
+
|
|
|
+ printf("chunk size (min,max): ") ;
|
|
|
+ //scanf("%d %d", &min_size, &max_size ) ;
|
|
|
+ min_size = 32;
|
|
|
+ max_size = 256;
|
|
|
+
|
|
|
+ printf("threads (min, max): ") ;
|
|
|
+ //scanf("%d %d", &min_threads, &max_threads) ;
|
|
|
+ min_threads = 1;
|
|
|
+ max_threads = 1;
|
|
|
+
|
|
|
+ pthread_setconcurrency(max_threads);
|
|
|
+
|
|
|
+ printf("chunks/thread: ");
|
|
|
+ //scanf("%d", &chperthread );
|
|
|
+ chperthread = 1;
|
|
|
+
|
|
|
+ num_chunks = max_threads * chperthread ;
|
|
|
+ if( num_chunks > MAX_BLOCKS ){
|
|
|
+ printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ;
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ printf("no of rounds: ");
|
|
|
+ //scanf("%d", &num_rounds );
|
|
|
+ num_rounds = 1;
|
|
|
+
|
|
|
+ runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ;
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|