First, here is a program that measures the precision: first of the system call that measures CPU time, and then of the system call that measures elapsed time. The unit of the former is the tick, where one tick is 10 milliseconds; the unit of the latter is the microsecond.
#include <sys/times.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdio.h>

/* For reference, the structures used by gettimeofday (defined in <sys/time.h>):
 *
 * struct timeval {
 *     long tv_sec;          // seconds
 *     long tv_usec;         // microseconds
 * };
 * struct timezone {
 *     int  tz_minuteswest;  // minutes W of Greenwich
 *     int  tz_dsttime;      // type of dst correction
 * };
 */

int main( int argc, char **argv)
{
    clock_t tmscur;
    struct tms tmsstart, tmscurrent;
    int timevalcur;
    struct timeval timevalstart;
    struct timeval timevalcurrent;
    struct timezone timezonestart, timezonecurrent;

    /* Spin until the CPU time reported by times() changes. */
    tmscur = times(&tmsstart);
    do {
        tmscur = times(&tmscurrent);
    } while( tmsstart.tms_utime == tmscurrent.tms_utime &&
             tmsstart.tms_stime == tmscurrent.tms_stime );
    printf("times : utime difference = %ld\n",
           (long)(tmscurrent.tms_utime - tmsstart.tms_utime));
    printf("times : stime difference = %ld\n",
           (long)(tmscurrent.tms_stime - tmsstart.tms_stime));

    /* Spin until the time reported by gettimeofday() changes. */
    timevalcur = gettimeofday(&timevalstart, &timezonestart);
    do {
        timevalcur = gettimeofday(&timevalcurrent, &timezonecurrent);
    } while ( timevalstart.tv_sec == timevalcurrent.tv_sec &&
              timevalstart.tv_usec == timevalcurrent.tv_usec );
    printf("gettimeofday : sec difference = %ld\n",
           (long)(timevalcurrent.tv_sec - timevalstart.tv_sec) );
    printf("gettimeofday : usec difference = %ld\n",
           (long)(timevalcurrent.tv_usec - timevalstart.tv_usec) );
    return 0;
}

First, compile it:
gcc -o time-precision time-precision.c

Running it produces output like the following:
% ./time-precision
times : utime difference = 1
times : stime difference = 0
gettimeofday : sec difference = 0
gettimeofday : usec difference = 3

Both system calls deliver a precision roughly equal to their respective unit.
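The 10 ms tick length quoted above is not fixed; it can be checked on a given system with sysconf(_SC_CLK_TCK), the same call used later in print_times. A minimal check (not part of the original programs) might look like this:

#include <stdio.h>
#include <unistd.h>

/* Print the number of clock ticks per second and the resulting
 * tick length in milliseconds (100 ticks/s -> 10 ms per tick). */
int main(void)
{
    long clktck = sysconf(_SC_CLK_TCK);
    printf("ticks per second : %ld\n", clktck);
    printf("one tick         : %f millisec.\n", 1000.0 / (double)clktck);
    return 0;
}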
The next program repeats the measurement of a target piece of code several times and computes the mean, minimum, maximum, and RMS of the measured times.
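The quantity called RMS here is, as the rms() function below computes it, the root-mean-square deviation of the individual measurements from their mean, i.e. the standard deviation of the measured times:

\[ \mathrm{RMS} = \sqrt{\tfrac{1}{N}\sum_{i=1}^{N}(t_i - \bar{t})^2}, \qquad \bar{t} = \tfrac{1}{N}\sum_{i=1}^{N} t_i \]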
#include <unistd.h>
#include <math.h>
#include <stdio.h>

double mean(double* data, int count, double *min, double *max);
double rms(double* data, int count);

double mean(double* data, int count, double *min, double *max)
{
    double average = 0.0;
    int i;

    *min = data[0];
    *max = data[0];
    for( i = 0; i < count; i++) {
        average += data[i];
        if(*min > data[i]) *min = data[i];
        if(*max < data[i]) *max = data[i];
    }
    average /= (double)count;
    return average;
}

double rms(double* data, int count)
{
    double average, variance, min, max;
    double sum = 0.0;
    int i;

    average = mean(data, count, &min, &max);
    printf("average, min. and max. = %f, %f and %f\n", average, min, max);
    for( i = 0; i < count; i++) {
        variance = data[i];
        variance -= average;
        sum += variance*variance;
    }
    sum /= (double)count;
    sum = sqrt(sum);
    return (sum);
}

Insert the code you want to measure into the following part (the measure() function):
unsigned long long microsecond(void);   /* provided by microsec.c */

void measure(void);
void measure(void)
{
    int i;
/*
    for(i = 0; i< 10; i++);
    for(i = 0; i< 100; i++);
*/
    for(i = 0; i< 10000; i++);
/*
    usleep(50000);
*/
}

void result(double* data_time, int loop);
void result(double* data_time, int loop)
{
    printf("RMS of measured data is %f.\n", rms(data_time, loop));
}

int main()
{
    unsigned long long start;
    int i, j, loop;
    double root_mean_square, data_time[10000];

    loop = 100;
    for (j = 0; j < loop; j++) {
        start = microsecond();
        measure();
        data_time[j] = (double)( microsecond() - start);
    }
    result(data_time, loop);
    return 0;
}

First, compile the program:
gcc -o rmstest rmstest.c microsec.c -lm

Running this program gives output like the following:
average, min. and max. = 455.030000, 435.000000 and 1048.000000
RMS of measured data is 63.478257.

Note that microsec.c, which is needed to compile this program, appears later.
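The actual microsec.c is the one shown later; purely as a rough idea of what such a microsecond() helper has to do (this sketch is an assumption, not the author's file), a gettimeofday-based version could look like this:

#include <sys/time.h>

/* Hypothetical sketch: return the current time in microseconds.
 * The real microsec.c used above may be implemented differently. */
unsigned long long microsecond(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return (unsigned long long)tv.tv_sec * 1000000ULL
         + (unsigned long long)tv.tv_usec;
}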
The following program measures the elapsed time and the CPU time of an empty loop with the times() system call, dividing by the number of iterations to obtain the per-iteration cost in microseconds:

#include <sys/times.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

void print_times( char *title, clock_t real, int loop,
                  struct tms *start, struct tms *end )
{
    int clktck;
    float etime, cputime;

    clktck = sysconf(_SC_CLK_TCK);
    printf("%s\n", title);
    etime = (float)real / (double) clktck * 1000000. / (float)loop;
    cputime = (float)(end->tms_utime - start->tms_utime)
            + (float)(end->tms_stime - start->tms_stime);
    cputime = cputime / (double) clktck * 1000000. / (float)loop;
    printf(" elapsed time : %f microsec.\n", etime);
    printf(" cpu time     : %f microsec.\n", cputime);
}

int main( int argc, char **argv)
{
    int i, loop;
    clock_t start, end;
    struct tms tmsstart, tmsend;

    if( argc < 2 ) loop = 1000000;
    else           loop = atoi(argv[1]);

    start = times(&tmsstart);
    for(i = 0; i < loop; i++) { }
    end = times(&tmsend);
    print_times("Measurement", end - start, loop, &tmsstart, &tmsend);
    return 0;
}
gettimeofday.c:

#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdio.h>

int e_time(struct timeval *first, struct timeval *second)
{
    if (first->tv_usec > second->tv_usec) {
        second->tv_usec += 1000000;
        second->tv_sec--;
    }
    return( (int)(second->tv_sec - first->tv_sec)*1000000
          + (int)(second->tv_usec - first->tv_usec));
}

int main( int argc, char **argv)
{
    int i, default_etime;
    struct timeval start, stop;
    struct timezone tzpstart, tzpstop;

    /* Measure the overhead of a pair of gettimeofday calls. */
    gettimeofday (&start, &tzpstart);
    gettimeofday (&stop, &tzpstop);
    default_etime = e_time( &start, &stop);

    gettimeofday (&start, &tzpstart);

Replace the following for loop with the code you want to measure.
    for( i = 0; i < 1000; i++) { }
    gettimeofday (&stop, &tzpstop);

    printf("elapsed time of 'for( i = 0; i < 1000; i++) {}' = %d in microsec.\n",
           e_time( &start, &stop) - default_etime );
    printf("overhead of gettimeofday = %d in microsec.\n", default_etime );
    return 0;
}

Compiling and running this gives the following:
% gcc -O -o gettimeofday gettimeofday.c
% ./gettimeofday
elapsed time of 'for( i = 0; i < 1000; i++) {}' = 6 in microsec.
overhead of gettimeofday = 3 in microsec.
% gcc -o gettimeofday gettimeofday.c
% ./gettimeofday
elapsed time of 'for( i = 0; i < 1000; i++) {}' = 13 in microsec.
overhead of gettimeofday = 4 in microsec.
rdtsctest.c reads the Pentium time-stamp counter with the rdtsc instruction (emitted here as the byte sequence 0x0f 0x31) and uses it to time short code sequences:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * define the functions...
 */
unsigned long long int rdtsc(void)
{
    unsigned long long int x;
    __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
    return x;
}

int main()
{
    unsigned long long int a, b;
    unsigned long long int a1, b1;
    unsigned long long int a2, b2;
    unsigned long long int a3, b3;
    unsigned long long int a4, b4;
    int i;

    a = rdtsc();

    a4 = rdtsc();
    for(i = 0; i < 1000; i++);
    b4 = rdtsc();

    a3 = rdtsc();
    for(i = 0; i < 100; i++);
    b3 = rdtsc();

    a2 = rdtsc();
    for(i = 0; i < 10; i++);
    b2 = rdtsc();

    a1 = rdtsc();
    b1 = rdtsc();

    printf("current clock is %x%x count in hex.\n", (int)(a>>32), (int)a );
    printf("No instruction took %d count in dec.\n", (int)(b1-a1));
    printf("for(i = 0; i < 10; i++) statement took %d count\n", (int)(b2-a2));
    printf("for(i = 0; i < 100; i++) statement took %d count\n", (int)(b3-a3));
    printf("for(i = 0; i < 1000; i++) statement took %d count\n", (int)(b4-a4));

    a = rdtsc();
    usleep(100000);
    b = rdtsc();
    printf("usleep(100000) took %d count\n", (int)(b-a));

    a = rdtsc();
    usleep(1000);
    b = rdtsc();
    printf("usleep(1000) took %d count\n", (int)(b-a));
    return 0;
}

Compiling and running it gives the following:
% gcc -o rdtsctest rdtsctest.c
% ./rdtsctest
current clock is 27f6e4759204 count in hex.
No instruction took 49 count in dec.
for(i = 0; i < 10; i++) statement took 126 count
for(i = 0; i < 100; i++) statement took 486 count
for(i = 0; i < 1000; i++) statement took 4086 count
usleep(100000) took 26845482 count
usleep(1000) took 5308115 count
% gcc -O -o rdtsctest rdtsctest.c
% ./rdtsctest
current clock is 27fc346619cb count in hex.
No instruction took 47 count in dec.
for(i = 0; i < 10; i++) statement took 88 count
for(i = 0; i < 100; i++) statement took 270 count
for(i = 0; i < 1000; i++) statement took 2064 count
usleep(100000) took 28856180 count
usleep(1000) took 5310232 count

This machine is a 266 MHz Pentium II, so the overhead of calling rdtsc is roughly 200 nanoseconds (47/266 = 0.177 microseconds). Incidentally, the for loop with 1000 iterations came out at 6 microseconds when measured with gettimeofday, whereas the rdtsc measurement gives about 7.7 microseconds. Since the execution environment of a machine varies somewhat over time, the two results are consistent with each other.
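Converting TSC counts into time is just a division by the CPU clock frequency. A small helper along these lines (assuming the 266 MHz clock mentioned above; the frequency is not part of the original programs and has to be determined for each machine) reproduces the numbers quoted:

#include <stdio.h>

/* Assumed CPU clock frequency in MHz (266 MHz Pentium II in the text). */
#define CPU_MHZ 266.0

/* Convert a TSC count difference into microseconds. */
static double tsc_to_usec(unsigned long long count)
{
    return (double)count / CPU_MHZ;
}

int main(void)
{
    printf("47 counts   = %f microsec. (rdtsc overhead)\n", tsc_to_usec(47));
    printf("2064 counts = %f microsec. (for loop, 1000 iterations)\n",
           tsc_to_usec(2064));
    return 0;
}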
The following program, precision.c, measures how long usleep() and nanosleep() actually sleep:

#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

void get_time(struct timeval *timevalue)
{
    struct timezone tzp;

    gettimeofday (timevalue, &tzp);
}

int e_time(struct timeval *first, struct timeval *second)
{
    if (first->tv_usec > second->tv_usec) {
        second->tv_usec += 1000000;
        second->tv_sec--;
    }
#ifdef DEBUG
    printf("elapsed time %d sec, %d microsec\n",
           second->tv_sec - first->tv_sec,
           second->tv_usec - first->tv_usec);
#endif
    return( (int)(second->tv_sec - first->tv_sec)*1000000
          + (int)(second->tv_usec - first->tv_usec));
}

int main( int argc, char **argv)
{
    struct timeval start, stop;
    struct timespec req, rem;
    int sleeptime;

    if(argc < 2) sleeptime = 1000;
    else         sleeptime = atol(argv[1]);

    get_time(&start);
    usleep((unsigned long)sleeptime);
    get_time(&stop);
    printf("elapsed time for usleep(%d) = %d in microsec.\n",
           sleeptime, e_time( &start, &stop));

    req.tv_sec  = sleeptime/1000000;
    req.tv_nsec = (sleeptime%1000000)*1000;
    get_time(&start);
    nanosleep( &req, &rem);
    get_time(&stop);
    printf("elapsed time for nanosleep(%d) = %d in microsec.\n",
           sleeptime*1000, e_time( &start, &stop));
    return 0;
}

To compile:
gcc -o precision precision.c

Running it gives output like the following:
% ./precision 1
elapsed time for usleep(1) = 14407 in microsec.
elapsed time for nanosleep(1000) = 19725 in microsec.
% ./precision 10000
elapsed time for usleep(10000) = 18102 in microsec.
elapsed time for nanosleep(10000000) = 19589 in microsec.
% ./precision 100000
elapsed time for usleep(100000) = 100986 in microsec.
elapsed time for nanosleep(100000000) = 109612 in microsec.

These results show that although usleep takes its argument in microseconds and nanosleep in nanoseconds, the actual precision of both is only on the order of 10 milliseconds.
The following program measures the time taken by a single CAMAC read action:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <unistd.h>
#include "camlib.h"

#define LOOP  100
#define SWREG 3 -1

extern int errno;

/* Functions */
error_exit(status)
int status;
{
    perror((char *)strerror(status));
    exit(status);
}

main( int argc, char **argv)
{
    int i;
    int read_swreg, dumy, q, x;
    struct timeval timevalue0[LOOP], timevalue1[LOOP];
    int elapsed[LOOP];
    int average = 0;
    int overhead = 0;

    read_swreg = NAF(SWREG, 1, 0);

    if (CAMOPN()) {
        printf("CAMAC open error = %d", errno );
        exit(1);
    }
    CSETCR(0);
    CGENZ();
    CGENC();
    CREMI();

    /* Measure the overhead of a pair of get_time calls. */
    for( i = 0; i < LOOP; i++ ) {
        get_time(&timevalue0[i]);
        get_time(&timevalue1[i]);
        overhead += e_time(&timevalue0[i], &timevalue1[i]);
    }
    overhead /= LOOP;

    /* Measure single CAMAC read actions. */
    for( i = 0; i < LOOP; i++) {
        get_time(&timevalue0[i]);
        CAMAC(read_swreg, &dumy, &q, &x);
        get_time(&timevalue1[i]);
    }

    for(i = 0; i < LOOP; i++ ) {
        elapsed[i] = e_time(&timevalue0[i], &timevalue1[i]);
        /* printf("elapsed time %d microsec\n", elapsed[i] ); */
        average += elapsed[i] - overhead;
    }
    printf("CAMAC READ single action takes %d microsec.\n", average/LOOP);

    /* close CAMAC */
    CAM_Close();
}

get_time(timevalue)
struct timeval *timevalue;
{
    struct timezone tzp;

    gettimeofday (timevalue, &tzp);
}

int e_time(first, second)
struct timeval *first, *second;
{
    if (first->tv_usec > second->tv_usec) {
        second->tv_usec += 1000000;
        second->tv_sec--;
    }
    return( (int)(second->tv_sec - first->tv_sec)*1000000
          + (int)(second->tv_usec - first->tv_usec));
}

Running the above program gives the following.
% ./camsingle_measure
CAMAC READ single action takes 9 microsec.
The following program measures how long CAM_WaitLAM takes to return after a LAM is raised; a child process generates the interrupt pulses while the parent waits for the LAM:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <signal.h>
#include <sys/time.h>
#include <unistd.h>
#include <sys/ipc.h>
#include <sys/errno.h>
#include "camlib.h"

#define LOOP          10
#define INTREG        5-1
#define OUTREG        6-1
#define ENABLE_INTREG (1<<(INTREG-1))
#define TIME_60_SEC   60

extern int errno;

/* Functions */
error_exit(status)
int status;
{
    perror((char *)strerror(status));
    exit(status);
}

int write_pulse, dumy, q, x;
struct timeval timevalue0[LOOP], timevalue1[LOOP];
int elapsed[LOOP];
long average, aloop;

main( int argc, char **argv)
{
    int i, j;
    int loop, bit;
    int status, intrno, intcount;
    int pid;
    int pipeid[2];
    int overhead = 0;

    setup();

    if( pipe(pipeid) < 0 ) {
        printf("pipe open error\n");
        exit(0);
    }

    /* Measure the overhead of a CAMAC write plus two get_time calls. */
    for( i = 0; i < LOOP; i++ ) {
        get_time(&timevalue0[i]);
        CAMAC(write_pulse, &dumy, &q, &x);
        get_time(&timevalue1[i]);
        if( i > 1 ) overhead += e_time(&timevalue0[i], &timevalue1[i]);
    }
    overhead /= (LOOP-2);

    pid = fork();
    if( pid == 0 ) {
        /* Child process: generate interrupt pulses */
        close(pipeid[0]);
        sleep(1);
        for( i = 0; i < LOOP; i++ ) {
            dumy = 0xF;
            get_time(&timevalue0[i]);
            CAMAC(write_pulse, &dumy, &q, &x);
            sleep(1);
            write( pipeid[1], &timevalue0[i], sizeof(struct timeval));
            sleep(1);
        }
        exit(0);
    } else if ( pid < 0 ) {
        /* Error */
        printf("Can't fork\n");
        exit(1);
    }

    /* Parent process: wait for the LAM raised by each pulse */
    close(pipeid[1]);
    if( status = CAM_EnableLAM(ENABLE_INTREG) ) error_exit(status);
    for( i = 0; i < LOOP; i++ ) {
        if( status = CAMAC(NAF(INTREG,0,26), &dumy, &q, &x ) ) error_exit(status);
        if( status = CAM_WaitLAM(TIME_60_SEC) ) error_exit(status);
        get_time(&timevalue1[i]);
        read(pipeid[0], &timevalue0[i], sizeof(struct timeval));
        if( status = CAMACW(NAF(INTREG,0,9), &dumy, &q, &x ) ) error_exit(status);
    }

    average = 0;
    for(i = 2; i < LOOP; i++ ) {
        elapsed[i] = e_time(&timevalue0[i], &timevalue1[i]);
        /* printf("elapsed time %d microsec\n", elapsed[i] ); */
        average += elapsed[i] - overhead;
    }
    printf("CAM_WaitLAM takes %d microsec.\n", average/(LOOP-2));

    /* close CAMAC */
    CAM_Close();
}

setup()
{
    if (CAMOPN()) {
        printf("CAMAC open error = %d", errno );
        exit(1);
    }
    write_pulse = NAF(OUTREG, 0, 17);
    CSETCR(0);
    CGENZ();
    CREMI();
}

get_time(timevalue)
struct timeval *timevalue;
{
    struct timezone tzp;

    gettimeofday (timevalue, &tzp);
}

int e_time(first, second)
struct timeval *first, *second;
{
    if (first->tv_usec > second->tv_usec) {
        second->tv_usec += 1000000;
        second->tv_sec--;
    }
    /*
    printf("elapsed time %d sec, %d microsec\n",
           second->tv_sec - first->tv_sec,
           second->tv_usec - first->tv_usec);
    */
    return( (int)(second->tv_sec - first->tv_sec)*1000000
          + (int)(second->tv_usec - first->tv_usec));
}

Running the above program gives the following.
% ./camint_measure
CAM_WaitLAM takes 41 microsec.