性能測定
測定時間の種類まずは精度を測定するプログラムを示します。 CPU時間を測定するシステムコールの精度について、次に エラプス時間を測定するシステムコールの精度について、それぞれ 計るものです。前者の時間単位はティックです。10ミリ秒で1ティックです。 後者の時間単位はマイクロ秒です。
#include <sys/times.h>
#include <unistd.h>
#include <stdio.h>
struct timeval {
long tv_sec; /* seconds */
long tv_usec; /* microseconds */
};
struct timezone {
int tz_minuteswest; /* minutes W of Greenwich */
int tz_dsttime; /* type of dst correction */
};
main( int argc, char **argv) {
clock_t tmscur;
struct tms tmsstart, tmscurrent;
int timevalcur;
struct timeval timevalstart;
struct timeval timevalcurrent;
struct timezone timezonestart, timezonecurrent;
tmscur = times(&tmsstart);
do {
tmscur = times(&tmscurrent);
} while( tmsstart.tms_utime == tmscurrent.tms_utime &&
tmsstart.tms_stime == tmscurrent.tms_stime );
printf("times : utime difference = %d\n",
tmscurrent.tms_utime - tmsstart.tms_utime);
printf("times : stime difference = %d\n",
tmscurrent.tms_stime - tmsstart.tms_stime);
timevalcur = gettimeofday(&timevalstart, &timezonestart);
do {
timevalcur = gettimeofday(&timevalcurrent, &timezonecurrent);
} while ( timevalstart.tv_sec == timevalcurrent.tv_sec &&
timevalstart.tv_usec == timevalcurrent.tv_usec );
printf("gettimeofday : sec difference = %d\n",
timevalcurrent.tv_sec - timevalstart.tv_sec );
printf("gettimeofday : usec difference = %d\n",
timevalcurrent.tv_usec - timevalstart.tv_usec );
}
まずはコンパイルして、
gcc -o time-precision time-precision.cこれを実行して見るとつぎのようになります。
% ./time-precision times : utime difference = 1 times : stime difference = 0 gettimeofday : sec difference = 0 gettimeofday : usec difference = 3どちらのシステムコールもその単位とほぼ同じ精度がでています。
次のプログラムは測定対象を複数回、測定を繰り返すことで 平均値、最小値、最大値、RMSを時間を求めるプログラムです。
#include <unistd.h>
#include <math.h>
double mean(double* data, int count, double *min, double *max);
double rms(double* data, int count);
double mean(double* data, int count, double *min, double *max) {
double average = 0.0;
int i;
*min = data[0];
*max = data[0];
for( i = 0; i < count; i++) {
average += data[i];
if(*min > data[i]) *min = data[i];
if(*max < data[i]) *max = data[i];
}
average /= (double)count;
return average;
}
double rms(double* data, int count) {
double average, variance, min, max;
double sum = 0.0;
int i;
average = mean(data,count, &min, &max);
printf("average, min. and max. = %f, %f and %f\n",
average, min, max);
for( i = 0; i < count; i++) {
variance = data[i];
variance -= average;
sum += variance*variance;
}
sum /= (double)count;
sum = sqrt(sum);
return (sum);
}
この部分に測定したい測定対象を入れてください。
void measure(void);
void measure(void) {
int i;
/*
for(i = 0; i< 10; i++);
for(i = 0; i< 100; i++);
*/
for(i = 0; i< 10000; i++);
/*
usleep(50000);
*/
}
void result(double* data_time, int loop);
void result(double* data_time, int loop) {
printf("RMS of measured data is %f.\n",
rms(data_time, loop));
}
main() {
unsigned long long start;
int i, j, loop;
double root_mean_square, data_time[10000];
loop = 100;
for (j = 0; j < loop; j++) {
start = microsecond();
measure();
data_time[j] =(double)( microsecond() - start);
}
result(data_time, loop);
}
まずはプログラムのコンパイルです。
gcc -o rmstest rmstest.c microsec.c -lmこのプログラムを実行するとつぎのようになります。
average, min. and max. = 455.030000, 435.000000 and 1048.000000 RMS of measured data is 63.478257.なお、このプログラムのコンパイルにひつようなmicrosec.cは 後にでて来ます。
時間測定のデバイスの種類
timesシステムコールを使ったCPU時間の測定
#include <sys/times.h>
#include <unistd.h>
#include <stdio.h>
void print_times( char *title, clock_t real, int loop,
struct tms *start, struct tms *end )
{
int clktck;
float etime, cputime;
clktck = sysconf(_SC_CLK_TCK);
printf("%s\n", title);
etime = (float)real / (double) clktck * 1000000. / (float)loop;
cputime = (float)(end->tms_utime - start->tms_utime) +
(float)(end->tms_stime - start->tms_stime);
cputime = cputime / (double) clktck * 1000000. / (float)loop;
printf(" elapsed time : %f microsec.\n", etime);
printf(" cpu time : %f microsec.\n", cputime);
}
main( int argc, char **argv) {
int i, loop;
clock_t start, end;
struct tms tmsstart, tmsend;
if( argc < 2 )
loop = 1000000;
else
loop = atoi(argv[1]);
start = times(&tmsstart);
for(i = 0; i < loop; i++) {
}
end = times(&tmsend);
print_times("Measurement", end - start, loop, &tmsstart, &tmsend);
}
gettimeofdayシステムコールを使ったエラプス時間の測定
gettimeofday.c:
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdio.h>
int e_time(struct timeval *first, struct timeval *second) {
if (first->tv_usec > second->tv_usec) {
second->tv_usec += 1000000;
second->tv_sec--;
}
return( (int)(second->tv_sec - first->tv_sec)*1000000
+ (int)(second->tv_usec - first->tv_usec));
}
int main( int argc, char **argv) {
int i, default_etime;
struct timeval start, stop;
struct timezone tzpstart, tzpstop;
gettimeofday (&start, &tzpstart);
gettimeofday (&stop, &tzpstop);
default_etime = e_time( &start, &stop);
gettimeofday (&start, &tzpstart);
このforループを測定対象に置き換えてください。
for( i = 0; i < 1000; i++) {
}
gettimeofday (&stop, &tzpstop);
printf("elapsed time of 'for( i = 0; i < 1000; i++) {}' = %d in microsec.\n",
e_time( &start, &stop) - default_etime );
printf("overhead of gettimeofday = %d in microsec.\n", default_etime );
}
これをコンパイルし、実行すると次のようになります。
% gcc -O -o gettimeofday gettimeofday.c
% ./gettimeofday
elapsed time of 'for( i = 0; i < 1000; i++) {}' = 6 in microsec.
overhead of gettimeofday = 3 in microsec.
% gcc -o gettimeofday gettimeofday.c
% ./gettimeofday
elapsed time of 'for( i = 0; i < 1000; i++) {}' = 13 in microsec.
overhead of gettimeofday = 4 in microsec.
CPUの周波数クロックを使ったエラプス時間の測定
rdtsctest.c :
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/*
* define the functions...
*/
unsigned long long int rdtsc(void)
{
unsigned long long int x;
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
main() {
unsigned long long int a, b;
unsigned long long int a1,b1;
unsigned long long int a2,b2;
unsigned long long int a3,b3;
unsigned long long int a4,b4;
int i;
a = rdtsc();
a4 = rdtsc();
for(i = 0; i < 1000; i++);
b4 = rdtsc();
a3 = rdtsc();
for(i = 0; i < 100; i++);
b3 = rdtsc();
a2 = rdtsc();
for(i = 0; i < 10; i++);
b2 = rdtsc();
a1 = rdtsc();
b1 = rdtsc();
printf("current clock is %x%x count in hex.\n",(int)(a>>32),(int)a );
printf("No instruction took %d count in dec.\n",(int)(b1-a1));
printf("for(i = 0; i < 10; i++) statement took %d count\n",(int)(b2-a2));
printf("for(i = 0; i < 100; i++) statement took %d count\n", (int)(b3-a3));
printf("for(i = 0; i < 1000; i++) statement took %d count\n",(int)(b4-a4));
a = rdtsc();
usleep(100000);
b = rdtsc();
printf("usleep(100000) took %d count\n", (int)(b-a));
a = rdtsc();
usleep(1000);
b = rdtsc();
printf("usleep(1000) took %d count\n", (int)(b-a));
}
コンパイルし、実行すると次のようになります。
% gcc -o rdtsctest rdtsctest.c % ./rdtsctest current clock is 27f6e4759204 count in hex. No instruction took 49 count in dec. for(i = 0; i < 10; i++) statement took 126 count for(i = 0; i < 100; i++) statement took 486 count for(i = 0; i < 1000; i++) statement took 4086 count usleep(100000) took 26845482 count usleep(1000) took 5308115 count % gcc -O -o rdtsctest rdtsctest.c % ./rdtsctest current clock is 27fc346619cb count in hex. No instruction took 47 count in dec. for(i = 0; i < 10; i++) statement took 88 count for(i = 0; i < 100; i++) statement took 270 count for(i = 0; i < 1000; i++) statement took 2064 count usleep(100000) took 28856180 count usleep(1000) took 5310232 countこの計算機は266MHzのPentiumIIなので、rdtscの呼び出しによる オーバーヘッドは約200ナノ秒(47/266=0.177)ということになります。 ちなみに、gettimeofdayで計ったforループ(1000回繰り返し)は 6マイクロ秒となっていましたが、rdtscによる測定では 約7.7マイクロ秒となっています。計算機の実行環境は時間によって ある程度変わりますから、つじつまがあった結果と言えましょう。
外部のクロックデバイスを使ったエラプス時間の測定
スリープ関数の精度
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdio.h>
void get_time(struct timeval *timevalue) {
struct timezone tzp;
gettimeofday (timevalue, &tzp);
}
int e_time(struct timeval *first, struct timeval *second) {
if (first->tv_usec > second->tv_usec) {
second->tv_usec += 1000000;
second->tv_sec--;
}
#ifdef DEBUG
printf("elapsed time %d sec, %d microsec\n",
second->tv_sec - first->tv_sec, second->tv_usec - first->tv_usec);
#endif
return( (int)(second->tv_sec - first->tv_sec)*1000000
+ (int)(second->tv_usec - first->tv_usec));
}
int main( int argc, char **argv) {
struct timeval start, stop;
struct timespec req, rem;
int sleeptime;
if(argc < 2) sleeptime = 1000;
else sleeptime = atol(argv[1]);
get_time(&start);
usleep((unsigned long)sleeptime);
get_time(&stop);
printf("elapsed time for usleep(%d) = %d in microsec.\n",
sleeptime, e_time( &start, &stop));
req.tv_sec = sleeptime/1000000;
req.tv_nsec = (sleeptime%1000000)*1000;
get_time(&start);
nanosleep( &req, &rem);
get_time(&stop);
printf("elapsed time for nanosleep(%d) = %d in microsec.\n",
sleeptime*1000, e_time( &start, &stop));
}
コンパイルは
gcc -o precision precision.c実行するとつぎのようにでています。
% ./precision 1 elapsed time for usleep(1) = 14407 in microsec. elapsed time for nanosleep(1000) = 19725 in microsec. % ./precision 10000 elapsed time for usleep(10000) = 18102 in microsec. elapsed time for nanosleep(10000000) = 19589 in microsec. % ./precision 100000 elapsed time for usleep(100000) = 100986 in microsec. elapsed time for nanosleep(100000000) = 109612 in microsec.この結果、usleepはマイクロ秒の単位で、nanasleepはナノ秒の単位で、 入力できるが、その精度は10ミリ秒程度であることがわかります。
CAMACシングル転送スピードの測定
#include <stdio.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <unistd.h>
#include "camlib.h"
#define LOOP 100
#define SWREG 3 -1
extern int errno;
/* Functions */
error_exit(status)
int status; {
perror((char *)strerror(status));
exit(status);
}
main( int argc, char **argv) {
int i;
int read_swreg, dumy, q, x;
struct timeval timevalue0[LOOP], timevalue1[LOOP];
int elapsed[LOOP];
int average = 0;
int overhead = 0;
read_swreg = NAF(SWREG, 1, 0);
if (CAMOPN()) {
printf("CAMAC open error = %d", errno );
exit(1);
}
CSETCR(0);
CGENZ();
CGENC();
CREMI();
for( i = 0; i < LOOP; i++ ) {
get_time(&timevalue0[i]);
get_time(&timevalue1[i]);
overhead += e_time(&timevalue0[i], &timevalue1[i]);
}
overhead /= LOOP;
for( i = 0; i < LOOP; i++) {
get_time(&timevalue0[i]);
CAMAC(read_swreg, &dumy, &q, &x);
get_time(&timevalue1[i]);
}
for(i = 0; i < LOOP; i++ ) {
elapsed[i] = e_time(&timevalue0[i], &timevalue1[i]);
/* printf("elapsed time %d microsec\n", elapsed[i] );
*/
average += elapsed[i] - overhead;
}
printf("CAMAC READ single action takes %d microsec.\n",average/LOOP);
/* close CAMAC */
CAM_Close();
}
get_time(timevalue)
struct timeval *timevalue; {
struct timezone tzp;
gettimeofday (timevalue, &tzp);
}
int e_time(first, second)
struct timeval *first, *second; {
if (first->tv_usec > second->tv_usec) {
second->tv_usec += 1000000;
second->tv_sec--;
}
return( (int)(second->tv_sec - first->tv_sec)*1000000
+ (int)(second->tv_usec - first->tv_usec));
}
上記のプログラムを走らせると、つぎのようになる。
% ./camsingle_measure CAMAC READ single action takes 9 microsec.
CAMAC割り込みスピードの測定
#include <stdio.h>
#include <sys/types.h>
#include <signal.h>
#include <sys/time.h>
#include <unistd.h>
#include <sys/ipc.h>
#include <sys/errno.h>
#include "camlib.h"
#define LOOP 10
#define INTREG 5-1
#define OUTREG 6-1
#define ENABLE_INTREG (1<<(INTREG-1))
#define TIME_60_SEC 60
extern int errno;
/* Functions */
error_exit(status)
int status; {
perror((char *)strerror(status));
exit(status);
}
int write_pulse, dumy, q, x;
struct timeval timevalue0[LOOP], timevalue1[LOOP];
int elapsed[LOOP];
long average, aloop;
main( int argc, char **argv) {
int i, j;
int loop,bit;
int status, intrno, intcount;
int pid;
int pipeid[2];
int overhead = 0;
setup();
if( pipe(pipeid) < 0 ) {
printf("pipe open error\n");
exit(0);
}
for( i = 0; i < LOOP; i++ ) {
get_time(&timevalue0[i]);
CAMAC(write_pulse, &dumy, &q, &x);
get_time(&timevalue1[i]);
if( i > 1 )
overhead += e_time(&timevalue0[i], &timevalue1[i]);
}
overhead /= (LOOP-2);
pid = fork();
if( pid == 0 ) { /* Child process */
/* Generate Interrupt Pulse */
close(pipeid[0]);
sleep(1);
for( i = 0; i < LOOP; i++ ) {
dumy =0xF;
get_time(&timevalue0[i]);
CAMAC(write_pulse, &dumy, &q, &x);
sleep(1);
write( pipeid[1], &timevalue0[i], sizeof(struct timeval));
sleep(1);
}
exit(0);
} else if ( pid < 0 ) { /* Error */
printf("Can't fork\n");
exit(1);
}
/* Parent process */
close(pipeid[1]);
if( status = CAM_EnableLAM(ENABLE_INTREG) )
error_exit(status);
for( i = 0; i < LOOP; i++ ) {
if( status = CAMAC(NAF(INTREG,0,26), &dumy, &q, &x ) )
error_exit(status);
if( status = CAM_WaitLAM(TIME_60_SEC) )
error_exit(status);
get_time(&timevalue1[i]);
read(pipeid[0], &timevalue0[i], sizeof(struct timeval));
if( status = CAMACW(NAF(INTREG,0,9), &dumy, &q, &x ) )
error_exit(status);
}
average = 0;
for(i = 2; i < LOOP; i++ ) {
elapsed[i] = e_time(&timevalue0[i], &timevalue1[i]);
/* printf("elapsed time %d microsec\n", elapsed[i] );
*/
average += elapsed[i] - overhead;
}
printf("CAM_WaitLAM takes %d microsec.\n",average/(LOOP-2));
/* close CAMAC */
CAM_Close();
}
setup() {
if (CAMOPN()) {
printf("CAMAC open error = %d", errno );
exit(1);
}
write_pulse = NAF(OUTREG, 0, 17);
CSETCR(0);
CGENZ();
CREMI();
}
get_time(timevalue)
struct timeval *timevalue; {
struct timezone tzp;
gettimeofday (timevalue, &tzp);
}
int e_time(first, second)
struct timeval *first, *second; {
if (first->tv_usec > second->tv_usec) {
second->tv_usec += 1000000;
second->tv_sec--;
}
/*
printf("elapsed time %d sec, %d microsec\n",
second->tv_sec - first->tv_sec, second->tv_usec - first->tv_usec);
*/
return( (int)(second->tv_sec - first->tv_sec)*1000000
+ (int)(second->tv_usec - first->tv_usec));
}
上記のプログラムを走れせると、次のようになる。
% ./camint_measure CAM_WaitLAM takes 41 microsec.