101 #include <sys/timeb.h>
102 #define WIN32_HIGHRES_TIME
105 int getopt(
int,
char *
const *,
const char *);
107 # define SLEEP_SEC(x) Sleep((x)*1000)
109 # define SLEEP_MSEC(x) Sleep(x)
113 # define SLEEP_SEC(x) sleep(x)
115 # define SLEEP_MSEC(x) \
119 usleep((x) % 1000 * 1000); \
133 #define DEF_OUTFILE NULL
139 #define DEF_OUTBUF 10000
143 #define DEF_RUNTIME 1
147 #define DEF_LINEWID 79
155 asm volatile ("rdtsc" : "=a" (lo), "=d" (hi)); \
156 x = (uint64_t)hi << 32 | lo; \
/* Element count of a true array (never a pointer or array parameter).
 * The argument is parenthesized before indexing so that any array-valued
 * expression passed in expands with the intended precedence. */
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
240 const char *
OptString =
"b:c:f:hk:m:o:p:r:sw:";
241 const char *
usage =
"[-b bins] [-c cpu] [-f file] [-h] [-k knee] [-m min] [-o outbuf] [-p pause] [-r runtime] [-s] [-w width]";
245 const char *
usage =
"[-b bins] [-f file] [-h] [-k knee] [-m min] [-o outbuf] [-p pause] [-r runtime] [-s] [-w width]";
265 set_affinity(
const char* affinity) {
268 fprintf(stderr,
"Requesting CPU affinity on core %s\n", affinity);
269 if ((bm=bitmask_alloc(__CPU_SETSIZE)) == NULL) {
270 fprintf(stderr,
"bitmask_alloc failed\n");
273 if (bitmask_parselist(affinity, bm) != 0) {
274 fprintf(stderr,
"bitmask_parselist failed\n");
277 if (sched_setaffinity(0, bitmask_nbytes(bm),
278 (cpu_set_t *)bitmask_mask(bm)) < 0) {
279 fprintf(stderr,
"%s: failed to set affinity to %s\n", affinity);
284 #ifndef HAVE_ASPRINTF
295 #define RET_MAX_LEN 10
299 va_start(ap, format);
301 fprintf(stderr,
"asprintf: malloc() failed\n");
319 t2ts(uint64_t ticks,
double tpns) {
320 double ns = ticks/tpns;
323 if (ns < 1E3 )
asprintf(&s,
"%4.3gns", ns);
324 else if (ns < 1E6 )
asprintf(&s,
"%4.3gus", ns/1E3);
325 else if (ns < 1E9 )
asprintf(&s,
"%4.3gms", ns/1E6);
326 else if (ns < 1E12)
asprintf(&s,
"%4.3gs", ns/1E9);
351 args.cpu = strdup(
optarg);
396 fprintf(stderr,
"Couldn't allocate memory for outlier buffer\n");
400 fprintf(stderr,
"Unable to create outliers file %s\n",
413 fprintf(stderr,
"Couldn't allocate memory for histogram bins\n");
421 for (bp=histo; bp<histo+(args.
bins/2); bp++) {
428 uint64_t mult = args.
knee;
429 for ( ; bp<histo+args.
bins; bp++) {
440 histo[args.
bins-1].
ub = UINT64_MAX;
451 uint64_t start_us, stop_us, now_us;
452 uint64_t start_tsc, stop_tsc;
454 uint64_t deltas[10], *dp;
456 struct timeval now_gtod;
458 const char *cnt_graph_hdr =
"Time Ticks Count Percent Cumulative ";
459 const char *sum_graph_hdr =
"Time Ticks Sum Percent Cumulative ";
460 const char *graph_str =
"*******************************************************************";
468 fprintf(stderr,
"Min (%" PRIu64
469 ") must be < knee (%" PRIu64
")\n",
474 fprintf(stderr,
"Too few (%" PRIu64
475 ") discrete values between min (%" PRIu64
476 ") and knee (%" PRIu64
") for linear histogram bins (%d)\n",
480 if (args.
linewid < strlen(cnt_graph_hdr)+1) {
481 fprintf(stderr,
"Minimum line width is %zd\n", strlen(cnt_graph_hdr)+1);
484 if (args.
linewid > strlen(cnt_graph_hdr)+strlen(graph_str)) {
485 fprintf(stderr,
"Maximum line width is %zd\n",
486 strlen(cnt_graph_hdr)+strlen(graph_str));
491 fprintf(stderr,
"%s\n%s %s\n",
version, argv[0],
usage);
496 if (args.cpu != NULL)
497 set_affinity(args.cpu);
511 uint64_t timing_ticks = 0;
512 uint64_t delta_count = 0;
513 uint64_t delta_sum = 0;
529 gettimeofday(&now_gtod, NULL);
530 start_us = now_gtod.tv_sec * 1000000 + now_gtod.tv_usec;
531 stop_us = start_us + 1000000*args.
runtime;
539 register uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10;
568 timing_ticks += t10-t0;
574 for (dp=deltas; dp<deltas+
ARRAY_SIZE(deltas); dp++) {
582 for (bp=histo; *dp>bp->
ub; bp++) {}
591 avg += ((double)*dp-avg)/delta_count;
592 svn += ((double)*dp-avg)*((double)*dp-last_avg);
595 if (outbuf!=NULL && *dp>args.
knee) {
601 if (obp-outbuf >= args.
outbuf) {
608 gettimeofday(&now_gtod, NULL);
609 now_us = now_gtod.tv_sec * 1000000 + now_gtod.tv_usec;
611 }
while (now_us < stop_us);
614 double tpns = (stop_tsc-start_tsc)/1000.0/(stop_us-start_us);
617 printf(
"%sGraph ln(Count-e)\n", (args.
sum) ? sum_graph_hdr : cnt_graph_hdr);
620 uint64_t max_count=0, max_sum=0;
621 for (bp=histo; bp<histo+args.
bins; bp++) {
631 graph_scale = (double)(args.
linewid-strlen(cnt_graph_hdr))/log(((
double)max_sum )-M_E);
633 graph_scale = (double)(args.
linewid-strlen(cnt_graph_hdr))/log(((
double)max_count)-M_E);
637 uint64_t c_count = 0;
638 uint64_t mid_count = 0;
640 uint64_t mid_sum = 0;
641 for (bp=histo; bp<histo+args.
bins; bp++) {
646 if (bp == histo+(args.
bins/2)) {
655 char *ub_str, ubbuf[99];
656 if (bp->
ub == UINT64_MAX)
659 sprintf(ubbuf,
"%-8" PRIu64, bp->
ub);
669 graphwid = graph_scale*log(((
double)bp->
delta_sum )-M_E);
671 graphwid = graph_scale*log(((
double)bp->
delta_count)-M_E);
685 printf(
"%s %s %-12d %7.4f%% %8.4f%% %*.*s\n",
687 100.0*bp->
delta_sum/delta_sum, 100.0*c_sum/delta_sum,
688 graphwid, graphwid, graph_str);
690 printf(
"%s %s %-12d %7.4f%% %8.4f%% %*.*s\n",
692 100.0*bp->
delta_count/delta_count, 100.0*c_count/delta_count,
693 graphwid, graphwid, graph_str);
697 if (bp-histo+1 == args.
bins/2)
705 double std_dev = sqrt(svn/delta_count);
708 printf(
"\nTiming was measured for %s, %5.2f%% of runtime\n",
709 t2ts(timing_ticks, tpns), 100.0*timing_ticks/(stop_tsc-start_tsc));
710 printf(
"CPU speed measured : %7.2f MHz over %" PRIu64
" iterations\n",
711 (
double)(stop_tsc-start_tsc)/(stop_us-start_us), delta_count);
712 printf(
"Min / Average / Std Dev / Max : %" PRIu64
" / %" PRIu64
" / %3.0f / %" PRIu64
" ticks\n",
713 min, delta_sum/delta_count, std_dev, max);
714 printf(
"Min / Average / Std Dev / Max : %s / %s / %s / %s\n",
715 t2ts(min, tpns),
t2ts(delta_sum/delta_count, tpns),
716 t2ts((uint64_t)std_dev, tpns),
t2ts(max, tpns));
719 if (min<args.
min || args.
min<0.80*min) {
720 printf(
"Recommend min setting of %3.0f ticks\n", 0.80*min);
722 if (outbuf==NULL && 100.0*mid_count/delta_count<90.0) {
723 printf(
"Recommend increasing knee setting from %" PRIu64
" ticks\n",
726 if (outbuf==NULL && 100.0*mid_count/delta_count>99.0) {
727 printf(
"Recommend decreasing knee setting from %" PRIu64
" ticks\n",
733 if (outbuf != NULL) {
735 printf(
"Recommend increasing knee setting from %" PRIu64
" ticks\n",
737 }
else if (obp-outbuf < args.
outbuf/4) {
738 printf(
"Recommend decreasing knee setting from %" PRIu64
" ticks\n",
741 for (obp=outbuf; obp<outbuf+args.
outbuf; obp++) {
745 (obp->
when-start_tsc)/tpns/1000000.0,
746 obp->
delta/tpns/1000.0);