Benchmark results:
% ./bench.sh bench.c 100000
Benchmarking isalnum on bench.c (1321 bytes; 100000 rounds)
gcc: 2520.0 cycles median
musl-gcc: 17700.0 cycles median
./bench.sh bench.c 100000 1.13s user 0.07s system 123% cpu 0.973 total
% echo $((17700.0/2520.0))
7.0238095238095237
% ./bench.sh quickjs/quickjs.c 1000
Benchmarking isalnum on quickjs/quickjs.c (1739268 bytes; 1000 rounds)
gcc: 3274560.0 cycles median
musl-gcc: 20875305.0 cycles median
./bench.sh quickjs/quickjs.c 1000 8.34s user 0.07s system 101% cpu 8.246 total
% echo $((20875305.0/3274560.0))
6.3749954192318965
% ./bench.sh rand.bin 1000
Benchmarking isalnum on rand.bin (1048576 bytes; 1000 rounds)
gcc: 1968330.0 cycles median
musl-gcc: 11749530.0 cycles median
./bench.sh rand.bin 1000 4.82s user 0.08s system 103% cpu 4.712 total
% echo $((11749530.0/1968330.0))
5.9692886863483254
bench.sh:
#!/bin/sh
# bench.sh - build bench.c with gcc and musl-gcc, run each binary on the given
# input file, and print the median of the per-round cycle counts it reports.
#
# usage: bench.sh <input-file> <rounds>
set -eu

if [ "$#" -ne 2 ]; then
    echo "usage: $0 <input-file> <rounds>" >&2
    exit 2
fi

input="$1"
rounds="$2"

# Fail early: an unreadable input would otherwise only surface inside command
# substitutions / pipelines below, where `set -e` does not stop the script.
if [ ! -r "$input" ]; then
    echo "$0: cannot read input file: $input" >&2
    exit 1
fi

# ROUNDS is pasted into the C source via -D, so accept only a positive decimal
# integer (a leading zero would be interpreted as octal by the C compiler).
case "$rounds" in
    ''|0*|*[!0-9]*)
        echo "$0: rounds must be a positive decimal integer: $rounds" >&2
        exit 2
        ;;
esac

echo "Benchmarking isalnum on $input ($(stat -c %s "$input") bytes; $rounds rounds)"

for CC in gcc musl-gcc; do
    # One binary per compiler so the results can be compared side by side.
    "$CC" -o "bench_$CC" -D"ROUNDS=$rounds" -O3 -g bench.c

    # The binary prints one cycle count per line; reduce them to their median.
    median="$(./"bench_$CC" < "$input" | python -c 'print(__import__("numpy").median(list(map(int, __import__("sys").stdin))))')"

    printf "%10s: %10s cycles median\n" "$CC" "$median"
done
bench.c:
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <unistd.h>
#include <ctype.h>
#include <stdlib.h>
#ifndef ROUNDS
#define ROUNDS 10000 /* default number of timed rounds; override with -DROUNDS=n */
#endif

/* Elapsed time-stamp-counter ticks of each round; written by bench(). */
static uint64_t diffs[ROUNDS];

/*
 * Time ROUNDS complete passes of isalnum() over input[0..len) and store the
 * elapsed time-stamp-counter ticks of pass i in diffs[i].
 *
 * input: bytes to classify; not modified. May contain any byte value.
 * len:   number of bytes in input; 0 is allowed (only timer overhead is
 *        recorded).
 *
 * x86-64 only: the timestamps are read with CPUID/RDTSC/RDTSCP inline
 * assembly that names 64-bit registers in its clobber lists.
 */
void bench(const char *input, size_t len) {
    /* Volatile sink so the isalnum() calls cannot be optimized away. */
    volatile int ret;

    for (size_t i = 0; i < ROUNDS; ++i) {
        // Measurement sequence taken from the Intel white paper:
        // https://www.intel.com/content/www/us/en/embedded/training/ia-32-ia-64-benchmark-code-execution-paper.html
        uint32_t start_low, start_high, end_low, end_high;

        /*
         * CPUID serializes execution before RDTSC samples the counter.
         * __asm__/__volatile__ (rather than bare `asm`) also compiles in
         * strict ISO modes; the "memory" clobber stops the compiler from
         * moving memory accesses across the timestamp.
         */
        __asm__ __volatile__("CPUID\n"
                             "RDTSC\n"
                             "mov %%edx, %0\n"
                             "mov %%eax, %1\n"
                             : "=r"(start_high), "=r"(start_low)
                             :
                             : "%rax", "%rbx", "%rcx", "%rdx", "memory");

        for (size_t j = 0; j < len; ++j) {
            /*
             * <ctype.h> functions require an argument representable as
             * unsigned char (or EOF); a plain char may be negative for
             * bytes >= 0x80, which would be undefined behaviour.
             */
            ret = isalnum((unsigned char)input[j]);
        }

        /*
         * RDTSCP samples the counter after the timed loop; the trailing
         * CPUID keeps later instructions from being executed before it.
         */
        __asm__ __volatile__("RDTSCP\n"
                             "mov %%edx, %0\n"
                             "mov %%eax, %1\n"
                             "CPUID\n"
                             : "=r"(end_high), "=r"(end_low)
                             :
                             : "%rax", "%rbx", "%rcx", "%rdx", "memory");

        uint64_t start = ((uint64_t)start_high << 32) | start_low;
        uint64_t end = ((uint64_t)end_high << 32) | end_low;
        diffs[i] = end - start;
    }
}
/*
 * Read all of stdin into memory, run bench() over it, and print the recorded
 * tick count of every round, one per line.
 *
 * Returns 0 on success, EXIT_FAILURE if memory cannot be allocated or stdin
 * cannot be read.
 */
int main(int argc, char *argv[])
{
    enum { CHUNK = 4096 }; /* bytes requested from stdin per fread() call */

    (void)argc;
    (void)argv;

    char *buf = NULL;
    size_t len = 0;

    for (;;) {
        /* Keep the old pointer until realloc succeeds so it is not lost. */
        char *grown = realloc(buf, len + CHUNK);
        if (grown == NULL) {
            perror("realloc");
            free(buf);
            return EXIT_FAILURE;
        }
        buf = grown;

        size_t got = fread(buf + len, 1, CHUNK, stdin);
        len += got;

        /*
         * A short read means end-of-file or an error. Testing the count
         * (instead of looping on !feof) also terminates on read errors,
         * where feof() would stay false forever.
         */
        if (got < CHUNK) {
            break;
        }
    }

    if (ferror(stdin)) {
        perror("fread");
        free(buf);
        return EXIT_FAILURE;
    }

    bench(buf, len);

    for (int i = 0; i < ROUNDS; ++i) {
        printf("%" PRIu64 "\n", diffs[i]);
    }

    free(buf);
    return 0;
}