multicore-bench-0.1.7/0000755000175000017500000000000014707160665013305 5ustar kylekylemulticore-bench-0.1.7/update-gh-pages-for-tag0000755000175000017500000000263014704505212017530 0ustar kylekyle#!/bin/bash set -xeuo pipefail TMP=tmp NAME=multicore-bench MAIN=doc GIT="git@github.com:ocaml-multicore/$NAME.git" DOC="_build/default/_doc/_html" GH_PAGES=gh-pages TAG="$1" if ! [ -e $NAME.opam ] || [ $# -ne 1 ] || \ { [ "$TAG" != main ] && ! [ "$(git tag -l "$TAG")" ]; }; then CMD="${0##*/}" cat << EOF Usage: $CMD tag-name-or-main This script - clones the repository into a temporary directory ($TMP/$NAME), - builds the documentation for the specified tag or main, - updates $GH_PAGES branch with the documentation in directory for the tag, - prompts whether to also update the main documentation in $MAIN directory, and - prompts whether to push changes to $GH_PAGES. EOF exit 1 fi opam install sherlodoc mkdir $TMP cd $TMP git clone $GIT cd $NAME git checkout "$TAG" dune build @doc --root=. git checkout $GH_PAGES if [ "$TAG" != main ]; then echo "Updating the $TAG doc." if [ -e "$TAG" ]; then git rm -rf "$TAG" fi cp -r $DOC "$TAG" git add "$TAG" fi read -p "Update the main doc? (y/N) " -n 1 -r echo if [[ $REPLY =~ ^[Yy]$ ]]; then if [ -e $MAIN ]; then git rm -rf $MAIN fi cp -r $DOC $MAIN git add $MAIN else echo "Skipped main doc update." fi git commit -m "Update $NAME doc for $TAG" read -p "Push changes to $GH_PAGES? (y/N) " -n 1 -r echo if ! [[ $REPLY =~ ^[Yy]$ ]]; then echo "Leaving $TMP for you to examine." exit 1 fi git push cd .. cd .. rm -rf $TMP multicore-bench-0.1.7/multicore-bench.opam0000644000175000017500000000176314704505212017236 0ustar kylekyleversion: "0.1.7" # This file is generated by dune, edit dune-project instead opam-version: "2.0" synopsis: "Framework for writing multicore benchmark executables to run on current-bench" maintainer: ["Vesa Karvonen "] authors: ["Vesa Karvonen "] license: "ISC" homepage: "https://github.com/ocaml-multicore/multicore-bench" bug-reports: "https://github.com/ocaml-multicore/multicore-bench/issues" depends: [ "dune" {>= "3.14"} "domain-local-await" {>= "1.0.1"} "multicore-magic" {>= "2.1.0"} "mtime" {>= "2.0.0"} "yojson" {>= "2.1.0"} "domain_shims" {>= "0.1.0"} "backoff" {>= "0.1.0"} "mdx" {>= "2.4.0" & with-test} "sherlodoc" {>= "0.2" & with-doc} "odoc" {>= "2.4.1" & with-doc} "ocaml" {>= "4.12.0"} ] build: [ ["dune" "subst"] {dev} [ "dune" "build" "-p" name "-j" jobs "@install" "@runtest" {with-test} "@doc" {with-doc} ] ] dev-repo: "git+https://github.com/ocaml-multicore/multicore-bench.git"multicore-bench-0.1.7/bench.Dockerfile0000644000175000017500000000057014704505212016343 0ustar kylekyleFROM ocaml/opam:debian-ocaml-5.2 RUN sudo ln -sf /usr/bin/opam-2.1 /usr/bin/opam WORKDIR bench-dir RUN sudo chown opam . COPY *.opam ./ RUN opam remote add origin https://github.com/ocaml/opam-repository.git && \ opam update RUN opam pin -yn --with-version=dev . RUN opam install -y --deps-only --with-test . COPY . ./ RUN opam exec -- dune build --release bench/main.exe multicore-bench-0.1.7/README.md0000644000175000017500000002466414704505212014564 0ustar kylekyle[API reference](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/index.html) · [Benchmarks](https://bench.ci.dev/ocaml-multicore/multicore-bench/branch/main?worker=pascal&image=bench.Dockerfile) # Multicore-bench Multicore bench is a framework for writing multicore benchmark executables to run locally on your computer and on [current-bench](https://github.com/ocurrent/current-bench). Benchmarking multicore algorithms tends to require a certain amount of setup, such as spawning domains, synchronizing them before work, timing the work, collecting the times, and joining domains, that this framework tries to take care of for you as conveniently as possible. Furthermore, benchmarking multicore algorithms in OCaml also involves a number of pitfalls related to how the OCaml runtime works. For example, when only a single domain is running, several operations provided by the OCaml runtime use specialized implementations that take advantage of the fact that there is only a single domain running. In most cases, when trying to benchmark multicore algorithms, you don't actually want to measure those specialized runtime implementations. The design of multicore bench is considered **_experimental_**. We are planning to improve the design along with [current-bench](https://github.com/ocurrent/current-bench) in the future to allow more useful benchmarking experience. ## Crash course to [current-bench](https://github.com/ocurrent/current-bench) Note that, at the time of writing this, [current-bench](https://github.com/ocurrent/current-bench) is work in progress and does not accept enrollment for community projects. However, assuming you have access to it, to run multicore benchmarks with [current-bench](https://github.com/ocurrent/current-bench) a number of things need to be setup: - You will need a [Makefile](Makefile) with a `bench` target at the root of the project. The [current-bench](https://github.com/ocurrent/current-bench) service will run your benchmarks through that. - You likely also want to have a [bench.Dockerfile](bench.Dockerfile) and [.dockerignore](.dockerignore) at the root of the project. Make sure that the Dockerfile is layered such that it will pickup opam updates when desired while also avoiding unnecessary work during rebuilds. - You will also need the benchmarks and that is where this framework may help. You can find examples of multicore benchmarks from the [Saturn](https://github.com/ocaml-multicore/saturn/tree/main/bench), [Kcas](https://github.com/ocaml-multicore/kcas/tree/main/bench), and [Picos](https://github.com/ocaml-multicore/picos/tree/main/bench) projects and from the [bench](bench) directory of this repository. For multicore benchmarks you will also need to have [current-bench](https://github.com/ocurrent/current-bench) configured to use a multicore machine, which currently needs to be done by the [current-bench](https://github.com/ocurrent/current-bench) maintainers. ## Example: Benchmarking `Atomic.incr` under contention Let's look at a simple example with detailed comments of how one might benchmark `Atomic.incr` under contention. Note that this example is written here as a [MDX](https://github.com/realworldocaml/mdx) document or test. Normally you would write a benchmark as a command line executable and would likely compile it in release mode with a native compiler. We first open the [`Multicore_bench`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/index.html) module: ```ocaml # open Multicore_bench ``` This brings into scope multiple modules including [`Suite`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Suite/index.html), [`Util`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Util/index.html), [`Times`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Times/index.html), and [`Cmd`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Cmd/index.html) that we used below. Typically one would divide a benchmark executable into benchmark suites for different algorithms and data structures. To illustrate that pattern, let's create a module `Bench_atomic` for our benchmarks suite on atomics: ```ocaml # module Bench_atomic : sig (* The entrypoint to a suite is basically a function. There is a type alias for the signature. *) val run_suite : Suite.t end = struct (* [run_one] runs a single benchmark with the given budget and number of domains. *) let run_one ~budgetf ~n_domains () = (* We scale the number of operations using [Util.iter_factor], which depends on various factors such as whether we are running on a 32- or 64-bit machine, using a native or bytecode compiler, and whether we are running on multicore OCaml. The idea is to make it possible to use the benchmark executable as a test that can be run even on slow CI machines. *) let n = 10 * Util.iter_factor in (* In this example, [atomic] is the data structure we are benchmarking. *) let atomic = Atomic.make 0 |> Multicore_magic.copy_as_padded (* We explicitly pad the [atomic] to avoid false sharing. With false sharing measurements are likely to have a lot of noise that makes it difficult to get useful results. *) in (* We store the number of operations to perform in a scalable countdown counter. The idea is that we want all the workers or domains to work at the same time as much as possible, because we want to measure performance under contention. So, instead of e.g. simply having each domain run a fixed count loop, which could lead to some domains finishing well before others, we let the number of operations performed by each domain vary. *) let n_ops_to_do = Countdown.create ~n_domains () in (* [init] is called on each domain before [work]. The return value of [init] is passed to [work]. *) let init _domain_index = (* It doesn't matter that we set the countdown counter multiple times. We could also use a [before] callback to do setup before [work]. *) Countdown.non_atomic_set n_ops_to_do n in (* [work] is called on each domain and the time it takes is recorded. The second argument comes from [init]. *) let work domain_index () = (* Because we are benchmarking operations that take a very small amount of time, we run our own loop to perform the operations. This has pros and cons. One con is that the loop overhead will be part of the measurement, which is something to keep in mind when interpreting the results. One pro is that this gives more flexibility in various ways. *) let rec work () = (* We try to allocate some number of operations to perform. *) let n = Countdown.alloc n_ops_to_do ~domain_index ~batch:100 in (* If we got zero, then we should stop. *) if n <> 0 then begin (* Otherwise we perform the operations and try again. *) for _=1 to n do Atomic.incr atomic done; work () end in work () in (* [config] is a name for the configuration of the benchmark. In this case we distinguish by the number of workers or domains. *) let config = Printf.sprintf "%d worker%s" n_domains (if n_domains = 1 then "" else "s") in (* [Times.record] does the heavy lifting to spawn domains and measure the time [work] takes on them. *) let times = Times.record ~budgetf ~n_domains ~init ~work () in (* [Times.to_thruput_metrics] takes the measurements and produces both a metric for the time of a single operation and for the total thruput over all the domains. *) Times.to_thruput_metrics ~n ~singular:"incr" ~config times (* [run_suite] runs the benchmarks in this suite with the given budget. *) let run_suite ~budgetf = (* In this case we run the benchmark with various number of domains. We use [concat_map] to collect the results as a flat list of outputs. *) [ 1; 2; 4; 8 ] |> List.concat_map @@ fun n_domains -> run_one ~budgetf ~n_domains () end module Bench_atomic : sig val run_suite : Suite.t end ``` We then collect all the suites into an association list. The association list has a name and entry point for each suite: ```ocaml # let benchmarks = [ ("Atomic", Bench_atomic.run_suite) ] val benchmarks : (string * Suite.t) list = [("Atomic", )] ``` Usually the list of benchmarks is in the main module of the benchmark executable along with an invocation of [`Cmd.run`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Cmd/index.html#val-run): ```ocaml non-deterministic # Cmd.run ~benchmarks ~argv:[||] () { "results": [ { "name": "Atomic", "metrics": [ { "name": "time per incr/1 worker", "value": 11.791, "units": "ns", "trend": "lower-is-better", "description": "Time to process one incr", "#best": 9.250000000000002, "#mean": 12.149960000000002, "#median": 11.791, "#sd": 1.851061543655424, "#runs": 25 }, { "name": "incrs over time/1 worker", "value": 84.81044864727335, "units": "M/s", "trend": "higher-is-better", "description": "Total number of incrs processed", "#best": 108.1081081081081, "#mean": 84.25129565093134, "#median": 84.81044864727335, "#sd": 12.911113376793846, "#runs": 25 }, // ... ] } ] } - : unit = () ``` By default [`Cmd.run`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Cmd/index.html#val-run) interprets command line arguments from [`Sys.argv`](https://v2.ocaml.org/api/Sys.html#VALargv). Unlike what one would typically do, we explicitly specify `~argv:[||]`, because this code is being run through the [MDX](https://github.com/realworldocaml/mdx) tool. Note that the output above is just a sample. The timings are non-deterministic and will slightly vary from one run of the benchmark to another even on a single computer. multicore-bench-0.1.7/dune-project0000644000175000017500000000164414704505212015620 0ustar kylekyle(lang dune 3.14) (name multicore-bench) (version 0.1.7) (generate_opam_files true) (implicit_transitive_deps false) (authors "Vesa Karvonen ") (maintainers "Vesa Karvonen ") (source (github ocaml-multicore/multicore-bench)) (homepage "https://github.com/ocaml-multicore/multicore-bench") (license ISC) (using mdx 0.4) (package (name multicore-bench) (synopsis "Framework for writing multicore benchmark executables to run on current-bench") (depends (domain-local-await (>= 1.0.1)) (multicore-magic (>= 2.1.0)) (mtime (>= 2.0.0)) (yojson (>= 2.1.0)) (domain_shims (>= 0.1.0)) (backoff (>= 0.1.0)) ;; Test dependencies (mdx (and (>= 2.4.0) :with-test)) ;; Documentation dependencies (sherlodoc (and (>= 0.2) :with-doc)) (odoc (and (>= 2.4.1) :with-doc)) ;; OCaml version (ocaml (>= 4.12.0)))) multicore-bench-0.1.7/.ocamlformat0000644000175000017500000000007214704505212015575 0ustar kylekyleprofile = default version = 0.26.2 exp-grouping=preserve multicore-bench-0.1.7/bench/0000755000175000017500000000000014707160665014364 5ustar kylekylemulticore-bench-0.1.7/bench/bench_hashtbl.ml0000644000175000017500000000705114704505212017471 0ustar kylekyleopen Multicore_bench module Int = struct include Int let hash = Fun.id end module Htbl = Hashtbl.Make (Int) let mutex = Mutex.create () let run_one ~budgetf ~n_domains ~use_mutex ?(n_keys = 1000) ~percent_mem ?(percent_add = (100 - percent_mem + 1) / 2) ?(prepopulate = true) () = let limit_mem = percent_mem in let limit_add = percent_mem + percent_add in assert (0 <= limit_mem && limit_mem <= 100); assert (limit_mem <= limit_add && limit_add <= 100); let t = Htbl.create n_keys in if prepopulate then for _ = 1 to n_keys do let value = Random.bits () in let key = value mod n_keys in Htbl.replace t key value done; let n_ops = (if use_mutex then 100 else 400) * Util.iter_factor in let n_ops = (100 + percent_mem) * n_ops / 100 in let n_ops_todo = Countdown.create ~n_domains () in let init _ = Countdown.non_atomic_set n_ops_todo n_ops; Random.State.make_self_init () in let work_no_mutex domain_index state = let rec work () = let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in if n <> 0 then let rec loop n = if 0 < n then let value = Random.State.bits state in let op = (value asr 20) mod 100 in let key = value mod n_keys in if op < percent_mem then begin begin match Htbl.find t key with _ -> () | exception Not_found -> () end; loop (n - 1) end else if op < limit_add then begin Htbl.replace t key value; loop (n - 1) end else begin Htbl.remove t key; loop (n - 1) end else work () in loop n in work () in let work_mutex domain_index state = let rec work () = let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in if n <> 0 then let rec loop n = if 0 < n then let value = Random.State.bits state in let op = (value asr 20) mod 100 in let key = value mod n_keys in if op < percent_mem then begin Mutex.lock mutex; begin match Htbl.find t key with _ -> () | exception Not_found -> () end; Mutex.unlock mutex; loop (n - 1) end else if op < limit_add then begin Mutex.lock mutex; Htbl.replace t key value; Mutex.unlock mutex; loop (n - 1) end else begin Mutex.lock mutex; Htbl.remove t key; Mutex.unlock mutex; loop (n - 1) end else work () in loop n in work () in let config = let percent_mem = Printf.sprintf "%d%% reads" percent_mem in if use_mutex then Printf.sprintf "%d worker%s, %s" n_domains (if n_domains = 1 then "" else "s") percent_mem else Printf.sprintf "one domain, %s" percent_mem in let work = if use_mutex then work_mutex else work_no_mutex in Times.record ~budgetf ~n_domains ~init ~work () |> Times.to_thruput_metrics ~n:n_ops ~singular:"operation" ~config let run_suite ~budgetf = ([ 10; 50; 90 ] |> List.concat_map @@ fun percent_mem -> run_one ~budgetf ~n_domains:1 ~use_mutex:false ~percent_mem ()) @ (Util.cross [ 10; 50; 90 ] [ 1; 2; 4; 8 ] |> List.concat_map @@ fun (percent_mem, n_domains) -> run_one ~budgetf ~n_domains ~use_mutex:true ~percent_mem ()) multicore-bench-0.1.7/bench/bench_unix.ml0000644000175000017500000000202614704505212017024 0ustar kylekyleopen Multicore_bench let run_one ~budgetf ~n_domains () = let block_size = 4096 in let n_blocks = 16 in let init _ = let inn, out = Unix.pipe ~cloexec:true () in (inn, out, Bytes.create block_size, Bytes.create 1) in let work _ (inn, out, block, byte) = for _ = 1 to n_blocks do let n = Unix.write out block 0 block_size in assert (n = block_size); for _ = 1 to block_size do let n : int = Unix.read inn byte 0 1 in assert (n = 1) done done; Unix.close inn; Unix.close out in let config = Printf.sprintf "%d worker%s" n_domains (if n_domains = 1 then "" else "s") in Times.record ~budgetf ~n_domains ~n_warmups:1 ~n_runs_min:1 ~init ~work () |> Times.to_thruput_metrics ~n:(block_size * n_blocks * n_domains) ~singular:"blocking read" ~config let run_suite ~budgetf = [ 1; 2; 4 ] |> List.concat_map @@ fun n_domains -> if Sys.win32 || Domain.recommended_domain_count () < n_domains then [] else run_one ~budgetf ~n_domains () multicore-bench-0.1.7/bench/bench_incr.ml0000644000175000017500000000440714704505212017001 0ustar kylekyleopen Multicore_bench let run_one ~budgetf ~n_domains ~approach () = let counter = Atomic.make 0 |> Multicore_magic.copy_as_padded in let n_ops = 500 * Util.iter_factor / n_domains in let n_ops_todo = Countdown.create ~n_domains () in let init _ = Countdown.non_atomic_set n_ops_todo n_ops in let work domain_index () = match approach with | `Cas -> let rec work () = let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in if n <> 0 then let rec loop n = if 0 < n then begin let v = Atomic.get counter in let success = Atomic.compare_and_set counter v (v + 1) in loop (n - Bool.to_int success) end else work () in loop n in work () | `Cas_backoff -> let rec work () = let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in if n <> 0 then let rec loop backoff n = if 0 < n then begin let v = Atomic.get counter in if Atomic.compare_and_set counter v (v + 1) then loop Backoff.default (n - 1) else loop (Backoff.once backoff) n end else work () in loop Backoff.default n in work () | `Incr -> let rec work () = let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in if n <> 0 then let rec loop n = if 0 < n then begin Atomic.incr counter; loop (n - 1) end else work () in loop n in work () in let config = Printf.sprintf "%s, %d domains" (match approach with | `Cas -> "CAS" | `Cas_backoff -> "CAS with backoff" | `Incr -> "Incr") n_domains in Times.record ~budgetf ~n_domains ~init ~work () |> Times.to_thruput_metrics ~n:n_ops ~singular:"op" ~config let run_suite ~budgetf = Util.cross [ `Cas; `Cas_backoff; `Incr ] [ 1; 2; 4; 8 ] |> List.concat_map @@ fun (approach, n_domains) -> if Domain.recommended_domain_count () < n_domains then [] else run_one ~budgetf ~n_domains ~approach () multicore-bench-0.1.7/bench/bench_ref_mutex.ml0000644000175000017500000000354214704505212020043 0ustar kylekyleopen Multicore_bench module Ref = struct type 'a t = 'a ref let make = ref let[@inline] compare_and_set x before after = !x == before && begin x := after; true end let[@inline] exchange x after = let before = !x in x := after; before end type t = Op : string * 'a * ('a Ref.t -> unit) * ('a Ref.t -> unit) -> t (** For some reason allocating the mutex inside [run_one] tends to cause performance hiccups, i.e. some operations appear to be 10x slower than others, which doesn't make sense. So, we allocate the mutex here. *) let mutex = Mutex.create () let run_one ~budgetf ?(n_iter = 250 * Util.iter_factor) (Op (name, value, op1, op2)) = let loc = Ref.make value in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin Mutex.lock mutex; op1 loc; Mutex.unlock mutex; Mutex.lock mutex; op2 loc; Mutex.unlock mutex; loop (i - 2) end in loop n_iter in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name let run_suite ~budgetf = [ (let get x = !x |> ignore in Op ("get", 42, get, get)); (let incr x = x := !x + 1 in Op ("incr", 0, incr, incr)); (let push x = x := 101 :: !x and pop x = match !x with [] -> () | _ :: xs -> x := xs in Op ("push & pop", [], push, pop)); (let cas01 x = Ref.compare_and_set x 0 1 |> ignore and cas10 x = Ref.compare_and_set x 1 0 |> ignore in Op ("cas int", 0, cas01, cas10)); (let xchg1 x = Ref.exchange x 1 |> ignore and xchg0 x = Ref.exchange x 0 |> ignore in Op ("xchg int", 0, xchg1, xchg0)); (let swap x = let l, r = !x in x := (r, l) in Op ("swap", (4, 2), swap, swap)); ] |> List.concat_map @@ run_one ~budgetf multicore-bench-0.1.7/bench/bench_queue.ml0000644000175000017500000000106014704505212017162 0ustar kylekyleopen Multicore_bench module Queue = Stdlib.Queue let run_one_domain ~budgetf ?(n_msgs = 50 * Util.iter_factor) () = let t = Queue.create () in let op push = if push then Queue.push 101 t else Queue.take_opt t |> ignore in let init _ = assert (Queue.is_empty t); Util.generate_push_and_pop_sequence n_msgs in let work _ bits = Util.Bits.iter op bits in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain" let run_suite ~budgetf = run_one_domain ~budgetf () multicore-bench-0.1.7/bench/bench_ref.ml0000644000175000017500000000361714704505212016624 0ustar kylekyleopen Multicore_bench module Ref = struct type 'a t = 'a ref let make = ref let get = ( ! ) let[@poll error] [@inline never] incr x = x := !x + 1 let[@poll error] [@inline never] compare_and_set x before after = !x == before && begin x := after; true end let[@poll error] [@inline never] exchange x after = let before = !x in x := after; before let rec modify ?(backoff = Backoff.default) x f = let before = get x in let after = f before in if not (compare_and_set x before after) then modify ~backoff:(Backoff.once backoff) x f end type t = Op : string * int * 'a * ('a Ref.t -> unit) * ('a Ref.t -> unit) -> t let run_one ~budgetf ?(n_iter = 500 * Util.iter_factor) (Op (name, extra, value, op1, op2)) = let n_iter = n_iter * extra in let loc = Ref.make value in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin op1 loc; op2 loc; loop (i - 2) end in loop n_iter in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name let run_suite ~budgetf = [ (let get x = Ref.get x |> ignore in Op ("get", 10, 42, get, get)); (let incr x = Ref.incr x in Op ("incr", 1, 0, incr, incr)); (let push x = Ref.modify x (fun xs -> 101 :: xs) and pop x = Ref.modify x (function [] -> [] | _ :: xs -> xs) in Op ("push & pop", 2, [], push, pop)); (let cas01 x = Ref.compare_and_set x 0 1 |> ignore and cas10 x = Ref.compare_and_set x 1 0 |> ignore in Op ("cas int", 1, 0, cas01, cas10)); (let xchg1 x = Ref.exchange x 1 |> ignore and xchg0 x = Ref.exchange x 0 |> ignore in Op ("xchg int", 1, 0, xchg1, xchg0)); (let swap x = Ref.modify x (fun (x, y) -> (y, x)) in Op ("swap", 2, (4, 2), swap, swap)); ] |> List.concat_map @@ run_one ~budgetf multicore-bench-0.1.7/bench/main.ml0000644000175000017500000000071514704505212015631 0ustar kylekylelet benchmarks = [ ("Ref with [@poll error]", Bench_ref.run_suite); ("Ref with Mutex", Bench_ref_mutex.run_suite); ("Atomic", Bench_atomic.run_suite); ("Hashtbl", Bench_hashtbl.run_suite); ("Queue", Bench_queue.run_suite); ("Stack", Bench_stack.run_suite); ("Unix", Bench_unix.run_suite); ("Atomic incr", Bench_incr.run_suite); ("Bounded_q", Bench_bounded_q.run_suite); ] let () = Multicore_bench.Cmd.run ~benchmarks () multicore-bench-0.1.7/bench/bench_bounded_q.ml0000644000175000017500000001000614704505212017776 0ustar kylekyleopen Multicore_bench module Queue = Stdlib.Queue module Bounded_q : sig type 'a t val create : ?capacity:int -> unit -> 'a t val is_empty : 'a t -> bool val push : 'a t -> 'a -> unit val pop : 'a t -> 'a val pop_opt : 'a t -> 'a option end = struct type 'a t = { mutex : Mutex.t; queue : 'a Queue.t; capacity : int; not_empty : Condition.t; not_full : Condition.t; } let create ?(capacity = Int.max_int) () = if capacity < 0 then invalid_arg "negative capacity" else let mutex = Mutex.create () and queue = Queue.create () and not_empty = Condition.create () and not_full = Condition.create () in { mutex; queue; capacity; not_empty; not_full } let is_empty t = Mutex.lock t.mutex; let result = Queue.is_empty t.queue in Mutex.unlock t.mutex; result let is_full_unsafe t = t.capacity <= Queue.length t.queue let push t x = Mutex.lock t.mutex; match while is_full_unsafe t do Condition.wait t.not_full t.mutex done with | () -> Queue.push x t.queue; let n = Queue.length t.queue in Mutex.unlock t.mutex; if n = 1 then Condition.broadcast t.not_empty | exception exn -> Mutex.unlock t.mutex; raise exn let pop t = Mutex.lock t.mutex; match while Queue.length t.queue = 0 do Condition.wait t.not_empty t.mutex done with | () -> let n = Queue.length t.queue in let elem = Queue.pop t.queue in Mutex.unlock t.mutex; if n = t.capacity then Condition.broadcast t.not_full; elem | exception exn -> Mutex.unlock t.mutex; raise exn let pop_opt t = Mutex.lock t.mutex; let n = Queue.length t.queue in let elem_opt = Queue.take_opt t.queue in Mutex.unlock t.mutex; if n = t.capacity then Condition.broadcast t.not_full; elem_opt end let run_one_domain ~budgetf ?(n_msgs = 50 * Util.iter_factor) () = let t = Bounded_q.create () in let op push = if push then Bounded_q.push t 101 else Bounded_q.pop_opt t |> ignore in let init _ = assert (Bounded_q.is_empty t); Util.generate_push_and_pop_sequence n_msgs in let work _ bits = Util.Bits.iter op bits in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain" let run_one ~budgetf ~n_adders ~n_takers ?(n_msgs = 50 * Util.iter_factor) () = let n_domains = n_adders + n_takers in let t = Bounded_q.create () in let n_msgs_to_take = Countdown.create ~n_domains:n_takers () in let n_msgs_to_add = Countdown.create ~n_domains:n_adders () in let init _ = assert (Bounded_q.is_empty t); Countdown.non_atomic_set n_msgs_to_take n_msgs; Countdown.non_atomic_set n_msgs_to_add n_msgs in let work i () = if i < n_adders then let domain_index = i in let rec work () = let n = Countdown.alloc n_msgs_to_add ~domain_index ~batch:100 in if 0 < n then begin for i = 1 to n do Bounded_q.push t i done; work () end in work () else let domain_index = i - n_adders in let rec work () = let n = Countdown.alloc n_msgs_to_take ~domain_index ~batch:100 in if n <> 0 then begin for _ = 1 to n do ignore (Bounded_q.pop t) done; work () end in work () in let config = let format role n = Printf.sprintf "%d %s%s" n role (if n = 1 then "" else "s") in Printf.sprintf "%s, %s" (format "adder" n_adders) (format "taker" n_takers) in Times.record ~budgetf ~n_domains ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config let run_suite ~budgetf = run_one_domain ~budgetf () @ (Util.cross [ 1; 2; 4 ] [ 1; 2; 4 ] |> List.concat_map @@ fun (n_adders, n_takers) -> if Domain.recommended_domain_count () < n_adders + n_takers then [] else run_one ~budgetf ~n_adders ~n_takers ()) multicore-bench-0.1.7/bench/dune0000644000175000017500000000054214704505212015227 0ustar kylekyle(* -*- tuareg -*- *) let maybe_domain_shims_and_threads = if Jbuild_plugin.V1.ocaml_version < "5" then "domain_shims threads.posix" else "" let () = Jbuild_plugin.V1.send @@ {| (test (name main) (action (run %{test} -brief)) (libraries multicore-bench backoff unix multicore-magic |} ^ maybe_domain_shims_and_threads ^ {| )) |} multicore-bench-0.1.7/bench/bench_atomic.ml0000644000175000017500000000316514704505212017322 0ustar kylekyleopen Multicore_bench module Atomic = struct include Stdlib.Atomic let rec modify ?(backoff = Backoff.default) x f = let before = Atomic.get x in let after = f before in if not (Atomic.compare_and_set x before after) then modify ~backoff:(Backoff.once backoff) x f end type t = | Op : string * int * 'a * ('a Atomic.t -> unit) * ('a Atomic.t -> unit) -> t let run_one ~budgetf ?(n_iter = 500 * Util.iter_factor) (Op (name, extra, value, op1, op2)) = let n_iter = n_iter * extra in let loc = Atomic.make value in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin op1 loc; op2 loc; loop (i - 2) end in loop n_iter in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name let run_suite ~budgetf = [ (let get x = Atomic.get x |> ignore in Op ("get", 10, 42, get, get)); (let incr x = Atomic.incr x in Op ("incr", 1, 0, incr, incr)); (let push x = Atomic.modify x (fun xs -> 101 :: xs) and pop x = Atomic.modify x (function [] -> [] | _ :: xs -> xs) in Op ("push & pop", 2, [], push, pop)); (let cas01 x = Atomic.compare_and_set x 0 1 |> ignore and cas10 x = Atomic.compare_and_set x 1 0 |> ignore in Op ("cas int", 1, 0, cas01, cas10)); (let xchg1 x = Atomic.exchange x 1 |> ignore and xchg0 x = Atomic.exchange x 0 |> ignore in Op ("xchg int", 1, 0, xchg1, xchg0)); (let swap x = Atomic.modify x (fun (x, y) -> (y, x)) in Op ("swap", 2, (4, 2), swap, swap)); ] |> List.concat_map @@ run_one ~budgetf multicore-bench-0.1.7/bench/bench_stack.ml0000644000175000017500000000105714704505212017151 0ustar kylekyleopen Multicore_bench module Stack = Stdlib.Stack let run_one_domain ~budgetf ?(n_msgs = 50 * Util.iter_factor) () = let t = Stack.create () in let op push = if push then Stack.push 101 t else Stack.pop_opt t |> ignore in let init _ = assert (Stack.is_empty t); Util.generate_push_and_pop_sequence n_msgs in let work _ bits = Util.Bits.iter op bits in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain" let run_suite ~budgetf = run_one_domain ~budgetf () multicore-bench-0.1.7/LICENSE.md0000644000175000017500000000136314704505212014700 0ustar kylekyleCopyright (c) 2024, Vesa Karvonen Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. multicore-bench-0.1.7/.prettierrc0000644000175000017500000000021514704505212015453 0ustar kylekyle{ "arrowParens": "avoid", "bracketSpacing": false, "printWidth": 80, "semi": false, "singleQuote": true, "proseWrap": "always" } multicore-bench-0.1.7/.dockerignore0000644000175000017500000000000714704505212015742 0ustar kylekyle_build multicore-bench-0.1.7/lib/0000755000175000017500000000000014704505212014037 5ustar kylekylemulticore-bench-0.1.7/lib/countdown.ml0000644000175000017500000000365614704505212016423 0ustar kylekylemodule Atomic = Multicore_magic.Transparent_atomic type t = int Atomic.t array let create ~n_domains () = if n_domains < 1 then invalid_arg "n_domains < 1"; let ceil_pow_2_minus_1 n = let open Nativeint in let n = of_int n in let n = logor n (shift_right_logical n 1) in let n = logor n (shift_right_logical n 2) in let n = logor n (shift_right_logical n 4) in let n = logor n (shift_right_logical n 8) in let n = logor n (shift_right_logical n 16) in to_int (if Sys.int_size > 32 then logor n (shift_right_logical n 32) else n) in let n = ceil_pow_2_minus_1 n_domains in let atomics = Array.init n_domains (fun _ -> Atomic.make_contended 0) in Array.init n @@ fun i -> Array.unsafe_get atomics (i mod n_domains) let rec arity t i = if i < Array.length t && Array.unsafe_get t i != Array.unsafe_get t 0 then arity t (i + 1) else i let[@inline] arity t = arity t 1 let non_atomic_set t count = if count < 0 then invalid_arg "count < 0"; let n = arity t in let d = count / n in let j = count - (n * d) in for i = 0 to n - 1 do Atomic.set (Array.unsafe_get t i) (d + Bool.to_int (i < j)) done let rec get t count i = if i < Array.length t && Array.unsafe_get t i != Array.unsafe_get t 0 then get t (count + Int_ext.max 0 (Atomic.get (Array.unsafe_get t i))) (i + 1) else count let[@inline] get t = get t (Int_ext.max 0 (Atomic.get (Array.unsafe_get t 0))) 1 let rec alloc t ~batch i = if i < Array.length t then let c = Array.unsafe_get t i in if 0 < Atomic.get c then let n = Atomic.fetch_and_add c (-batch) in if 0 < n then Int_ext.min n batch else alloc t ~batch (i + 1) else alloc t ~batch (i + 1) else 0 let[@inline] alloc t ~domain_index ~batch = let c = Array.unsafe_get t domain_index in if 0 < Atomic.get c then let n = Atomic.fetch_and_add c (-batch) in if 0 < n then Int_ext.min n batch else alloc t ~batch 0 else alloc t ~batch 0 multicore-bench-0.1.7/lib/set_ext.ml0000644000175000017500000000021014704505212016035 0ustar kylekylelet make (type t) (compare : t -> _) = let (module Elt) = Ordered.make compare in (module Set.Make (Elt) : Set.S with type elt = t) multicore-bench-0.1.7/lib/barrier.mli0000644000175000017500000000015214704505212016166 0ustar kylekyletype t val make : int -> t val await : t -> unit val poison : t -> exn -> Printexc.raw_backtrace -> unit multicore-bench-0.1.7/lib/util.ml0000644000175000017500000000431314704505212015347 0ustar kylekylelet iter_factor = let factor b = if b then 10 else 1 in factor (64 <= Sys.word_size) * factor (Sys.backend_type = Native) * factor (1 < Domain.recommended_domain_count ()) let rec alloc ?(batch = 1000) counter = let n = Atomic.get counter in if n = 0 then 0 else let batch = Int_ext.min n batch in if Atomic.compare_and_set counter n (n - batch) then batch else alloc ~batch counter let cross xs ys = xs |> List.concat_map @@ fun x -> ys |> List.map @@ fun y -> (x, y) module Bits = struct type t = { mutable bytes : Bytes.t; mutable length : int } let create () = { bytes = Bytes.create 1; length = 0 } let push t bool = let capacity = Bytes.length t.bytes lsl 3 in if t.length == capacity then t.bytes <- Bytes.extend t.bytes 0 (capacity lsr 3); let byte_i = t.length lsr 3 in let mask = 1 lsl (t.length land 7) in t.length <- t.length + 1; let byte = Char.code (Bytes.unsafe_get t.bytes byte_i) in let byte = if bool then byte lor mask else byte land lnot mask in Bytes.unsafe_set t.bytes byte_i (Char.chr byte) let length t = t.length let iter fn t = let i = ref 0 in let n = t.length in while !i < n do let ix = !i in i := !i + 8; let byte = Char.code (Bytes.unsafe_get t.bytes (ix lsr 3)) in let n = n - ix in fn (0 <> byte land 1); if 1 < n then fn (0 <> byte land 2); if 2 < n then fn (0 <> byte land 4); if 3 < n then fn (0 <> byte land 8); if 4 < n then fn (0 <> byte land 16); if 5 < n then fn (0 <> byte land 32); if 6 < n then fn (0 <> byte land 64); if 7 < n then fn (0 <> byte land 128) done end let generate_push_and_pop_sequence ?(state = Random.State.make_self_init ()) n_msgs = let bits = Bits.create () in let rec loop length n_push n_pop = if 0 < n_push || 0 < n_pop then begin let push = Random.State.bool state && 0 < n_push in Bits.push bits push; loop (if push then length + 1 else if 0 < length then length - 1 else length) (n_push - Bool.to_int push) (n_pop - Bool.to_int ((not push) && 0 < length)) end else length in let length = loop 0 n_msgs n_msgs in assert (length = 0); bits multicore-bench-0.1.7/lib/infix_pair.ml0000644000175000017500000000004414704505212016517 0ustar kylekyletype ('a, 'b) t = ( :: ) of 'a * 'b multicore-bench-0.1.7/lib/option_ext.ml0000644000175000017500000000100614704505212016556 0ustar kylekylelet pair x y = match (x, y) with Some x, Some y -> Some (x, y) | _ -> None module Syntax = struct let ( & ) l r x = match l x with | None -> None | Some l -> begin match r x with None -> None | Some r -> Some Infix_pair.(l :: r) end let ( let* ) = Option.bind let ( >>= ) = Option.bind let ( >=> ) f g x = f x >>= g let ( let+ ) x f = Option.map f x let ( >>+ ) = ( let+ ) let ( >+> ) f g x = f x >>+ g let pure = Option.some let ( and* ) = pair let ( and+ ) = pair end multicore-bench-0.1.7/lib/int_ext.ocaml_lt_4_13.ml0000644000175000017500000000015214704505212020360 0ustar kylekylelet min (x : int) (y : int) = if x < y then x else y let max (x : int) (y : int) = if x < y then y else x multicore-bench-0.1.7/lib/data.ml0000644000175000017500000000233314704505212015303 0ustar kylekyleopen Option_ext.Syntax module Trend = struct type t = [ `Lower_is_better | `Higher_is_better ] let parse = Json.as_string >=> function | "lower-is-better" -> Some `Lower_is_better | "higher-is-better" -> Some `Higher_is_better | _ -> None end module Metric = struct type units = string type t = { name : string; value : float; units : units; trend : Trend.t; description : string; } let parse = (Json.prop "name" >=> Json.as_string & Json.prop "value" >=> Json.as_float & Json.prop "units" >=> Json.as_string & Json.prop "trend" >=> Trend.parse & Json.prop "description" >=> Json.as_string) >+> fun (name :: value :: units :: trend :: description) -> { name; value; units; trend; description } let name x = x.name end module Benchmark = struct type t = { name : string; metrics : Metric.t list } let parse = (Json.prop "name" >=> Json.as_string & Json.prop "metrics" >=> Json.as_list >+> List.filter_map Metric.parse) >+> fun (name :: metrics) -> { name; metrics } let name x = x.name end module Results = struct type t = Benchmark.t list let parse = Json.prop "results" >=> Json.as_list >+> List.filter_map Benchmark.parse end multicore-bench-0.1.7/lib/list_ext.ml0000644000175000017500000000132414704505212016224 0ustar kylekylelet default_duplicate _ _ = invalid_arg "duplicate key" let default_missing _ _ = None let zip_by (type k) ?(duplicate = default_duplicate) ?(missing = default_missing) (compare : k -> _) key_of xs ys = let (module M) = Map_ext.make compare in let to_map xs = xs |> List.fold_left (fun m x -> m |> M.update (key_of x) @@ function | None -> Some x | Some y -> duplicate x y) M.empty in M.merge (fun _ x y -> match (x, y) with | Some x, Some y -> Some (x, y) | Some x, None -> missing `R x | None, Some y -> missing `L y | None, None -> None) (to_map xs) (to_map ys) |> M.bindings |> List.map snd multicore-bench-0.1.7/lib/multicore_bench.ml0000644000175000017500000000033514704505212017534 0ustar kylekylemodule Trend = Trend module Metric = Metric module Unit_of_rate = Unit_of_rate module Unit_of_time = Unit_of_time module Times = Times module Suite = Suite module Cmd = Cmd module Countdown = Countdown module Util = Util multicore-bench-0.1.7/lib/trend.ml0000644000175000017500000000026014704505212015503 0ustar kylekyletype t = [ `Lower_is_better | `Higher_is_better ] let to_json = function | `Lower_is_better -> `String "lower-is-better" | `Higher_is_better -> `String "higher-is-better" multicore-bench-0.1.7/lib/multicore_bench.mli0000644000175000017500000002325514704505212017713 0ustar kylekyle(** Multicore bench is a framework for writing multicore benchmark executables to run on {{:https://github.com/ocurrent/current-bench}current-bench}. To use the framework one typically opens it {[ open Multicore_bench ]} which brings a number of submodules into scope. *) module Trend : sig (** Dealing with trends. *) type t = [ `Lower_is_better | `Higher_is_better ] (** Whether a lower or higher value is better. *) end module Metric : sig (** Dealing with benchmark metrics. *) type t (** Represents a metric. *) val make : metric:string -> config:string -> ?units:string -> ?trend:[< Trend.t ] -> ?description:string -> [< `Float of float ] -> t (** [make ~metric ~config value] constructs a metric with given specification. *) end module Unit_of_rate : sig (** Dealing with units of rate. *) type t = [ `_1 (** 1/s *) | `k (** 10{^ 3}/s or k/s *) | `M (** 10{^ 6}/s or M/s *) | `G (** 10{^ 9}/s or G/s *) ] (** Represents a unit of rate, i.e. how many per second. *) val to_divisor : [< t ] -> float (** [to_divisor t] converts the unit of rate [t] to a divisor. *) val to_mnemonic : [< t ] -> string (** [to_mnemonic t] returns a human readable mnemonic for the unit of rate [t]. *) end module Unit_of_time : sig (** Dealing with units of time. *) type t = [ `s (** seconds *) | `ms (** milliseconds *) | `mus (** microseconds *) | `ns (** nanoseconds *) ] (** Represents a unit of time. *) val to_multiplier : [< t ] -> float (** [to_multiplier t] converts the unit of time [t] to a multiplier. *) val to_mnemonic : [< t ] -> string (** [to_mnemonic t] returns a human readable mnemonic for the unit of time [t]. *) end module Times : sig (** Recording timings of benchmarks running on multiple domains in parallel and producing metrics from the recorded timings. *) type t (** Represents a record of elapsed times of multiple runs of a benchmark running on multiple domains. *) val record : budgetf:float -> n_domains:int -> ?ensure_multi_domain:bool -> ?domain_local_await:[< `Busy_wait | `Neglect > `Busy_wait ] -> ?n_warmups:int -> ?n_runs_min:int -> ?n_runs_max:int -> ?before:(unit -> unit) -> init:(int -> 's) -> ?wrap:(int -> 's -> (unit -> unit) -> unit) -> work:(int -> 's -> unit) -> ?after:(unit -> unit) -> unit -> t (** [record ~budgetf ~n_domains ~init ~work ()] essentially repeatedly runs [let x = init i in wrap i x (fun () -> .. work i x ..)] on specified number of domains, [i ∊ [0, n_domains-1]], and records the times that calls of [work] take. The calls of [work] are synchronized to start as simultaneously as possible. Optional arguments: - [~ensure_multi_domain]: Whether to run an extra busy untimed domain when [n_domains] is [1]. Doing so prevents the OCaml runtime from using specialized runtime implementations. Defaults to [true]. - [~domain_local_await]: Specifies whether and how to configure {{:https://github.com/ocaml-multicore/domain-local-await/}domain-local-await} or DLA. [`Neglect] does not reconfigure DLA. [`Busy_wait] configures DLA to use a busy-wait implementation, which prevents domains from going to sleep. Defaults to [`Busy_wait]. - [~n_warmups]: Specifies the number of warmup runs to perform before the actual measurements. Defaults to [3]. - [~n_runs_min]: Specifies the minimum number of timed runs. The upper bound is determined dynamically based on [budgetf]. Defaults to [7]. - [~n_runs_max]: Specifies the maximum number of timed runs. Defaults to [1023]. - [~before]: Specifies an action to run on one domain before [init]. - [~after]: Specifies an action to run on one domain after [work]. *) val to_thruput_metrics : n:int -> singular:string -> ?plural:string -> config:string -> ?unit_of_time:Unit_of_time.t -> ?unit_of_rate:Unit_of_rate.t -> t -> Metric.t list (** [to_thruput_metrics ~n ~singular ~config times] produces a pair of metrics from the recorded [times] where one metric is for the time a single operation takes and the other is the thruput of operations over all domains. Optional arguments: - [~plural]: Plural for the operation. Defaults to [singular ^ "s"]. - [~unit_of_time]: Unit of time for the duration of a single operation. Defaults to [`ns]. - [~unit_of_rate]: Unit of rate for the number of operations per second. Defaults to [`M]. *) end module Suite : sig (** Dealing with benchmark suites. *) type t = budgetf:float -> Metric.t list (** Represents a benchmark suite, i.e. a function that produces a list of metric outputs for {{:https://github.com/ocurrent/current-bench}current-bench}. *) end module Cmd : sig (** Command line interface for a benchmark executable. *) type output = [ `JSON (** [`JSON] gives the JSON output for {{:https://github.com/ocurrent/current-bench}current-bench}. *) | `Brief (** [`Brief] gives concise human readable output. *) | `Diff of string (** [`Diff "path.json"] gives concise human readable diff against results stored in specified [path.json] file. *) ] (** Specifies the output format. *) val run : benchmarks:(string * Suite.t) list -> ?budgetf:float -> ?filters:string list -> ?debug:bool -> ?output:output -> ?argv:string array -> ?flush:bool -> ?randomize:bool -> unit -> unit (** [run ~benchmarks ()] interprets command line arguments and runs the benchmarks suites based on the arguments. Optional arguments: - [~budgetf]: A budget (usually) in seconds passed to each benchmark suite. This defaults to a small number so that a benchmark suite can be used as a test. - [~filters]: A list of regular expressions to match names of benchmark suites. If any regular expression matches the name of benchmark, then that benchmark will be run. Defaults to [[]]. - [~debug]: Print progress information to help debugging. Defaults to [false]. - [~output]: Output mode. Defaults to [`JSON]. - [~argv]: Array of command line arguments. Defaults to [Sys.argv]. - [~flush]: Whether to flush the standard output after writing it. Defaults to [true]. - [~randomize]: Whether to randomize the order of suites or not. Defaults to [true]. Command line arguments take precedence over the optional arguments. In other words, you can specify the optional arguments to give defaults for the benchmark executable. *) end module Countdown : sig (** Scalable low-level countdown. *) type t (** Represents a countdown counter. *) val create : n_domains:int -> unit -> t (** [create ~n_domains ()] returns a new countdown counter with initial value of [0]. *) val non_atomic_set : t -> int -> unit (** [non_atomic_set countdown count] sets the [count] of the [countdown]. ⚠️ This operation is not atomic. However, it is safe to call [non_atomic_set] with the same [countdown] and [count] in parallel, because the [countdown] will be initialized deterministically. *) val get : t -> int (** [get countdown] returns the count of the [countdown]. *) val alloc : t -> domain_index:int -> batch:int -> int (** [alloc countdown ~domain_index ~batch] tries to reduce the count of the [countdown] by at most [batch] (which must be positive) and returns the number by which the count was reduced or [0] in case the count was already [0]. *) end module Util : sig (** Utilities for creating benchmarks. ⚠️ In the future we expect to regroup these utilities under different modules and deprecate them in this module. *) val iter_factor : int (** A multiplier depending various factors such as whether we are running on a 32- or 64-bit machine (1x/10x), bytecode or native (1x/10x), and whether we are running on single-core or multicore OCaml (1x/10x). *) val alloc : ?batch:int -> int Atomic.t -> int (** [alloc ~batch n] tries to decrement the specified atomic variable [n] by at most the optional amount [~batch] and not beyond [n] having value [0]. Returns the amount by which [n] was decremented, which is [0] only in case [n] is [0]. *) val cross : 'a list -> 'b list -> ('a * 'b) list (** [cross xs ys] returns a list formed by pairing each element of [xs] with each element of [ys]. For example: {[ # Util.cross [1; 2; 3] ["a"; "b"] - : (int * string) list = [(1, "a"); (1, "b"); (2, "a"); (2, "b"); (3, "a"); (3, "b")] ]} *) module Bits : sig (** A minimalistic bitset data structure. *) type t (** Represents a bitset. *) val create : unit -> t (** [create ()] returns a new zero length bitset. *) val push : t -> bool -> unit (** [push bs b] adds the bit [b] to the end of the bitset [bs]. *) val iter : (bool -> unit) -> t -> unit (** [iter action bs] calls the [action] for each bit in the bitset [bs]. *) end val generate_push_and_pop_sequence : ?state:Random.State.t -> int -> Bits.t (** [generate_push_and_pop_sequence n] generates a bitset where each [true] bit represents a "push" operation and each [false] bit represents a "try_pop" operation. Performing the operations on an initially empty dispenser leaves the dispenser empty. The sequence may include "try_pop" operations at points where the dispenser will be empty. *) end multicore-bench-0.1.7/lib/json.ml0000644000175000017500000000056314704505212015346 0ustar kylekyleopen Option_ext.Syntax type t = Yojson.Safe.t let as_assoc = function `Assoc assoc -> Some assoc | (_ : t) -> None let prop key = as_assoc >=> List.assoc_opt key let as_list = function `List list -> Some list | (_ : t) -> None let as_string = function `String string -> Some string | (_ : t) -> None let as_float = function `Float float -> Some float | (_ : t) -> None multicore-bench-0.1.7/lib/cmd.ml0000644000175000017500000001425114704505212015137 0ustar kylekyleopen Data type output = [ `JSON | `Brief | `Diff of string ] let worse_colors = [| 196; 197; 198; 199; 200; 201 |] let better_colors = [| 46; 47; 48; 49; 50; 51 |] let replace_non_breaking_spaces = let a_non_breaking_space = Str.regexp " " in Str.global_substitute a_non_breaking_space (fun _ -> " ") let duplicate kind name x _ = failwith (Printf.sprintf "Duplicate %s: %s" kind (name x |> replace_non_breaking_spaces)) let print_diff base next = List_ext.zip_by ~duplicate:(duplicate "benchmark" Benchmark.name) String.compare Benchmark.name base next |> List.iter @@ fun ((base : Benchmark.t), (next : Benchmark.t)) -> Printf.printf "%s:\n" base.name; let zipped = List_ext.zip_by ~duplicate:(duplicate "metric" Metric.name) String.compare Metric.name base.metrics next.metrics in let extreme_of join trend = List.fold_left (fun acc ((base : Metric.t), (next : Metric.t)) -> if trend <> base.trend || trend <> next.trend then acc else join acc (next.value /. base.value)) 1.0 zipped in let min_higher = extreme_of Float.min `Higher_is_better in let max_higher = extreme_of Float.max `Higher_is_better in let min_lower = extreme_of Float.min `Lower_is_better in let max_lower = extreme_of Float.max `Lower_is_better in zipped |> List.iter @@ fun ((base : Metric.t), (next : Metric.t)) -> Printf.printf " %s:\n" base.name; if base.trend <> next.trend || base.units <> next.units || Float.equal base.value next.value then Printf.printf " %.2f %s\n" next.value next.units else let times = next.value /. base.value in let colors, extreme = if next.trend = `Higher_is_better then if times < 1.0 then (worse_colors, min_higher) else (better_colors, max_higher) else if 1.0 < times then (worse_colors, max_lower) else (better_colors, min_lower) in let range = Float.abs (extreme -. 1.0) in let color = colors.(Float.to_int (Float.round (Float.of_int (Array.length colors - 1) *. Float.abs (extreme -. times) /. range))) in Printf.printf " %.2f %s = \x1b[1;38;5;%dm%.2f\x1b\x1b[0;39;49m x %.2f %s\n" next.value next.units color times base.value base.units let run_benchmark ~budgetf ~debug (name, fn) = if debug then (* I wish there was a way to tell dune not to capture stderr. *) Printf.printf "Running: %s\n%!" name; `Assoc [ ("name", `String name); ("metrics", `List (fn ~budgetf)) ] let name_of = function | `Assoc (("name", `String name) :: _) -> name | _ -> failwith "bug" let build_filter = function | [] -> Fun.const true | filters -> begin let regexps = filters |> List.map Str.regexp in fun (name, _) -> regexps |> List.exists @@ fun regexp -> match Str.search_forward regexp name 0 with | _ -> true | exception Not_found -> false end let shuffle xs = let xs = Array.of_list xs in let state = Random.State.make_self_init () in let n = Array.length xs in for i = 0 to n - 2 do let j = Random.State.int state (n - i) + i in let t = xs.(i) in xs.(i) <- xs.(j); xs.(j) <- t done; Array.to_list xs let run ~benchmarks ?(budgetf = 0.025) ?(filters = []) ?(debug = false) ?(output = `JSON) ?(argv = Sys.argv) ?(flush = true) ?(randomize = true) () = let budgetf = ref budgetf in let filters = ref filters in let debug = ref debug in let output = ref output in let randomize = ref randomize in let rec specs = [ ("-budget", Arg.Set_float budgetf, "seconds\t Budget for a benchmark"); ( "-debug", Arg.Set debug, "\t Print progress information to help debugging" ); ( "-diff", Arg.String (fun path -> output := `Diff path), "path.json\t Show diff against specified base results" ); ( "-brief", Arg.Unit (fun () -> output := `Brief), "\t Show brief human readable results." ); ("-help", Unit help, "\t Show this help message"); ("--help", Unit help, "\t Show this help message"); ] and help () = Arg.usage (Arg.align specs) (Printf.sprintf "\n\ Usage: %s