pax_global_header00006660000000000000000000000064151263361670014524gustar00rootroot0000000000000052 comment=bd4533f1f70c2b975cbd5769a60d8eaaea1d2233 thread-pool-5.1.0/000077500000000000000000000000001512633616700137455ustar00rootroot00000000000000thread-pool-5.1.0/.clang-format000066400000000000000000000106261512633616700163250ustar00rootroot00000000000000AccessModifierOffset: -4 AlignAfterOpenBracket: DontAlign AlignArrayOfStructures: None AlignConsecutiveAssignments: None AlignConsecutiveBitFields: None AlignConsecutiveDeclarations: None AlignConsecutiveMacros: None AlignConsecutiveShortCaseStatements: Enabled: false AlignEscapedNewlines: Left AlignOperands: DontAlign AlignTrailingComments: Kind: Always OverEmptyLines: 0 AllowAllArgumentsOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false AllowBreakBeforeNoexceptSpecifier: Never AllowShortBlocksOnASingleLine: Empty AllowShortCaseLabelsOnASingleLine: false AllowShortCompoundRequirementOnASingleLine: true AllowShortEnumsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: Never AllowShortLambdasOnASingleLine: Empty AllowShortLoopsOnASingleLine: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AttributeMacros: [] BinPackArguments: true BinPackParameters: true BitFieldColonSpacing: Both BraceWrapping: AfterCaseLabel: true AfterClass: true AfterControlStatement: Always AfterEnum: true AfterExternBlock: true AfterFunction: true AfterNamespace: false AfterStruct: true AfterUnion: true BeforeCatch: true BeforeElse: true BeforeLambdaBody: true BeforeWhile: false IndentBraces: false SplitEmptyFunction: false SplitEmptyNamespace: true SplitEmptyRecord: true BracedInitializerIndentWidth: 4 BreakAdjacentStringLiterals: false BreakAfterAttributes: Never BreakBeforeBinaryOperators: None BreakBeforeBraces: Custom BreakBeforeConceptDeclarations: true BreakBeforeInheritanceComma: false BreakBeforeTernaryOperators: true 
BreakConstructorInitializers: BeforeColon BreakInheritanceList: BeforeColon BreakStringLiterals: true BreakTemplateDeclarations: No ColumnLimit: 0 CompactNamespaces: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DeriveLineEnding: true DerivePointerAlignment: false DisableFormat: false EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: LogicalBlock ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true IncludeBlocks: Preserve IncludeIsMainSourceRegex: '' IndentAccessModifiers: false IndentCaseBlocks: false IndentCaseLabels: false IndentExternBlock: AfterExternBlock IndentGotoLabels: true IndentPPDirectives: BeforeHash IndentRequires: false IndentWidth: 4 IndentWrappedFunctionNames: false JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: true LambdaBodyIndentation: Signature Language: Cpp MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: Inner ObjCBinPackProtocolList: Auto ObjCBlockIndentWidth: 4 ObjCBreakBeforeNestedBlockParam: true ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true PPIndentWidth: -1 PackConstructorInitializers: CurrentLine PenaltyBreakAssignment: 0 PenaltyBreakBeforeFirstCallParameter: 0 PenaltyBreakComment: 0 PenaltyBreakFirstLessLess: 0 PenaltyBreakOpenParenthesis: 0 PenaltyBreakString: 0 PenaltyBreakTemplateDeclaration: 0 PenaltyExcessCharacter: 0 PenaltyIndentedWhitespace: 0 PenaltyReturnTypeOnItsOwnLine: 0 PointerAlignment: Left QualifierAlignment: Leave ReferenceAlignment: Pointer ReflowComments: true RemoveBracesLLVM: false SeparateDefinitionBlocks: Always ShortNamespaceLines: 1 SkipMacroDefinitionBody: true SortIncludes: CaseSensitive SortJavaStaticImport: Before SortUsingDeclarations: true SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true SpaceAroundPointerQualifiers: Default SpaceBeforeAssignmentOperators: true SpaceBeforeCaseColon: false 
SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements SpaceBeforeParensOptions: AfterControlStatements: true AfterForeachMacros: true AfterFunctionDeclarationName: false AfterFunctionDefinitionName: false AfterIfMacros: true AfterOverloadedOperator: false BeforeNonEmptyParentheses: false SpaceBeforeRangeBasedForLoopColon: true SpaceBeforeSquareBrackets: false SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: Never SpacesInCStyleCastParentheses: false SpacesInConditionalStatement: false SpacesInContainerLiterals: true SpacesInLineCommentPrefix: Maximum: 1 Minimum: 1 SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Latest TabWidth: 4 UseCRLF: false UseTab: Never thread-pool-5.1.0/.clang-tidy000066400000000000000000000040311512633616700157770ustar00rootroot00000000000000CheckOptions: cppcoreguidelines-pro-type-member-init.IgnoreArrays: true cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true misc-const-correctness.WarnPointersAsValues: true misc-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: true Checks: > *, -abseil-*, -altera-*, -android-*, -boost-*, -bugprone-easily-swappable-parameters, -bugprone-empty-catch, -cert-err58-cpp, -clang-analyzer-alpha.*, -clang-analyzer-debug.*, -clang-analyzer-fuchsia.*, -clang-analyzer-webkit.*, -cppcoreguidelines-avoid-c-arrays, -cppcoreguidelines-avoid-do-while, -cppcoreguidelines-avoid-magic-numbers, -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-macro-usage, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, -cppcoreguidelines-pro-bounds-constant-array-index, -cppcoreguidelines-pro-bounds-pointer-arithmetic, -cppcoreguidelines-pro-type-reinterpret-cast, -cppcoreguidelines-pro-type-vararg, -darwin-*, -fuchsia-*, -google-*, -hicpp-avoid-c-arrays, -hicpp-braces-around-statements, 
-hicpp-member-init, -hicpp-no-array-decay, -hicpp-signed-bitwise, -hicpp-special-member-functions, -hicpp-use-auto, -hicpp-vararg, -linuxkernel-*, -llvm-*, -llvmlibc-*, -misc-definitions-in-headers, -modernize-avoid-bind, -modernize-avoid-c-arrays, -modernize-use-auto, -modernize-use-constraints, -modernize-use-designated-initializers, -modernize-use-ranges, -modernize-use-std-numbers, -modernize-use-trailing-return-type, -mpi-*, -objc-*, -openmp-*, -performance-enum-size, -readability-avoid-nested-conditional-operator, -readability-braces-around-statements, -readability-function-cognitive-complexity, -readability-identifier-length, -readability-magic-numbers, -readability-use-concise-preprocessor-directives, -zircon-*, HeaderFileExtensions: [h, hpp, cppm] HeaderFilterRegex: .* thread-pool-5.1.0/.github/000077500000000000000000000000001512633616700153055ustar00rootroot00000000000000thread-pool-5.1.0/.github/ISSUE_TEMPLATE/000077500000000000000000000000001512633616700174705ustar00rootroot00000000000000thread-pool-5.1.0/.github/ISSUE_TEMPLATE/bug_report.md000066400000000000000000000016521512633616700221660ustar00rootroot00000000000000--- name: Bug report about: Found a bug? Report it here. title: "[BUG]" labels: bug --- **Describe the bug** A clear and concise description of what the bug is. **Minimal working example** A short but complete program that can be compiled to reproduce the error. Paste the program between the two code fences. If it's too long or requires multiple files, attach the file(s) instead. ```cpp ``` **Behavior** What behavior did you expect to get? What actually happened? If the code failed to compile, please include the full output of the compiler. 
**System information** * CPU model, architecture, # of cores and threads: * Operating system: * Name and version of C++ compiler: * Full command used for compiling, including all compiler flags: * Thread pool library version: (Please note that only the latest version of the thread pool library is supported.) **Additional information** Include any additional information here. thread-pool-5.1.0/.github/ISSUE_TEMPLATE/failed-tests.md000066400000000000000000000011431512633616700223750ustar00rootroot00000000000000--- name: Failed tests about: The provided automated tests failed on your system? Report it here. title: "[TEST]" labels: bug --- **System information** * CPU model, architecture, # of cores and threads: * Operating system: * Name and version of C++ compiler: * Full command used for compiling, including all compiler flags: * Thread pool library version: (Please note that only the latest version of the thread pool library is supported.) **Log file** Please attach the log file generated by the automated test program to this issue. **Additional information** Include any additional information here. thread-pool-5.1.0/.github/ISSUE_TEMPLATE/feature_request.md000066400000000000000000000006221512633616700232150ustar00rootroot00000000000000--- name: Feature request about: Want a new feature? Suggest it here. title: "[REQ]" labels: enhancement --- **Describe the new feature** A clear and concise description of the feature you want. **Code example** An example of code that utilizes the suggested feature. Paste or write it between the two code fences. ```cpp ``` **Additional information** Include any additional information here. thread-pool-5.1.0/.github/pull_request_template.md000066400000000000000000000031401512633616700222440ustar00rootroot00000000000000**Pull request policy (please read)** > Contributions are always welcome. 
However, I release my projects in cumulative updates after editing and testing them locally on my system, so **my policy is to never accept any pull requests**. If you open a pull request, and I decide to incorporate your suggestion into the project, I will first modify your code to comply with the project's coding conventions (formatting, syntax, naming, comments, programming practices, etc.), and perform some tests to ensure that the change doesn't break anything. I will then merge it into the next release of the project, possibly together with some other changes. The new release will also include a note in `CHANGELOG.md` with a link to your pull request, and modifications to the documentation in `README.md` as needed. **Describe the changes** What does your pull request fix or add to the library? **Style** Have you formatted your code using the `.clang-format` file attached to this project? **Linting** Have you linted your code using the `.clang-tidy` file attached to this project? **Testing** Have you tested the new code using the provided automated test program `BS_thread_pool_test.cpp` (preferably with the provided test script `test_all.py`) and/or performed any other tests to ensure that the new code works correctly? If so, please provide information about the test system(s): * CPU model, architecture, # of cores and threads: * Operating system: * Name and version of C++ compiler: * Full command used for compiling, including all compiler flags: **Additional information** Include any additional information here. 
thread-pool-5.1.0/.gitignore000066400000000000000000000000341512633616700157320ustar00rootroot00000000000000build temp default_args.txt thread-pool-5.1.0/.vscode-linux/000077500000000000000000000000001512633616700164435ustar00rootroot00000000000000thread-pool-5.1.0/.vscode-linux/c_cpp_properties.json000066400000000000000000000062371512633616700227060ustar00rootroot00000000000000{ "configurations": [ { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerArgs": [ "-stdlib=libc++" ], "compilerPath": "/usr/bin/clang++", "cppStandard": "c++17", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "linux-clang-x64", "name": "Clang C++17" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerArgs": [ "-stdlib=libc++" ], "compilerPath": "/usr/bin/clang++", "cppStandard": "c++20", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "linux-clang-x64", "name": "Clang C++20" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerArgs": [ "-stdlib=libc++" ], "compilerPath": "/usr/bin/clang++", "cppStandard": "c++23", "cStandard": "c23", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "linux-clang-x64", "name": "Clang C++23" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "/usr/bin/g++", "cppStandard": "c++17", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "linux-gcc-x64", "name": "GCC C++17" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "/usr/bin/g++", "cppStandard": "c++20", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "linux-gcc-x64", "name": "GCC C++20" }, { 
"browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "/usr/bin/g++", "cppStandard": "c++23", "cStandard": "c23", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "linux-gcc-x64", "name": "GCC C++23" } ], "version": 4 } thread-pool-5.1.0/.vscode-linux/launch.json000066400000000000000000000043751512633616700206210ustar00rootroot00000000000000{ "configurations": [ { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++17)", "preLaunchTask": "Build for debugging (Clang C++17)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp17", "request": "launch", "type": "lldb" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++20)", "preLaunchTask": "Build for debugging (Clang C++20)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp20", "request": "launch", "type": "lldb" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++23)", "preLaunchTask": "Build for debugging (Clang C++23)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp23", "request": "launch", "type": "lldb" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (GCC C++17)", "preLaunchTask": "Build for debugging (GCC C++17)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-gcc-cpp17", "request": "launch", "type": "cppdbg" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (GCC C++20)", "preLaunchTask": "Build for debugging (GCC C++20)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-gcc-cpp20", "request": "launch", "type": "cppdbg" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (GCC C++23)", "preLaunchTask": "Build for debugging (GCC C++23)", "program": 
"${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-gcc-cpp23", "request": "launch", "type": "cppdbg" } ], "version": "0.2.0" } thread-pool-5.1.0/.vscode-linux/settings.json000066400000000000000000000001371512633616700211770ustar00rootroot00000000000000{ "C_Cpp.codeAnalysis.exclude": { "misc/**": true, "temp/**": true } } thread-pool-5.1.0/.vscode-linux/tasks.json000066400000000000000000000415001512633616700204630ustar00rootroot00000000000000{ "tasks": [ { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=debug", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=debug", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=debug", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++17", "-t=debug", "-v" ], 
"command": "python3", "detail": "Compile active file using g++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (GCC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++20", "-t=debug", "-v" ], "command": "python3", "detail": "Compile active file using g++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (GCC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++23", "-t=debug", "-v" ], "command": "python3", "detail": "Compile active file using g++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (GCC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": "build", "label": "Build optimized (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": 
"build", "label": "Build optimized (Clang C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": "build", "label": "Build optimized (Clang C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++17", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using g++ with warning and optimization flags.", "group": "build", "label": "Build optimized (GCC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++20", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using g++ with warning and optimization flags.", "group": "build", "label": "Build optimized (GCC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++23", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using g++ with warning and optimization flags.", "group": "build", "label": "Build optimized (GCC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, 
"panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++17", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using g++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (GCC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ 
"scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++20", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using g++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (GCC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++23", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using g++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (GCC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "-b", "-v" ], "command": "python3", "detail": "Delete all files in the build folder.", "group": "test", "label": "Clear build folder", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-t=release", "-y", "-v" ], "command": "python3", "detail": "Compile active file using all available compilers and all relevant C++ standards with warning and optimization flags.", "group": "test", "label": "Build all optimized (all compilers and standards)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-t=release", "-y", "-r", "-v" ], "command": "python3", "detail": "Compile active file using all available compilers and all relevant C++ standards with warning and optimization flags and run the program.", "group": 
"test", "label": "Build all optimized and run (all compilers and standards)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" } ], "version": "2.0.0" } thread-pool-5.1.0/.vscode-macos/000077500000000000000000000000001512633616700164065ustar00rootroot00000000000000thread-pool-5.1.0/.vscode-macos/c_cpp_properties.json000066400000000000000000000024261512633616700226450ustar00rootroot00000000000000{ "configurations": [ { "compilerPath": "/usr/local/opt/llvm/bin/clang++", "cppStandard": "c++17", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "macos-clang-x64", "name": "Clang C++17" }, { "compilerPath": "/usr/local/opt/llvm/bin/clang++", "cppStandard": "c++20", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "macos-clang-x64", "name": "Clang C++20" }, { "compilerPath": "/usr/local/opt/llvm/bin/clang++", "cppStandard": "c++23", "cStandard": "c23", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "macos-clang-x64", "name": "Clang C++23" } ], "version": 4 } thread-pool-5.1.0/.vscode-macos/launch.json000066400000000000000000000022411512633616700205520ustar00rootroot00000000000000{ "configurations": [ { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++17)", "preLaunchTask": "Build for debugging (Clang C++17)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp17", "request": "launch", "type": "lldb" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++20)", "preLaunchTask": "Build for debugging (Clang C++20)", "program": 
"${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp20", "request": "launch", "type": "lldb" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++23)", "preLaunchTask": "Build for debugging (Clang C++23)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp23", "request": "launch", "type": "lldb" } ], "version": "0.2.0" } thread-pool-5.1.0/.vscode-macos/settings.json000066400000000000000000000001371512633616700211420ustar00rootroot00000000000000{ "C_Cpp.codeAnalysis.exclude": { "misc/**": true, "temp/**": true } } thread-pool-5.1.0/.vscode-macos/tasks.json000066400000000000000000000231431512633616700204310ustar00rootroot00000000000000{ "tasks": [ { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=debug", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=debug", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=debug", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++23)", "presentation": { "clear": false, 
"echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": "build", "label": "Build optimized (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": "build", "label": "Build optimized (Clang C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=release", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": "build", "label": "Build optimized (Clang C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": 
"always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=release", "-r", "-v" ], "command": "python3", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "-b", "-v" ], "command": "python3", "detail": "Delete all files in the build folder.", "group": "test", "label": "Clear build folder", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-t=release", "-y", "-v" ], "command": "python3", "detail": "Compile active file using all available compilers and all relevant C++ standards with warning and optimization flags.", "group": "test", "label": "Build all optimized (all compilers and standards)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-t=release", "-y", "-r", "-v" ], "command": "python3", "detail": "Compile active file using all available 
compilers and all relevant C++ standards with warning and optimization flags and run the program.", "group": "test", "label": "Build all optimized and run (all compilers and standards)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" } ], "version": "2.0.0" } thread-pool-5.1.0/.vscode-windows/000077500000000000000000000000001512633616700167765ustar00rootroot00000000000000thread-pool-5.1.0/.vscode-windows/c_cpp_properties.json000066400000000000000000000117431512633616700232370ustar00rootroot00000000000000{ "configurations": [ { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerArgs": [ "-stdlib=libc++" ], "compilerPath": "C:/msys64/clang64/bin/clang++.exe", "cppStandard": "c++17", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "windows-clang-x64", "name": "Clang C++17" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerArgs": [ "-stdlib=libc++" ], "compilerPath": "C:/msys64/clang64/bin/clang++.exe", "cppStandard": "c++20", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "windows-clang-x64", "name": "Clang C++20" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerArgs": [ "-stdlib=libc++" ], "compilerPath": "C:/msys64/clang64/bin/clang++.exe", "cppStandard": "c++23", "cStandard": "c23", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "windows-clang-x64", "name": "Clang C++23" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "C:/msys64/ucrt64/bin/g++.exe", "cppStandard": "c++17", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": 
"windows-gcc-x64", "name": "GCC C++17" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "C:/msys64/ucrt64/bin/g++.exe", "cppStandard": "c++20", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "windows-gcc-x64", "name": "GCC C++20" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "C:/msys64/ucrt64/bin/g++.exe", "cppStandard": "c++23", "cStandard": "c23", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "windows-gcc-x64", "name": "GCC C++23" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/bin/Hostx64/x64/cl.exe", "cppStandard": "c++17", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "windows-msvc-x64", "name": "MSVC C++17" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/bin/Hostx64/x64/cl.exe", "cppStandard": "c++20", "cStandard": "c17", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "windows-msvc-x64", "name": "MSVC C++20" }, { "browse": { "limitSymbolsToIncludedHeaders": true }, "compilerPath": "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/bin/Hostx64/x64/cl.exe", "cppStandard": "c++23", "cStandard": "c23", "defines": [ "BS_THREAD_POOL_NATIVE_EXTENSIONS" ], "includePath": [ "${workspaceFolder}/include" ], "intelliSenseMode": "windows-msvc-x64", "name": "MSVC C++23" } ], "version": 4 } thread-pool-5.1.0/.vscode-windows/launch.json000066400000000000000000000067571512633616700211620ustar00rootroot00000000000000{ "configurations": [ { "args": [], 
"cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++17)", "preLaunchTask": "Build for debugging (Clang C++17)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp17", "request": "launch", "type": "lldb" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++20)", "preLaunchTask": "Build for debugging (Clang C++20)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp20", "request": "launch", "type": "lldb" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (Clang C++23)", "preLaunchTask": "Build for debugging (Clang C++23)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-clang-cpp23", "request": "launch", "type": "lldb" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (GCC C++17)", "preLaunchTask": "Build for debugging (GCC C++17)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-gcc-cpp17", "request": "launch", "type": "cppdbg" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (GCC C++20)", "preLaunchTask": "Build for debugging (GCC C++20)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-gcc-cpp20", "request": "launch", "type": "cppdbg" }, { "args": [], "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (GCC C++23)", "preLaunchTask": "Build for debugging (GCC C++23)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-gcc-cpp23", "request": "launch", "type": "cppdbg" }, { "args": [], "console": "integratedTerminal", "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (MSVC C++17)", "preLaunchTask": "Build for debugging (MSVC C++17)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-msvc-cpp17", "request": "launch", "type": "cppvsdbg" }, { "args": [], "console": "integratedTerminal", "cwd": 
"${workspaceFolder}${/}build", "name": "Build and debug (MSVC C++20)", "preLaunchTask": "Build for debugging (MSVC C++20)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-msvc-cpp20", "request": "launch", "type": "cppvsdbg" }, { "args": [], "console": "integratedTerminal", "cwd": "${workspaceFolder}${/}build", "name": "Build and debug (MSVC C++23)", "preLaunchTask": "Build for debugging (MSVC C++23)", "program": "${workspaceFolder}${/}build${/}${fileBasenameNoExtension}_debug-msvc-cpp23", "request": "launch", "type": "cppvsdbg" } ], "version": "0.2.0" } thread-pool-5.1.0/.vscode-windows/settings.json000066400000000000000000000003171512633616700215320ustar00rootroot00000000000000{ "C_Cpp.codeAnalysis.clangTidy.args": [ "--extra-arg-before=--target=x86_64-pc-windows-msvc", ], "C_Cpp.codeAnalysis.exclude": { "misc/**": true, "temp/**": true } } thread-pool-5.1.0/.vscode-windows/tasks.json000066400000000000000000000600321512633616700210170ustar00rootroot00000000000000{ "tasks": [ { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ 
"scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (Clang C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++17", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (GCC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++20", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (GCC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++23", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and debugging flags.", "group": "build", "label": "Build for debugging (GCC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++17", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file 
using cl with warning and debugging flags.", "group": "build", "label": "Build for debugging (MSVC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$msCompile" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++20", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file using cl with warning and debugging flags.", "group": "build", "label": "Build for debugging (MSVC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$msCompile" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++23", "-t=debug", "-v" ], "command": "python", "detail": "Compile active file using cl with warning and debugging flags.", "group": "build", "label": "Build for debugging (MSVC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$msCompile" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": "build", "label": "Build optimized (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": "build", "label": "Build optimized (Clang 
C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and optimization flags.", "group": "build", "label": "Build optimized (Clang C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++17", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and optimization flags.", "group": "build", "label": "Build optimized (GCC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++20", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and optimization flags.", "group": "build", "label": "Build optimized (GCC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++23", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and optimization flags.", "group": "build", "label": "Build optimized (GCC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", 
"revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$gcc" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++17", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using cl with warning and optimization flags.", "group": "build", "label": "Build optimized (MSVC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$msCompile" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++20", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using cl with warning and optimization flags.", "group": "build", "label": "Build optimized (MSVC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$msCompile" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++23", "-t=release", "-v" ], "command": "python", "detail": "Compile active file using cl with warning and optimization flags.", "group": "build", "label": "Build optimized (MSVC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "problemMatcher": [ "$msCompile" ], "type": "cppbuild" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++17", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ 
"scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++20", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=clang++", "-s=c++23", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using clang++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (Clang C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++17", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (GCC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++20", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (GCC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=g++", "-s=c++23", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using g++ with warning and optimization 
flags and run the program.", "group": "test", "label": "Build optimized and run (GCC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++17", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using cl with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (MSVC C++17)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++20", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using cl with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (MSVC C++20)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "${file}", "-c=cl", "-s=c++23", "-t=release", "-r", "-v" ], "command": "python", "detail": "Compile active file using cl with warning and optimization flags and run the program.", "group": "test", "label": "Build optimized and run (MSVC C++23)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true } }, { "args": [ "scripts/compile_cpp.py", "-b", "-v" ], "command": "python", "detail": "Delete all files in the build folder.", "group": "test", "label": "Clear build folder", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" }, { "args": [ "scripts/compile_cpp.py", 
"${file}", "-t=release", "-y", "-v" ], "command": "python", "detail": "Compile active file using all available compilers and all relevant C++ standards with warning and optimization flags.", "group": "test", "label": "Build all optimized (all compilers and standards)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" }, { "args": [ "scripts/compile_cpp.py", "${file}", "-t=release", "-y", "-r", "-v" ], "command": "python", "detail": "Compile active file using all available compilers and all relevant C++ standards with warning and optimization flags and run the program.", "group": "test", "label": "Build all optimized and run (all compilers and standards)", "presentation": { "clear": false, "echo": true, "focus": false, "panel": "shared", "reveal": "always", "revealProblems": "onProblem", "showReuseMessage": true }, "type": "shell" } ], "version": "2.0.0" } thread-pool-5.1.0/CHANGELOG.md000066400000000000000000003223621512633616700155660ustar00rootroot00000000000000# `BS::thread_pool`: a fast, lightweight, modern, and easy-to-use C++17 / C++20 / C++23 thread pool library By **Barak Shoshany**\ Email: \ Website: \ GitHub: * [Version history](#version-history) * [v5.1.0 (2026-01-03)](#v510-2026-01-03) * [v5.0.0 (2024-12-19)](#v500-2024-12-19) * [v4.1.0 (2024-03-22)](#v410-2024-03-22) * [v4.0.1 (2023-12-28)](#v401-2023-12-28) * [v4.0.0 (2023-12-27)](#v400-2023-12-27) * [v3.5.0 (2023-05-25)](#v350-2023-05-25) * [v3.4.0 (2023-05-12)](#v340-2023-05-12) * [v3.3.0 (2022-08-03)](#v330-2022-08-03) * [v3.2.0 (2022-07-28)](#v320-2022-07-28) * [v3.1.0 (2022-07-13)](#v310-2022-07-13) * [v3.0.0 (2022-05-30)](#v300-2022-05-30) * [v2.0.0 (2021-08-14)](#v200-2021-08-14) * [v1.9 (2021-07-29)](#v19-2021-07-29) * [v1.8 (2021-07-28)](#v18-2021-07-28) * [v1.7 (2021-06-02)](#v17-2021-06-02) * [v1.6 (2021-05-26)](#v16-2021-05-26) * [v1.5 
(2021-05-07)](#v15-2021-05-07) * [v1.4 (2021-05-05)](#v14-2021-05-05) * [v1.3 (2021-05-03)](#v13-2021-05-03) * [v1.2 (2021-04-29)](#v12-2021-04-29) * [v1.1 (2021-04-24)](#v11-2021-04-24) * [v1.0 (2021-01-15)](#v10-2021-01-15) ## Version history ### v5.1.0 (2026-01-03) * New/changed features: * Added `detach_bulk()` and `submit_bulk()` member functions to submit tasks in bulk. You can pass either a range of iterators or a container. The mutex protecting the task queue is locked only once for the entire bulk submission, which should improve performance when submitting a large number of tasks; if the tasks were submitted using individual calls to `detach_task()` or `submit_task()` instead, the mutex would need to be locked and unlocked for each task. * `detach_blocks()`/`detach_loop()`/`detach_sequence()` and `submit_blocks()`/`submit_loop()`/`submit_sequence()` now use `detach_bulk()` and `submit_bulk()`, respectively, under the hood for increased performance (the API remains the same). They have also been refactored using helper functions and custom function object classes to reduce code duplication. * All `submit_*` member functions now use a C++17/20 polyfill for `std::move_only_function` when C++23 is not available (or when using libc++, which at the time of this release has not implemented it yet). This allows them to work without using `std::shared_ptr`, which should increase performance. * If the native extensions are enabled, a pool created with the default constructor will now only use the number of threads available to the process, as obtained from `BS::get_os_process_affinity()`, which can be less than the number of hardware threads. See [#161](https://github.com/bshoshany/thread-pool/issues/161). 
* Since importing the C++ Standard Library using `import std` is now supported by all 3 major compilers, the library (and test program) will now use `import std` whenever the macro `BS_THREAD_POOL_IMPORT_STD` is defined, as long as C++23 is enabled, without performing any additional checks for compiler or standard library support (aside from the workaround for GCC mentioned below). * The `BS::tp` enumeration is now properly defined as an `enum class` with the appropriate bitwise operators. * Removed the polyfills for `std::counting_semaphore` and `std::binary_semaphore` from `BS_thread_pool.hpp`, as they are not used by the library itself, to reduce the size of the header file. If you need them, you can copy them from the test program `BS_thread_pool_test.cpp`. * On Windows, if the native extensions are enabled, `WIN32_LEAN_AND_MEAN` is now defined before including `<windows.h>` to reduce compilation time. * Bug fixes: * The system macros `major` and `minor` (from `<sys/sysmacros.h>` on Linux) or `min` and `max` (from `<windows.h>` on Windows) are now automatically undefined if they are detected, to prevent compilation errors. This was also done previously, but only under certain conditions; now it is done unconditionally, as some users reported issues. On Windows, `NOMINMAX` is now defined before including `<windows.h>`, but the `min` and `max` macros are still undefined independently. * `BS::wait_deadlock` is now only exported by the module if exceptions are enabled, preventing a compiler error. See [#160](https://github.com/bshoshany/thread-pool/pull/160). * Fixed a typo in `BS::thread_pool::submit_sequence()` which caused the wrong number of futures to be reserved, potentially resulting in unnecessary reallocations. * `BS::multi_future::wait_until` now correctly waits using the specific clock type passed in the template parameter. * Fixed a bug where `reset()` failed to notify worker threads if the pool was unpaused before resetting. 
* `get_os_thread_affinity()` and `set_os_thread_affinity()` now return `std::nullopt` and `false` (respectively) on Android, as the API is not supported. See [#163](https://github.com/bshoshany/thread-pool/pull/163). * Tests: * The test program `BS_thread_pool_test.cpp` now prints colored output for better readability. This can be disabled by setting the `NO_COLOR` environment variable. * The test program now prints out the Mandelbrot set it generates, downsampled to fit in a terminal window (at 120 character width). This will be in 24-bit color in the terminal, and in monochrome using Unicode blocks in the log file. (If `NO_COLOR` is set, the terminal output will also be in monochrome.) * The test program now looks for `default_args.txt` in both the current folder and the parent folder when reading default command line arguments. * Documentation: * Added an example in `README.md` for getting and setting process affinity. * Removed the suggestion in `README.md` to use the `-pthread` flag on Linux/macOS, as it does not seem to be necessary in order to use the library, at least on the systems I tested with. If you are using a system that requires it, then you probably already know about it. * Updated the instructions in `README.md` for compiling with GCC using `import BS.thread_pool`, and added instructions on how to compile the `std` module with GNU libstdc++. * Fixed many typos and inconsistencies in `README.md`. * Known issues: * At the time of this release, there is a bug in Clang with libc++ where using `std::jthread` in a C++20 module causes a compilation error. As a workaround, until the bug is fixed, the thread pool library automatically falls back to `std::thread` if it detects that Clang and libc++ are being used together with C++20 modules. This workaround can be disabled by defining `BS_THREAD_POOL_DISABLE_WORKAROUNDS` when compiling the module. 
* At the time of this release, there is a bug when using GCC with libstdc++ on Windows via MSYS2 where the `BS.thread_pool` module doesn't compile if both native extensions and `import std` are enabled. As a workaround, until the bug is fixed, the thread pool library automatically falls back to header files if it detects that GCC and libstdc++ are being used together with the C++23 `std` module on Windows. This workaround can be disabled by defining `BS_THREAD_POOL_DISABLE_WORKAROUNDS` when compiling the module. * Development: * The Python script I use for compiling the test program, `compile_cpp.py`, has received numerous improvements: * The script now supports `import std` with GCC in addition to Clang and MSVC. * The script now only recompiles the program if the source file(s), module(s), and optional additional dependencies (added using the flag `-n`/`--deps` or the `deps` field in `compile_cpp.yaml`) have changed since the binary was created. Use `-e`/`--force` to force recompilation. * The script now allows disabling exceptions by either defining `disable_exceptions: true` in `compile_cpp.yaml` or using the flag `-x=true`/`--disable-exceptions=true` (the flag overrides the YAML file). * The script can now compile modules independently using the `-l`/`--as-module` flag. * The script can now clear the output folder using the `-b`/`--clear-output` flag (replaces the `clear_folder.py` script from the previous release). * The script can now test compilation using all possible combinations of compilers and C++ standards available in the system using the `-y`/`--try-all` flag (replaces the `test_all.py` script from the previous release). * The script now automatically detects the Visual Studio installation path on Windows. * The script now implements a more robust mechanism for finding the `std` module. * The script now prints colored output for better readability. This can be disabled by setting the `NO_COLOR` environment variable. 
* The script no longer disables optimizations when compiling with GCC and modules (since that bug is fixed in recent versions of GCC). * Added a VS Code task to compile the test program (or the active file) with all available compilers and all relevant C++ standards, but without running it, to quickly check for compiler compatibility issues. ### v5.0.0 (2024-12-19) * A major new release with many new features, improvements, bug fixes, and performance optimizations! Please note that code written using previous releases may need to be modified to work with the new release. The changes needed to migrate to the new API are explicitly indicated below for your convenience. * **Highlights:** * Added support for C++20 and C++23, while maintaining full C++17 compatibility. In C++20, the library can now optionally be imported as a module using `import BS.thread_pool` on Clang, GCC, and MSVC. In C++23, both the library itself and the test program can now optionally import the C++ Standard Library as a module using `import std` on supported compilers and platforms. Extensive documentation has been added to `README.md` on how to use these features, to ease the transition. * Optional features are now enabled via a bitmask template parameter instead of macros, using the flags `BS::tp::priority`, `BS::tp::pause`, and `BS::tp::wait_deadlock_checks`. This makes the optional features easier to use, allows multiple thread pools with different features to coexist, and makes the library compatible with C++20 modules. Exception handling is now disabled automatically if exceptions are disabled, instead of using a macro. * Added optional native extensions for non-portable features using the operating system's native API: setting the priority and affinity for processes and threads, and setting thread names. These have been tested on the latest versions of Windows, Ubuntu, and macOS. 
* This library is now back to being a true single-header library, with a single header file `BS_thread_pool.hpp`. The utility classes have been combined into the main header file. `BS::timer` has been removed, `BS::signaller` has been replaced with `BS::binary_semaphore` and `BS::counting_semaphore` (in C++17 mode only), and `BS::synced_stream` now supports multiple output streams. * Cleanup functions can now be defined to complement the initialization functions. Both initialization and cleanup functions can now optionally take the index of the thread as an argument. * Parallelization member functions no longer need type casting or template parameters if the start and end indices are of different types. * The worker function no longer incorrectly reads shared variables while the mutex is unlocked. * The type aliases `BS::this_thread::optional_index` and `BS::this_thread::optional_pool` have been removed. Instead, `BS::this_thread::get_index()` returns `std::optional<std::size_t>`, and `BS::this_thread::get_pool()` returns `std::optional<void*>`. The latter must be cast to the correct instantiation of the `BS::thread_pool` class template before using any member functions. * The thread pool version is now accessible using the object `BS::thread_pool_version`, a `constexpr struct` of type `BS::version` with the members `major`, `minor`, and `patch`. This works even if importing the library as a C++20 module, unlike the version macros. * The type `priority_t`, used to set priorities, is now defined as `std::int8_t`, which means it takes values from -128 to +127. The pre-defined priorities in `BS::pr`, such as `BS::pr::highest` or `BS::pr::lowest`, have been updated accordingly. * Exceptions thrown by detached tasks are now caught and prevented from propagating, so that they do not terminate the program. Exceptions thrown by submitted tasks are still rethrown when calling `get()` on the future, as before. 
* Parallelization member functions no longer destruct objects prematurely under certain circumstances. * The test program has been expanded with many new tests for both old and new features. It can also import both the thread pool module using `import BS.thread_pool` (in C++20 and later) and the C++ Standard Library module using `import std` (in C++23) if the appropriate macros are defined, and read default command line arguments from a `default_args.txt` file for debugging purposes. * Added new and improved benchmarks using a highly-optimized multithreaded algorithm which generates a plot of the Mandelbrot set, utilizing a normalized iteration count algorithm and linear interpolation to create smooth coloring. * The type `BS::concurrency_t` has been removed; use `std::size_t` instead. * **C++20 and C++23 support:** * This library now officially supports C++20 and C++23 in addition to C++17. If compiled with C++20 and/or C++23 support (e.g. using the compiler flag `-std=c++23` in Clang/GCC or `/std:c++latest` on MSVC), the library will make use of newly available features for maximum performance, reliability, and usability. * To be clear, the library is still fully compatible with any C++17 standard-compliant compiler. I have no plans to remove C++17 support at the moment, as it is still [the most widely used C++ standard](https://www.jetbrains.com/lp/devecosystem-2023/cpp/) among developers, but that might change in the future. * If C++20 features are available, the library can be imported as a module using `import BS.thread_pool`. This is now the officially recommended way to use the library, as it has many benefits, such as faster compilation times, better encapsulation, no namespace pollution, no include order issues, easier maintainability, simpler dependency management, and more. * The module file itself is `BS.thread_pool.cppm`, located in the `modules` folder, and it is just a thin wrapper around the header file `BS_thread_pool.hpp`. 
* The `constexpr` flag `BS::thread_pool_module` indicates whether the thread pool library was compiled as a module. * To my knowledge, `BS::thread_pool` is one of the only popular C++ libraries that are [currently available as a C++20 module](https://arewemodulesyet.org/) (and certainly the only thread pool library). This feature has been tested with the latest versions of Clang, GCC, and MSVC. Unfortunately, C++20 modules are still (4 years later!) not fully implemented in all compilers, and each compiler implements them differently; for instructions on how to compile and import the `BS.thread_pool` module in each compiler, please see `README.md`. * Known issues: * GCC v14.2.0 (latest version at the time of writing) appears to have an internal compiler error when compiling programs containing modules (or at least, this particular module) with any optimization flags other than `-Og` enabled. Until this is fixed, if you wish to use compiler optimizations, please either include the library as a header file or use a different compiler. * On macOS, Apple Clang v16.0.0 (latest version at the time of writing) does not support C++20 modules. Please either install the latest version of LLVM Clang using [Homebrew](https://formulae.brew.sh/formula/llvm), or include the library as a header file. * Visual Studio Code's C/C++ extension v1.23.2 (latest version at the time of writing) does not yet support modules. My temporary solution for that, as demonstrated in the test program, is to define the macro `BS_THREAD_POOL_TEST_IMPORT_MODULE` (see below) when compiling the test program, but not when editing in the IDE. If the macro is enabled, the module is imported via `import BS.thread_pool`, otherwise the header file is included using `#include "BS_thread_pool.hpp"` as usual. * If C++23 features are available, both the library and the test program can now import the C++ Standard Library as a module using `import std`. 
To enable this, define the macro `BS_THREAD_POOL_IMPORT_STD` at compilation time. This is currently only officially supported by recent versions of MSVC with Microsoft STL or LLVM Clang (**not** Apple Clang) with LLVM libc++. It is not supported by GCC with any standard library, Clang with any standard library other than libc++, any compiler with GNU libstdc++, or any other compiler. * If `BS_THREAD_POOL_IMPORT_STD` is defined, then you must also import the library itself as a module. If the library is included as a header file, this will force the program that included the header file to also import `std`, which is not desirable and can lead to compilation errors if the program `#include`s any Standard Library header files. * Defining the macro before importing the module will not work, as modules cannot access macros defined in the program that imported them. Instead, define the macro as a compiler flag, e.g. `-D BS_THREAD_POOL_IMPORT_STD` (or `/D` for MSVC). * The `constexpr` flag `BS::thread_pool_import_std` indicates whether the thread pool library was compiled with `import std`. Note that the flag will be `false` if `BS_THREAD_POOL_IMPORT_STD` is defined but the compiler or standard library does not support importing the C++ Standard Library as a module. * If C++20 features are available, the pool will use `std::jthread` instead of `std::thread`. This allows considerable simplification and added safety, since the threads no longer need to be manually joined, and `std::stop_token` is used to stop the workers automatically when destructing the threads. This eliminates the need for the `destroy_threads()` member function, as well as the `workers_running` flag, which are now only used in C++17 mode. * If C++20 features are available, the library will use concepts to enforce the signature of the initialization function and to selectively enable member functions related to pausing only if pausing is enabled. 
In C++17 mode, the library will use SFINAE to achieve essentially the same effect. * If C++23 features are available, the task queue will use `std::move_only_function` instead of `std::function`. This allows `submit_task()` to work without using a shared pointer, which should increase performance. * **API migration:** All of the C++20/C++23 features listed above are either automatically applied based on compiler settings or optional. If you are still using C++17, or if you are using C++20 or C++23 but do not wish to import the thread pool library and/or the C++ Standard Library as a module, no changes are needed. * **Optional features overhaul:** * All optional features are now enabled via a bitmask template parameter instead of macros. This works using `if constexpr`, `std::conditional_t`, and concepts (in C++20 and later) or SFINAE (in C++17). * This change makes the optional features much easier and more intuitive to use, as you no longer need to define any macros before including the header file. * Additionally, it allows you to have multiple thread pools in the same program with different optional features enabled or disabled. For example, you can have one pool with task priority enabled and another without. * Most importantly, this makes it possible to import the library as a C++20 module, as macros cannot be read by imported modules. * The bitmask flags are members of the `BS::tp` enumeration: * `BS::tp::priority` enables task priority (previously enabled via the macro `BS_THREAD_POOL_ENABLE_PRIORITY`, which has been removed). * `BS::tp::pause` enables pausing the pool (previously enabled via the macro `BS_THREAD_POOL_ENABLE_PAUSE`, which has been removed). * `BS::tp::wait_deadlock_checks` enables deadlock checks in `wait()`/`wait_for()`/`wait_until()` (previously enabled via the macro `BS_THREAD_POOL_ENABLE_WAIT_DEADLOCK_CHECK`, which has been removed). * The default is `BS::tp::none`, which disables all optional features. 
* Convenience aliases are defined as follows: * `BS::light_thread_pool` disables all optional features (equivalent to `BS::thread_pool` with the default template parameter, that is, `BS::thread_pool<BS::tp::none>`). * `BS::priority_thread_pool` enables task priority (equivalent to `BS::thread_pool<BS::tp::priority>`). * `BS::pause_thread_pool` enables pausing the pool (equivalent to `BS::thread_pool<BS::tp::pause>`). * `BS::wdc_thread_pool` enables wait deadlock checks (equivalent to `BS::thread_pool<BS::tp::wait_deadlock_checks>`). * There are no aliases with multiple features enabled; if this is desired, you must either pass the template parameter explicitly or define your own alias. Note that the parameter is a bitmask, so to enable multiple features, you need to use the bitwise OR operator `|`, e.g. `BS::thread_pool<BS::tp::priority | BS::tp::pause>` to enable both task priority and pausing. * The macro `BS_THREAD_POOL_DISABLE_EXCEPTION_HANDLING` has been removed. Exception handling is disabled automatically if exceptions are disabled, based on whether the feature-test macro `__cpp_exceptions` is defined. * The exception thrown by wait deadlock checks is now `BS::wait_deadlock` instead of `BS::thread_pool::wait_deadlock`, to avoid having to deal with different template parameters. * The macro `BS_THREAD_POOL_LIGHT_TEST` has been removed from the test program, as all optional features are now tested by enabling them selectively via the template parameter, so there is no need to compile with different macros. * If for some reason you forgot which options you enabled when creating the pool, the `static constexpr` members `priority_enabled`, `pause_enabled`, and `wait_deadlock_checks_enabled` can be used to check if the corresponding features are enabled. * **API migration:** * `BS::thread_pool` can still be used without the template parameter, for backwards compatibility; this will create a thread pool with all optional features disabled. Therefore, if you did not use any of the optional features in existing code, no changes are needed.
* If your code uses any of the optional features by defining macros before including the header file, please remove these macros, and instead either use one of the convenience aliases above or define the template parameter explicitly using the `BS::tp` enumeration when creating the pool. * If you use wait deadlock checks, you must now catch the exception `BS::wait_deadlock` instead of `BS::thread_pool::wait_deadlock`. * **Native extensions:** * While portability is one of my guiding principles when developing this library, non-portable features such as setting the thread priority using the operating system's native API are frequently requested by users. Starting with this release, the library includes native extensions, which are disabled by default. * Currently, the extensions provide the following functions (please see `README.md` for details on how to use them): * `BS::get_os_process_affinity()` and `BS::set_os_process_affinity()` to get and set the CPU affinity of the current process in a portable way. Should work on Windows and Linux, but not on macOS, as the native API does not allow it. * `BS::get_os_process_priority()` and `BS::set_os_process_priority()` to get and set the priority of the current process in a portable way. Should work on Windows, Linux, and macOS. * `BS::this_thread::get_os_thread_affinity()` and `BS::this_thread::set_os_thread_affinity()` to get and set the CPU affinity of the current thread in a portable way. Should work on Windows and Linux, but not on macOS, as the native API does not allow it. * `BS::this_thread::get_os_thread_priority()` and `BS::this_thread::set_os_thread_priority()` to get and set the priority of the current thread in a portable way. Should work on Windows, Linux, and macOS. * `BS::this_thread::get_os_thread_name()` and `BS::this_thread::set_os_thread_name()` to get and set the name of the current thread in a portable way, for debugging purposes. Should work on Windows, Linux, and macOS.
* The native extensions may be enabled by defining the macro `BS_THREAD_POOL_NATIVE_EXTENSIONS` at compilation time. * Even if the macro is defined, the extensions are disabled automatically if a supported operating system (Windows, Linux, or macOS) is not detected. * Note that if you are using the library as a C++20 module, defining the macro before importing the module will not work, as modules cannot access macros defined in the program that imported them. Instead, define the macro as a compiler flag, e.g. `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` (or `/D` for MSVC). * The macro `BS_THREAD_POOL_ENABLE_NATIVE_HANDLES` has been removed. The thread pool member function `get_native_handles()` is now part of the native extensions, so it is enabled using the macro `BS_THREAD_POOL_NATIVE_EXTENSIONS`. * Please note that the native extensions have only been tested on Windows 11 23H2, Ubuntu 24.10, and macOS 15.1. They have not been tested on older versions of these operating systems, other Linux distributions, or any other operating systems, and are therefore not guaranteed to work on every system. If you encounter any issues, please report them on the GitHub repository. * The test program only tests the native extensions if the macro `BS_THREAD_POOL_NATIVE_EXTENSIONS` is defined at compilation time. If importing the library as a module, please ensure that the macro is also enabled when compiling the module. * The `constexpr` flag `BS::thread_pool_native_extensions` indicates whether the thread pool library was compiled with native extensions enabled. Note that the flag will be `false` if `BS_THREAD_POOL_NATIVE_EXTENSIONS` is defined but the operating system is unsupported. * **API migration:** The native extensions are a brand new optional feature and do not require any changes to existing code. * **Utility classes:** * This library is now back to being a true single-header library, with a single header file `BS_thread_pool.hpp`. 
The utility classes (previously in a separate header `BS_thread_pool_utils.hpp`, which has been removed) have been combined into the main header file. * The `BS::timer` class has been removed from the library, since it doesn't really have anything to do with multithreading directly. However, it is still available in the test program if you want to use it. * The `BS::signaller` class has been removed from the library, and replaced with `BS::binary_semaphore` and `BS::counting_semaphore`, which are C++17 polyfills for the C++20 classes `std::binary_semaphore` and `std::counting_semaphore`. If C++20 features are available, the polyfills are not used, and instead are just aliases for the standard library classes. The reason is that semaphores can do the same thing that the signaller class was previously used for, but are much more versatile. * The `BS::synced_stream` class now supports printing to more than one output stream. * **API migration:** * If you previously included the `BS_thread_pool_utils.hpp` header file, this is no longer needed. Only include the header `BS_thread_pool.hpp`, or better yet, in C++20 or later, import the library as a module using `import BS.thread_pool`. * If you previously used the `BS::timer` class, it is no longer available in the header file, but if you still need it you can copy it into your program directly from the test program `BS_thread_pool_test.cpp`. * If you previously used the `BS::signaller` class, you can replace it with `BS::binary_semaphore` or `BS::counting_semaphore`. Previously, you defined an object `BS::signaller signal`, and then used `signal.wait()` to wait for the signal, and `signal.ready()` to unblock all waiting threads. 
Now, you can define an object `BS::counting_semaphore signal(0)`, and use `signal.acquire()` to wait for the signal, and `signal.release(num_threads)` to unblock waiting threads; note that the number of threads to release must be passed explicitly, as the semaphore also allows you to unblock only some of them. Use `BS::binary_semaphore` if only one thread will be waiting at any given time. * If you previously used the `BS::synced_stream` class, no changes are needed. * **Cleanup and initialization functions:** * Using the new `set_cleanup_func()` member function, it is now possible to provide the pool with a cleanup function to run in each thread right before it is destroyed, which will happen when the pool is destructed or reset. See [#152](https://github.com/bshoshany/thread-pool/issues/152). * Both initialization and cleanup functions can now optionally take the index of the thread as an argument. * Added a warning in the documentation that both initialization and cleanup functions must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function. * **API migration:** No changes to existing code are needed. * **Parallelization index types:** * All member functions which parallelize collections of tasks, namely `detach_blocks()`, `detach_loop()`, `detach_sequence()`, `submit_blocks()`, `submit_loop()`, and `submit_sequence()`, can now be called with start and end indices of different types. * Previously, the indices had to be of the same type, or the template parameter had to be explicitly specified; this is no longer needed, as the library will automatically cast the indices to a suitable common type. * This was already possible in v2.X.X and v3.X.X, where it was done using [`std::common_type`](https://en.cppreference.com/w/cpp/types/common_type), but I removed it in v4.X.X because `std::common_type` sometimes completely messed up the range of the loop. 
For example, the `std::common_type` of `int` and `unsigned int` is `unsigned int`, which means the loop will only use non-negative indices even if the `int` start index was negative, resulting in an integer overflow. * Starting with v5.0.0, the library uses a custom type trait `BS::common_index_type` to determine the common type of the indices. The common type of two signed integers or two unsigned integers is the larger of the integers, while the common type of a signed and an unsigned integer is a signed integer that can hold the full ranges of both integers. This avoids messing up the indices, except in the case of `std::uint64_t`, where there is no fundamental signed type that can hold its entire range. In this case, we choose `std::uint64_t` as the common type, since the most common use case is where the indices go from 0 to `x` where `x` has been previously defined as `std::size_t`. This will fail if the first index is negative; in that case, the user must cast the indices explicitly. * **API migration:** Existing code which uses type casting or explicit template parameters in parallelization functions does not need to be changed, but it can be simplified by removing the casting or template parameters. However, if one index is negative and the other is an unsigned 64-bit integer, casting is still needed (although you should probably not be doing this in the first place, as casting to either of the two types will result in potential narrowing or overflow). * **`BS::this_thread`:** * `BS::this_thread` is now a class instead of a namespace, since defining it as a namespace proved to be incompatible with C++20 modules (at least in some compilers). Defining it as a class also results in a simpler implementation. However, the functionality remains the same, and since it only has static methods, the call syntax for `BS::this_thread::get_index()` and `BS::this_thread::get_pool()` is unchanged. 
* The type aliases `BS::this_thread::optional_index` and `BS::this_thread::optional_pool` have been removed. Instead, `BS::this_thread::get_index()` now returns the explicit type `std::optional<std::size_t>`, and `BS::this_thread::get_pool()` returns `std::optional<void*>`. * The rationale for this removal is that using `std::optional` explicitly provides more information about the type that is being returned, and most users are probably not using the explicit types anyway (either by using `auto` or by invoking the `std::optional` member functions directly on the returned object). * Note that `BS::this_thread::get_pool()` now returns an optional `void*` instead of `BS::thread_pool*`. The reason for that is that `BS::thread_pool` is now a template. Once you obtain the pool pointer, you must cast it to the desired instantiation of the template if you want to use any member functions. Note that you have to cast it to the correct type; if you cast a pointer to a `BS::light_thread_pool` into a pointer to a `BS::priority_thread_pool`, for example, your program will have undefined behavior. * **API migration:** * If your code uses the type aliases, please replace `BS::this_thread::optional_index` with `std::optional<std::size_t>` and `BS::this_thread::optional_pool` with `std::optional<void*>`. * If your code uses `BS::this_thread::get_pool()`, you must now cast the returned pointer to the correct instantiation of the `BS::thread_pool` class template before using any member functions. * **Determining the library version:** * The library now defines the `constexpr` object `BS::thread_pool_version`, which can be used to check the version of the library at compilation time. This object is of type `BS::version`, with members `major`, `minor`, and `patch`, and all comparison operators defined as `constexpr`. It also has a `to_string()` member function and an `operator<<` overload for easy printing at runtime.
For example, you can do `static_assert(BS::thread_pool_version == BS::version(5, 0, 0))`, or you can use it in `if constexpr` for conditional compilation. * The version macros `BS_THREAD_POOL_VERSION_MAJOR`, `BS_THREAD_POOL_VERSION_MINOR`, and `BS_THREAD_POOL_VERSION_PATCH` are still defined, since they can be used in conditional code inclusion, and for backwards compatibility. However, since C++20 modules cannot export macros, `BS::thread_pool_version` is the only way to check the version of the thread pool library if you are importing it as a module. * **API migration:** No changes needed in existing code; if you previously used the macros `BS_THREAD_POOL_VERSION_MAJOR`, `BS_THREAD_POOL_VERSION_MINOR`, and `BS_THREAD_POOL_VERSION_PATCH` to determine the version of the library when including it as a header file, you can still do so. However, if you wish to import the library as a C++20 module, you must use the object `BS::thread_pool_version` instead. * **Task priority:** * The type `priority_t`, used to set priorities, is now defined as `std::int8_t`, which means it takes values from -128 to +127. The pre-defined priorities in `BS::pr`, such as `BS::pr::highest` or `BS::pr::lowest`, have been updated accordingly (also, it is now an `enum` instead of a namespace). The old priority type `std::int16_t` was unnecessarily large; having fewer priority values means less bookkeeping in the priority queue, which should also improve performance. * **API migration:** If you used the pre-defined priorities in `BS::pr`, no changes are needed. If you specified numerical priorities directly, you may need to adjust them to the new range of -128 to +127. * **Miscellaneous:** * Exceptions thrown by detached tasks are now caught and prevented from propagating, so that they do not terminate the program. Exceptions thrown by submitted tasks are still rethrown when calling `get()` on the future, as before. 
* All member functions which parallelize collections of tasks, namely `detach_blocks()`, `detach_loop()`, `detach_sequence()`, `submit_blocks()`, `submit_loop()`, and `submit_sequence()`, now store the callable object inside an `std::shared_ptr`, and then pass that shared pointer to each subtask. Previously, the callable was passed using perfect forwarding, which under some circumstances resulted in mistakenly moving the callable during the first iteration of the loop, thus potentially destructing captured objects prematurely. The new shared pointer method resolves this issue, while also avoiding making copies of the callable. See [#149](https://github.com/bshoshany/thread-pool/issues/149). * Fixed incorrect reading of shared variables while the mutex is unlocked in the worker function. See [#159](https://github.com/bshoshany/thread-pool/issues/159). * Added documentation to `README.md` for all the new features. In addition, fixed some typos and other minor issues in the existing documentation. * Added instructions in `README.md` for installing the library using CMake with `FetchContent` instead of CPM. See [#155](https://github.com/bshoshany/thread-pool/pull/155). * The type `BS::concurrency_t` has been removed. In previous versions this type was defined to be the type of the value returned by `std::thread::hardware_concurrency()` (which is supposed to be `unsigned int`), for maximum portability. However, in practice this value is only used to indicate the size of arrays, so `std::size_t` is more appropriate, and this simplifies the code. * **API migration:** If you used `BS::concurrency_t` in your code, please replace it with `std::size_t`. If you previously cast to/from these two types, you can now remove the cast. * **Tests:** * The test program `BS_thread_pool_test.cpp` will import the library as a C++20 module via `import BS.thread_pool` if the macro `BS_THREAD_POOL_TEST_IMPORT_MODULE` is defined, C++20 or later is detected, and a supported compiler is used. 
* The test program will also import the C++ Standard Library as a module using `import std` if the macro `BS_THREAD_POOL_IMPORT_STD` is defined during compilation, on supported compilers and platforms. * The new test `check_copy()` checks that the callable object does not get copied when parallelized into multiple tasks. It will succeed on previous versions of the library, but not if perfect forwarding is removed. * The new test `check_shared_ptr()` checks that captured shared pointers do not prematurely destruct. It will fail on previous versions. * The new test `check_task_destruct()` checks that a task is destructed immediately after it executes, and therefore does not artificially extend the lifetime of any captured objects. * The new test `check_common_index_type()` checks that the type trait `BS::common_index_type` (see above) works as expected. * The new tests `check_os_process_priorities()`, `check_os_thread_priorities()`, `check_os_process_affinity()`, `check_os_thread_affinity()`, and `check_os_thread_names()` check the corresponding features of the native extensions. * The new test `check_callables()` checks that different callable types are accepted by the thread pool. * New command line argument: `stdout`, to print to the standard output, enabled by default. * If the file `default_args.txt` exists in the same folder, the test program reads the default arguments from it (space separated in a single line). Command line arguments can still override these defaults. This is useful when debugging. * The test program will now detect and log the OS, compiler, standard library, C++ standard, available C++ features, whether the thread pool was imported as a C++20 module, and whether the standard library was imported as a module. 
* **Benchmarks:** * Added new and improved benchmarks using a highly-optimized multithreaded algorithm which generates a plot of the Mandelbrot set, utilizing a normalized iteration count algorithm and linear interpolation to create smooth coloring. * These benchmarks are heavily CPU-intensive, and much less limited by memory and cache compared to the benchmarks in previous versions (which used vector or matrix operations). This results in a much higher speedup factor due to multithreading, utilizing every core and thread to their fullest extent. This makes these benchmarks more useful for optimizing the library, since they are more sensitive to the thread pool's own performance. * The full benchmarks are enabled using the command line argument `benchmarks`, which is enabled by default. The command line argument `plot` can be used to just plot the Mandelbrot set once, either instead of or in addition to doing the full benchmarks. This will plot the largest possible image that can be plotted in 5 seconds, and only measure the performance in pixels/ms for the entire plot. * If you want to see the actual plot, pass the `save` command line argument. The plot is saved to a BMP file, since I didn't want to depend on any 3rd-party libraries. This is off by default, since that file can get quite large. * **Development:** * A Python script `compile_cpp.py` has been added to the repository, in the `scripts` folder. It can be used to compile any C++ source file with different compilers on different platforms. The compilation parameters can be configured using command line arguments and/or via an optional YAML configuration file `compile_cpp.yaml` which specifies defined macros, extra compiler flags (per compiler), include folders, modules, and the output folder. * I wrote this script to make it easier for me to test the library with different combinations of compilers, standards, and platforms using the built-in Visual Studio Code tasks. 
I also included three `.vscode` folders (one for each OS) in the repository, with appropriate `c_cpp_properties.json`, `launch.json`, and `tasks.json` files that utilize this script, in case you want to use it in your own projects. However, note that this script is not meant to replace CMake or any full-fledged build system, it's just a convenient script for developing single-header libraries like this one or other small projects. * The `compile_cpp.py` script also transparently handles C++20 modules and importing the C++ Standard Library as a module in C++23. Therefore, users of this library who wish to import it as a C++20 module may find this script particularly useful. * Another Python script `test_all.py` in the `scripts` folder replaces the old PowerShell test script. Tests are now performed in C++17, C++20, and C++23 modes, using all compilers available in the system (Clang, GCC, and/or MSVC). Since there are so many tests, the test script now no longer performs the benchmarks, as that would take too long. * A final Python script `clear_folder.py` in the `scripts` folder is used to clean up output and temporary folders, and integrates with VS Code tasks. ### v4.1.0 (2024-03-22) * This library is now published in [SoftwareX](https://www.sciencedirect.com/journal/softwarex)! If you use it in published research, please cite it as follows: Barak Shoshany, *"A C++17 Thread Pool for High-Performance Scientific Computing"*, [doi:10.1016/j.softx.2024.101687](https://doi.org/10.1016/j.softx.2024.101687), [SoftwareX 26 (2024) 101687](https://www.sciencedirect.com/science/article/pii/S235271102400058X), [arXiv:2105.00613](https://arxiv.org/abs/2105.00613) * Updated the source files, as well as `README.md`, `CITATION.bib`, and `CITATION.cff` with the new citation. * A new macro, `BS_THREAD_POOL_DISABLE_EXCEPTION_HANDLING`, allows the user to disable exception handling in `submit_task()` if it is not needed, or if exceptions are explicitly disabled in the codebase. 
See [#139](https://github.com/bshoshany/thread-pool/issues/139). * Note that this macro can be defined independently of `BS_THREAD_POOL_ENABLE_WAIT_DEADLOCK_CHECK`. Disabling exception handling removes the `try`-`catch` block from `submit_task()`, while enabling wait deadlock checks adds a `throw` expression to `wait()`, `wait_for()`, and `wait_until()`. * If the feature-test macro `__cpp_exceptions` is undefined, `BS_THREAD_POOL_DISABLE_EXCEPTION_HANDLING` is automatically defined, and `BS_THREAD_POOL_ENABLE_WAIT_DEADLOCK_CHECK` is automatically undefined. * Replaced `#pragma once` with old-school include guards using the macros `BS_THREAD_POOL_HPP` and `BS_THREAD_POOL_UTILS_HPP`. There are two main reasons for this: 1. Even though `#pragma once` is supported by the vast majority of modern compilers, it is still a non-standard feature, so using it technically made the library not standards compliant. 2. Include guards make it possible to include the library twice in the same project (for example, once with priority enabled and once without) by undefining the include guard and putting the second include in its own namespace. * Included a description of the destructor behavior for the `BS::thread_pool` class in `README.md`, in the library reference section. See [#143](https://github.com/bshoshany/thread-pool/issues/143). * Removed unnecessary locking in `reset()` if pausing is not enabled. ### v4.0.1 (2023-12-28) * Fixed linkage issue caused by the global variables `BS::this_thread::get_index` and `BS::this_thread::get_pool` not being defined as `inline`. See [#134](https://github.com/bshoshany/thread-pool/issues/134) and [#137](https://github.com/bshoshany/thread-pool/issues/137). * Fixed redundant cast in the `BS::thread_pool::blocks` class, and added `-Wuseless-cast` to the GCC warning flags in `BS_thread_pool_test.ps1` to catch similar issues in the future. See [#133](https://github.com/bshoshany/thread-pool/pull/133).
* Each of the three files `BS_thread_pool_test.cpp`, `BS_thread_pool.hpp`, and `BS_thread_pool_utils.hpp` now contains three macros indicating the major, minor, and patch version of the file. In addition, `BS_thread_pool_test.cpp` now checks whether the versions of all three files match, and aborts compilation if they do not. ### v4.0.0 (2023-12-27) * A major new release with numerous changes, additions, fixes, and improvements. Many frequently requested features have been added, and performance has been optimized. Please note that code written using previous releases will need to be modified to work with the new release. The changes needed to migrate to the new API are explicitly indicated below for your convenience. * Highlights: * The light thread pool has been removed. However, by default, the thread pool is in "light mode". Optional features that may affect performance must be enabled by defining suitable macros. * This library now ships with two stand-alone header files: * `BS_thread_pool.hpp` contains the main `BS::thread_pool` class and the `BS::multi_future` helper classes, and is the only file needed to use the thread pool itself. * `BS_thread_pool_utils.hpp` contains the additional utility classes `BS::signaller`, `BS::synced_stream`, and `BS::timer`, which are fully independent of the thread pool itself and can be used either with or without it. * It is now possible to assign priorities to tasks. Tasks with higher priorities will be executed first. * Member functions for submitting tasks and loops have been renamed for consistency, e.g. `detach_task()` and `submit_task()`, where the prefix `detach` means no future will be returned and `submit` means a future or `BS::multi_future` will be returned. * There are now two ways to parallelize loops into blocks: * `detach_blocks()` and `submit_blocks()` behave the same as loop parallelization in previous releases, running the loop function once per block. 
* `detach_loop()` and `submit_loop()` have a simpler syntax, where the loop function is run once per index, so the user doesn't have to manually run the internal loop for each block. * The new member functions `detach_sequence()` and `submit_sequence()` allow submitting a sequence of tasks enumerated by indices. * It is now possible to run an initialization function in each thread before it starts to execute any submitted tasks. * Tasks submitted with `detach_task()` or `submit_task()` can no longer have arguments. Tasks with arguments must be enclosed inside lambda expressions. This simplifies the API and provides better readability. Tasks can still have return values. * Various ways to obtain information about the threads in the pool have been introduced: * The member function `get_thread_ids()` obtains the unique thread identifiers, and `get_native_handles()` obtains the underlying implementation-defined thread handles. * The new namespace `BS::this_thread` allows obtaining the thread's index in the pool using `BS::this_thread::get_index()` and a pointer to the pool that owns the thread using `BS::this_thread::get_pool()`. * Member functions for waiting for tasks have been renamed for brevity: `wait()`/`wait_for()`/`wait_until()`. In addition, these functions can now optionally throw an exception if the user tries to call them from within a thread of the same pool, which would result in a deadlock. * The first index must now be specified explicitly when parallelizing blocks, loops, and sequences, and it must not be greater than the last index. Also, both indices must now have the same type, or the template parameter should be explicitly specified. * Optimized the way `detach_blocks()`, `submit_blocks()`, `detach_loop()`, and `submit_loop()` split the range of the loop into blocks. * Added a utility class `BS::signaller` to allow simple signalling between threads. * `BS::multi_future<T>` is now a specialization of `std::vector<std::future<T>>` with additional member functions.
* Breaking changes: * The light thread pool has been removed. The original idea was that the light thread pool will allow the user to sacrifice functionality for increased performance. However, in my testing I found that there was no actual performance benefit to the light thread pool. Therefore, there is no reason to keep it. * However, by default, the thread pool is in "light mode". Optional features that may affect performance due to additional checks or more complicated algorithms must be enabled by defining suitable macros before including the library: * `BS_THREAD_POOL_ENABLE_PAUSE` to enable pausing. * `BS_THREAD_POOL_ENABLE_PRIORITY` to enable task priority. * `BS_THREAD_POOL_ENABLE_WAIT_DEADLOCK_CHECK` to enable wait deadlock checks. * **API migration:** * If you previously used `BS_thread_pool_light.hpp`, simply use `BS_thread_pool.hpp` instead. * If you previously used the pausing feature, define the macro `BS_THREAD_POOL_ENABLE_PAUSE` before including `BS_thread_pool.hpp` to enable it. * Member functions have been renamed for better consistency. Each function has a `detach` variant which does not return a future, and a `submit` variant which does return a future (or a `BS::multi_future`): * `detach_task()` and `submit_task()` for single tasks. * `detach_blocks()` and `submit_blocks()` for loops to be split into blocks, where the loop function is executed once per block and must have an internal loop, as in previous releases. * `detach_loop()` and `submit_loop()` for loops to be split into blocks, where the loop function is executed once per index and the pool takes care of the internal loop. * `detach_sequence()` and `submit_sequence()` for sequences of enumerated tasks. 
* **API migration:** Use the new names of the functions: * `push_task()` -> `detach_task()` * `submit()` -> `submit_task()` * `push_loop()` -> `detach_blocks()` * `parallelize_loop()` -> `submit_blocks()` * `wait_for_tasks()`, `wait_for_tasks_duration()`, and `wait_for_tasks_until()` have been renamed to `wait()`, `wait_for()`, and `wait_until()` respectively. * **API migration:** Use the new names of the functions: * `wait_for_tasks()` -> `wait()` * `wait_for_tasks_duration()` -> `wait_for()` * `wait_for_tasks_until()` -> `wait_until()` * Functions for parallelizing loops no longer have dedicated overloads for the special case where the first index is 0. These overloads essentially amount to giving the first function argument a default value, which is not allowed in C++, and can be confusing. In addition, indicating the first index explicitly is better for readability. * **API migration:** Add the first index 0 manually as the first argument if it was omitted. * Functions for parallelizing loops no longer allow the last index to be smaller than the first index. Previously, e.g. `detach_blocks(5, 0, ...)` was equivalent to `detach_blocks(0, 5, ...)`. However, this led to confusing results. Since the first argument is the first index and the second argument is the index *after* the last index (i.e. 0 to 5 actually means 0, 1, 2, 3, 4), the user might get the wrong impression that `detach_blocks(5, 0, ...)` will count 5, 4, 3, 2, 1 instead. This option was removed to avoid this confusion. * Sometimes the user might actually want to make a loop that counts down instead of up. This cannot be done by flipping the order of the arguments to e.g. `detach_blocks()` (nor could it be done in previous releases). However, it can be done by simply defining a suitable loop function. 
For example, if you call `detach_blocks(0, 10, loop, 2)` and define the loop function as `for (T i = 9 - start; i > 9 - end; --i)`, then the first block will count 9, 8, 7, 6, 5 and the second block will count 4, 3, 2, 1, 0. * `detach_loop()`, `submit_loop()`, `detach_sequence()`, and `submit_sequence()` work the same way. The first index must be smaller than the last index, but you can count down by writing a suitable loop or sequence function. * **API migration:** Any loop parallelization that used a first index greater than the last index will work exactly the same after switching the first and second arguments so that the smaller index appears first. * Functions for parallelizing loops no longer accept first and last indices of different types. The reason for allowing this previously was that otherwise, writing something like `detach_blocks(0, x, ...)` where `x` is not an `int` would result in a compilation error, since `0` is by default an `int` and therefore the arguments `0` and `x` have different types. However, this behavior, which used [`std::common_type`](https://en.cppreference.com/w/cpp/types/common_type) to determine the common type of the two indices, sometimes completely messed up the range of the loop. For example, the `std::common_type` of `int` and `unsigned int` is `unsigned int`, which means the loop will only use non-negative indices even if the `int` start index was negative, resulting in an integer overflow. * **API migration:** If you want to invoke e.g. `detach_blocks(0, x, ...)` where `x` is not an `int`, you can either: * Make the `0` have the desired type using a cast or a suffix. For example, if `x` is an `unsigned int`, write `(unsigned int)0` or `0U` instead of `0`. * Specify the template parameter explicitly. For example, if `x` is a `size_t`, write `detach_blocks<size_t>(0, x, ...)`. * `detach_task()` and `submit_task()` no longer accept arguments for the submitted task.
Instead, you must enclose the function in a [lambda expression](https://en.cppreference.com/w/cpp/language/lambda). In other words, instead of `detach_task(task, args...)` you should write `detach_task([] { task(args...); })`, indicating in the capture list `[]` whether to capture the task itself, and each of the arguments, by value or reference. Please see `README.md` for examples. This was changed for the following reasons: 1. Consistency with `detach_blocks()` and `submit_blocks()`, as well as the new `detach_loop()`, `submit_loop()`, `detach_sequence()`, and `submit_sequence()`, which do not accept function arguments either. 2. In my own multithreaded projects, I find that I almost always need the task to have access to variables in the local scope. This is much simpler, easier, and more concise to do with a lambda capture list, especially an implicit capture `[=]` or `[&]`, than by defining a function that takes arguments and then passing these arguments. 3. Similarly, I find that I mostly submit tasks defined as a lambda on the spot, rather than creating them as separate functions, because it's faster to code and makes it clear exactly what the task does without having to look elsewhere. 4. When users post issues to this repository asking for help with their own code that uses the thread pool, the solution often turns out to be "just wrap that in a lambda". Such issues can be avoided if lambdas must be used to begin with. 5. Submitting member functions, which previously required the awkward syntax `detach_task(&class::function, &object, args...)`, can now be achieved with the much simpler and more readable syntax `detach_task([] { object.function(args...); })` with the appropriate captures. 6. Passing arguments by reference, which previously required using [`std::ref`](https://en.cppreference.com/w/cpp/utility/functional/ref), e.g. 
`detach_task(task, std::ref(arg))`, can now be achieved with the much simpler and more readable syntax `detach_task([&arg] { task(arg); })`. 7. The new syntax allows specifying the priority of the task easily, as the second argument - otherwise, it would have been hard to distinguish the priority from a task argument, making the API more complicated and confusing. This syntax will also permit adding additional arguments to the member functions as needed in the future. * **API migration:** Enclose all tasks with arguments inside a lambda expression. All submitted tasks must have no arguments, but they can still have return values. * Alternatively, [`std::bind`](https://en.cppreference.com/w/cpp/utility/functional/bind) can also be used, if the old syntax is preferred to a lambda. Just wrap it around the task and its arguments: instead of `detach_task(task, args...)`, write `detach_task(std::bind(task, args...))`. This achieves the same effect, and can be used to easily convert v3.x.x code to v4.0.0 using a simple regular expression search and replace: * `push_task\((.*?)\)` -> `detach_task(std::bind($1))` * `submit\((.*?)\)` -> `submit_task(std::bind($1))` * `BS::synced_stream` and `BS::timer` have been moved to `BS_thread_pool_utils.hpp`. * **API migration:** Include the new header file if either of these utility classes are used. * `BS_thread_pool.hpp` new features: * A new optional feature, enabled by defining the macro `BS_THREAD_POOL_ENABLE_PRIORITY`, allows assigning priority to tasks. The priority is a number of type `BS::priority_t`, which is a signed 16-bit integer, so it can have any value between -32,768 and 32,767. The tasks will be executed in priority order from highest to lowest. * To assign a priority to a task, add the priority as the last argument to any of the `detach` or `submit` functions. If the priority is not specified, the default value will be 0. 
* The namespace `BS::pr` contains some pre-defined priorities for users who wish to avoid magic numbers and enjoy better future-proofing. In order of decreasing priority, the pre-defined priorities are: `BS::pr::highest`, `BS::pr::high`, `BS::pr::normal`, `BS::pr::low`, and `BS::pr::lowest`. * Please see `README.md` for more information, including performance considerations. * The new member functions `detach_loop()` and `submit_loop()` facilitate loop parallelization without having to worry about internal loops in the loop function. In previous releases, the loop function had to be of the form `[](T start, T end) { for (T i = start; i < end; ++i) loop(i); }`. This behavior has been preserved in `detach_blocks()` and `submit_blocks()`. However, the new `detach_loop()` and `submit_loop()` allow much simpler loop functions of the form `[](T i) { loop(i) }`, greatly simplifying the interface. * Performance-wise, due to fewer function calls, `detach_blocks()` and `submit_blocks()` are generally faster. However, the difference is usually not significant, and with compiler optimizations there may be no difference at all. In any case, `detach_loop()` and `submit_loop()` are provided as convenience functions, but performance-critical applications can stick with `detach_blocks()` and `submit_blocks()`. * The new member functions `detach_sequence()` and `submit_sequence()` facilitate submitting a sequence of tasks enumerated by indices. This is a bit similar to `detach_loop()` and `submit_loop()`, except that the range of indices is not split into blocks with each block containing a smaller range of indices. Instead, there is exactly one task per index. This can be used, for example, to submit a sequence of tasks with each one independently processing a single array element. `detach_sequence()` does not return a future, while `submit_sequence()` returns a `BS::multi_future`. 
* It is now possible to run an initialization function in each thread before it starts to execute any submitted tasks. The function must take no arguments and have no return value. It will only be executed exactly once, when the thread is first constructed. It can be passed as an argument to the constructor or to `reset()`. See [#104](https://github.com/bshoshany/thread-pool/issues/104), [#105](https://github.com/bshoshany/thread-pool/pull/105), [#113](https://github.com/bshoshany/thread-pool/issues/113), and [#119](https://github.com/bshoshany/thread-pool/issues/119). * Added a member function `get_thread_ids()` which returns a vector containing the unique identifiers for each of the pool's threads, as obtained by [`std::thread::get_id()`](https://en.cppreference.com/w/cpp/thread/get_id). See [#126](https://github.com/bshoshany/thread-pool/issues/126). * A new optional feature, enabled by defining the macro `BS_THREAD_POOL_ENABLE_NATIVE_HANDLES`, adds a member function `get_native_handles()` which returns a vector containing the underlying implementation-defined thread handles for each of the pool's threads. These can then be used in an implementation-specific way to manage the threads at the OS level; however, note that this will generally **not** be portable code. See [#122](https://github.com/bshoshany/thread-pool/issues/122). * This feature is disabled by default since it uses [std::thread::native_handle()](https://en.cppreference.com/w/cpp/thread/thread/native_handle), which is in the C++ standard library, but is **not** guaranteed to be present on all systems. * A new namespace `BS::this_thread` was created to provide functionality similar to `std::this_thread`. * `BS::this_thread::get_index()` can be used to get the index of the current thread. If this thread belongs to a `BS::thread_pool` object, it will have an index from 0 to `BS::thread_pool::get_thread_count() - 1`. 
Otherwise, for example if this thread is the main thread or an independent [`std::thread`](https://en.cppreference.com/w/cpp/thread/thread), [`std::nullopt`](https://en.cppreference.com/w/cpp/utility/optional/nullopt) will be returned. * `BS::this_thread::get_pool()` can be used to get the pointer to the thread pool that owns the current thread. If this thread belongs to a `BS::thread_pool` object, a pointer to that object will be returned. Otherwise, `std::nullopt` will be returned. * Note that both functions return an [`std::optional`](https://en.cppreference.com/w/cpp/utility/optional) object. * `BS::multi_future<T>` is now defined as a specialization of `std::vector<std::future<T>>`. This means that all of the member functions that can be used on an [`std::vector`](https://en.cppreference.com/w/cpp/container/vector) can also be used on a `BS::multi_future`. For example, it is now possible to use a range-based `for` loop with a `BS::multi_future` object, since it has iterators. * In addition to inherited member functions, `BS::multi_future` has the following specialized member functions, most of which are new in this release: `get()`, `ready_count()`, `valid()`, `wait()`, `wait_for()`, and `wait_until()`. Please see `README.md` for more information. See also [#128](https://github.com/bshoshany/thread-pool/issues/128). * A new optional feature, enabled by defining the macro `BS_THREAD_POOL_ENABLE_WAIT_DEADLOCK_CHECK`, allows `wait()`, `wait_for()`, and `wait_until()` to check whether the user tried to call them from within a thread of the same pool, which would result in a deadlock. If so, they will throw the exception `BS::thread_pool::wait_deadlock` instead of waiting. * `BS_thread_pool_utils.hpp`: * The utility classes `BS::synced_stream` and `BS::timer` now reside in this header file instead of the main one. * `BS::timer` has a new member function, `current_ms()`, which can be used to obtain the number of milliseconds that have elapsed so far, but keep the timer ticking.
* The new utility class `BS::signaller` allows simple signalling between threads. It can be used to make one or more threads wait, using the `wait()` member function. When another thread uses the `ready()` member function, all waiting threads stop waiting. This class is really just a convenient wrapper around [`std::promise`](https://en.cppreference.com/w/cpp/thread/promise), which contains both the promise and its future. * `BS_thread_pool.hpp` bug fixes and minor changes: * Optimized locking in the worker function. This should result in increased performance. * Optimized the way `detach_blocks()`, `submit_blocks()`, `detach_loop()`, and `submit_loop()` split the range of the loop into blocks. All blocks are now guaranteed to have one of two sizes, differing by 1, with the larger blocks always first. See [#96](https://github.com/bshoshany/thread-pool/issues/96). * For example, in previous releases, 100 indices were split into 15 blocks as 14 blocks of size 6 and one additional block of size 16, which was suboptimal. Now they are split into 10 blocks of size 7 and 5 blocks of size 6, which means the tasks are as evenly distributed as possible. * Fixed a bug that caused paused pools to have high idle CPU usage if pausing was used. See [#120](https://github.com/bshoshany/thread-pool/issues/120). * The worker now destructs the task object as soon as it finishes executing. See [#124](https://github.com/bshoshany/thread-pool/issues/124) and [#129](https://github.com/bshoshany/thread-pool/pull/129). * Added Markdown inline code formatting in all comments whenever applicable, which makes the comments look nicer when displayed as a tooltip in [Visual Studio Code](https://code.visualstudio.com/) or other supporting IDEs. * The `BS::thread_pool::blocks` helper class has been moved into the main thread pool class, and now returns a degenerate object (zero blocks) if `index_after_last <= first_index`. * `BS_thread_pool_test.cpp`: * Removed tests for the light thread pool. 
* Added/modified tests for all new/changed features. * Many of the previous tests have been simplified and optimized. * The program now takes command line arguments: * `help`: Show a help message and exit. * `log`: Create a log file. * `tests`: Perform standard tests. * `deadlock`: Perform long deadlock tests. * `benchmarks`: Perform benchmarks. * If no options are entered, the default is: `log tests benchmarks`. * By default, the test program enables all the optional features by defining the suitable macros, so it can test them. However, if the macro `BS_THREAD_POOL_LIGHT_TEST` is defined during compilation, the optional features will not be tested. * Instead of using a pre-defined list to specify the number of loop blocks to try in the benchmarks, the program now simply keeps increasing the number of blocks until it finds the optimal value. Often, the optimal number of blocks is much higher than the number of hardware threads, but if the number is too high it will result in diminishing returns. * `check_loop_no_return()` now checks that the loop modifies all the indices exactly once, to detect cases where an index has been modified more than once, e.g. if the same loop index was erroneously placed in more than one block. * Instead of defining `_CRT_SECURE_NO_WARNINGS`, the program now uses [`localtime_s`](https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/localtime-s-localtime32-s-localtime64-s) instead of [`std::localtime`](https://en.cppreference.com/w/cpp/chrono/c/localtime) if MSVC is detected to avoid generating a warning. * On macOS, the test program will exit with [`std::terminate()`](https://en.cppreference.com/w/cpp/error/terminate) instead of [`std::quick_exit()`](https://en.cppreference.com/w/cpp/utility/program/quick_exit) if any tests failed. This is because macOS does not implement `std::quick_exit()` for some reason. Note that as a result, the number of failed tests cannot be returned by the program on macOS.
Unfortunately, [`std::exit()`](https://en.cppreference.com/w/cpp/utility/program/exit) cannot be used here, as it might get stuck if a deadlock occurs. See [#106](https://github.com/bshoshany/thread-pool/pull/106) * The log file now uses the name of the executable file, followed by the date and time, so it's easy to distinguish between log files generated by different builds of the test (since the test script names them based on the compiler used). Also, the program now checks if the log file failed to open for some reason, and writes only to the standard output in that case. * The benchmarks now display a progress bar. * The test program will now detect the OS and compiler used. * `BS_thread_pool_test.ps1`: * The script will compile and run a light version of the test, with no optional features enabled, in addition to the main test, for each compiler. * The source and build folders will now be determined relative to the script folder, to ensure that the script works no matter which folder it is executed from. * The script now checks that the include files `BS_thread_pool.hpp` and `BS_thread_pool_utils.hpp` are present before attempting to compile the test program. * `README.md`: * Added/modified documentation for all new/changed features. * Revised many of the existing examples and explanations. * Added a complete library reference at the end of the documentation. * Added instructions for installing the package using Meson and CMake with CPM. The installation instructions with various package managers and build systems were moved to the end, before the reference. * Miscellaneous: * A `.clang-tidy` file is now included, with all the checks that are enabled in this project. The pull request template has been updated to suggest that authors lint their code using this file before submitting the pull request. * This release is dedicated to my wife (since December 1, 2023), Pauline. 
Her endless love, support, and encouragement have been a great source of motivation for working on this and other projects. I am so lucky and honored to [`my_future.share()`](https://en.cppreference.com/w/cpp/thread/future/share) with her ❤️ ### v3.5.0 (2023-05-25) * `BS_thread_pool.hpp` and `BS_thread_pool_light.hpp`: * Added a new member function, `purge()`, to the full (non-light) thread pool. This function purges all the tasks waiting in the queue. Tasks that are currently running will not be affected, but any tasks still waiting in the queue will be removed and will never be executed by the threads. Please note that there is no way to restore the purged tasks. * Fixed a bug which caused `wait_for_tasks()` to only block the first thread that called it. Now it blocks every thread that calls it, which is the expected behavior. In addition, all related deadlocks have now been completely resolved. This also applies to the variants `wait_for_tasks_duration()` and `wait_for_tasks_until()` in the non-light version. See [#110](https://github.com/bshoshany/thread-pool/pull/110). * Note: You should never call `wait_for_tasks()` from within a thread of the same thread pool, as that will cause it to wait forever! This fix is relevant for situations when `wait_for_tasks()` is called from an auxiliary `std::thread` or a separate thread pool. * `push_task()` and `submit()` now avoid creating unnecessary copies of the function object. This should improve performance, especially if large objects are involved. See [#90](https://github.com/bshoshany/thread-pool/pull/90). * Optimized the way condition variables are used by the thread pool class. Shared variables are now modified while owning the mutex, but condition variables are notified after the mutex is released, if possible. See [#84](https://github.com/bshoshany/thread-pool/pull/84).
* Instead of a variable `tasks_total` to keep track of the total number of tasks (queued + running), the thread pool class now uses a variable `tasks_running` to keep track only of the number of running tasks, with the number of tasks in the queue obtained via `tasks.size()`. This makes more sense in terms of the internal logic of the class. * All atomic variables have been converted to non-atomic. They are now all governed by `tasks_mutex`, so they do not need to be atomic. This eliminates redundant locking, and may improve performance a bit. * `running` has been renamed to `workers_running` and `task_done_cv` has been renamed to `tasks_done_cv`. * The worker now only notifies the condition variable `tasks_done_cv` if all the tasks are done, not just a single task. Checking if the tasks are done is cheaper than notifying the condition variable, so since the worker no longer notifies the condition variable every single time it finishes a task, this should improve performance a bit if `wait_for_tasks()` is used. * `BS_thread_pool_test.cpp`: * Combined the tests for the full and light versions into one program. The file `BS_thread_pool_light_test.cpp` has been removed. * The tests for the light version are now much more comprehensive. The only features that are not tested in the light version are those that do not exist in it. * Added a test for the new `purge()` member function. * Added a test to ensure that `push_task()` and `submit()` do not create unnecessary copies of the function object. * Added a test to ensure that `push_task()` and `submit()` correctly accept arguments passed by value, reference, and constant reference. * Added a test to ensure that `wait_for_tasks()` blocks all external threads that call it. * `_CRT_SECURE_NO_WARNINGS` is now set only if it has not already been defined, to prevent errors in MSVC projects which already have it set as part of the default build settings. See [#72](https://github.com/bshoshany/thread-pool/pull/72).
* `README.md`: * Added documentation for the new `purge()` member function. * Added an explanation for how to pass arguments by reference or constant reference when submitting functions to the queue, using the wrappers `std::ref()` and `std::cref()` respectively. See [#83](https://github.com/bshoshany/thread-pool/issues/83). * Added a link to [my lecture notes](https://baraksh.com/CSE701/notes.php) for a course taught at McMaster University, for the benefit of beginner C++ programmers who wish to learn some of the advanced techniques and programming practices used in developing this library. * Removed the sample test results, since the complete log file (including the deadlock tests) is now over 500 lines long. * Other: * A `.clang-format` file with the project's formatting conventions is now included in the GitHub repository. The pull request template now asks to format any new code using this file, so that it is consistent with the rest of the library. * A PowerShell script, `BS_thread_pool_test.ps1`, is now provided in the GitHub repository to make running the test on multiple compilers and operating systems easier. Since it is written in PowerShell, it is fully portable and works on Windows, Linux, and macOS. The script will automatically detect if Clang, GCC, and/or MSVC are available, compile the test program using each available compiler, and then run each compiled test program 5 times and report on any errors. The pull request template now recommends using this script for testing. * Since the root folder has become a bit crowded, the header files `BS_thread_pool.hpp` and `BS_thread_pool_light.hpp` have been moved to the `include` subfolder, and the test file `BS_thread_pool_test.cpp` has been moved to the `tests` subfolder, which also contains the new test script `BS_thread_pool_test.ps1`. 
### v3.4.0 (2023-05-12) * `BS_thread_pool.hpp` and `BS_thread_pool_light.hpp`: * Resolved an issue which could have caused `tasks_total` to not be synchronized in some cases. See [#70](https://github.com/bshoshany/thread-pool/pull/70). * Resolved a deadlock which could rarely be caused when the pool was destructed or reset. See [#93](https://github.com/bshoshany/thread-pool/pull/93), [#100](https://github.com/bshoshany/thread-pool/pull/100), [#107](https://github.com/bshoshany/thread-pool/pull/107), and [#108](https://github.com/bshoshany/thread-pool/pull/108). * Resolved a deadlock which could be caused when `wait_for_tasks()` was called more than once. * Two new member functions have been added to the non-light version: `wait_for_tasks_duration()` and `wait_for_tasks_until()`. They allow waiting for the tasks to complete, but with a timeout. `wait_for_tasks_duration()` will stop waiting after the specified duration has passed, and `wait_for_tasks_until()` will stop waiting after the specified time point has been reached. * Renamed `BS_THREAD_POOL_VERSION` in `BS_thread_pool_light.hpp` to `BS_THREAD_POOL_LIGHT_VERSION` and removed the `[light]` tag. This allows including both header files in the same program in case we want to use both the light and non-light thread pools simultaneously. * `BS_thread_pool_test.cpp` and `BS_thread_pool_light_test.cpp`: * Fixed an issue that caused a compilation error when using MSVC and including `windows.h`. See [#72](https://github.com/bshoshany/thread-pool/pull/72). * The number and size of the vectors in the performance test (`BS_thread_pool_test.cpp` only) are now guaranteed to be multiples of the number of threads, for optimal performance. * In `count_unique_threads()`, moved the condition variables and mutexes to the function scope to prevent cluttering the global scope. * Three new tests have been added to `BS_thread_pool_test.cpp` to check the deadlock issues that were resolved in this release (see above).
The tests rely on the new wait for tasks with timeout feature, so they are not available in the light version. * One test checks for deadlocks when calling `wait_for_tasks()` more than once. * Two tests check for deadlocks when destructing and resetting the pool respectively. They are turned off by default, since they take a long time to complete, but can be turned on by setting `enable_long_deadlock_tests` to `true`. * Two new tests have been added to the non-light version to check the new member functions `wait_for_tasks_duration()` and `wait_for_tasks_until()`. * The test programs now return the number of failed tests upon exit, instead of just 1 if any number of tests failed, which was the case in previous versions. Also, if any tests failed, `std::quick_exit()` is invoked instead of `return`, to avoid getting stuck due to any lingering tasks or deadlocks. * `README.md`: * Added documentation for the two new member functions, `wait_for_tasks_duration()` and `wait_for_tasks_until()`. * Fixed Markdown rendering incorrectly on Visual Studio. See [#77](https://github.com/bshoshany/thread-pool/pull/77). * The sample performance tests are now taken from a 40-core / 80-thread dual-CPU computing node, which is a more typical use case for high-performance scientific software. ### v3.3.0 (2022-08-03) * `BS_thread_pool.hpp`: * The public member variable `paused` of `BS::thread_pool` has been made private for future-proofing (in case future versions implement a more involved pausing mechanism) and better encapsulation. It is now accessible only via the `pause()`, `unpause()`, and `is_paused()` member functions. In other words: * Replace `pool.paused = true` with `pool.pause()`. * Replace `pool.paused = false` with `pool.unpause()`. * Replace `if (pool.paused)` (or similar) with `if (pool.is_paused())`. * The public member variable `f` of `BS::multi_future` has been renamed to `futures` for clarity, and has been made private for encapsulation and simplification purposes. 
Instead of operating on the vector `futures` itself, you can now use the `[]` operator of the `BS::multi_future` to access the future at a specific index directly, or the `push_back()` member function to append a new future to the list. The `size()` member function tells you how many futures are currently stored in the object. * The explicit casts of `std::endl` and `std::flush`, added in v3.2.0 to enable flushing a `BS::synced_stream`, caused ODR (One Definition Rule) violations if `BS_thread_pool.hpp` was included in two different translation units, since they were mistakenly not defined as `inline`. To fix this, I decided to make them static members of `BS::synced_stream` instead of global variables, which also makes the code better organized in my opinion. These objects can now be accessed as `BS::synced_stream::endl` and `BS::synced_stream::flush`. I also added an example for how to use them in `README.md`. See [#64](https://github.com/bshoshany/thread-pool/issues/64). * `BS_thread_pool_light.hpp`: * This package started out as a very lightweight thread pool, but over time has expanded to include many additional features, and at the time of writing it has a total of 340 lines of code, including all the helper classes. Therefore, I have decided to bundle a light version of the thread pool in a separate and stand-alone header file, `BS_thread_pool_light.hpp`, with only 170 lines of code (half the size of the full package). This file does not contain any of the helper classes, only a new `BS::thread_pool_light` class, which is a minimal thread pool with only the 5 most basic member functions: * `get_thread_count()` * `push_loop()` * `push_task()` * `submit()` * `wait_for_tasks()` * A separate test program `BS_thread_pool_light_test.cpp` tests only the features of the lightweight `BS::thread_pool_light` class. In the spirit of minimalism, it does not generate a log file and does not do any benchmarks. * To be perfectly clear, each header file is 100% stand-alone. 
If you wish to use the full package, you only need `BS_thread_pool.hpp`, and if you wish to use the light version, you only need `BS_thread_pool_light.hpp`. Only a single header file needs to be included in your project. ### v3.2.0 (2022-07-28) * `BS_thread_pool.hpp`: * Main `BS::thread_pool` class: * Added a new member function, `push_loop()`, which does the same thing as `parallelize_loop()`, except that it does not return a `BS::multi_future` with the futures for each block. Just like `push_task()` vs. `submit()`, this avoids the overhead of creating the futures, but the user must use `wait_for_tasks()` or some other method to ensure that the loop finishes executing, otherwise bad things will happen. * `push_task()` and `submit()` now utilize perfect forwarding in order to support more types of tasks - in particular member functions, which in previous versions could not be submitted unless wrapped in a lambda. To submit a member function, use the syntax `submit(&class::function, &object, args)`. More information can be found in `README.md`. See [#9](https://github.com/bshoshany/thread-pool/issues/9). * `push_loop()` and `parallelize_loop()` now have overloads where the first argument (the first index in the loop) is omitted, in which case it is assumed to be 0. This is for convenience, as the case where the first index is 0 is very common. * Helper classes: * `BS::synced_stream` now utilizes perfect forwarding in the member functions `print()` and `println()`. * Previously, it was impossible to pass the flushing manipulators `std::endl` and `std::flush` to `print()` and `println()`, since the compiler could not figure out which template specializations to use. The new objects `BS::endl` and `BS::flush` are explicit casts of these manipulators, whose sole purpose is to enable passing them to `print()` and `println()`. * `BS::multi_future::get()` now rethrows exceptions generated by the futures, even if the futures return `void`. 
See [#62](https://github.com/bshoshany/thread-pool/pull/62). * Added a new helper class, `BS::blocks`, which is used by `parallelize_loop()` and `push_loop()` to divide a range into blocks. This class is not documented in `README.md`, as it most likely will not be of interest to most users, but it is still publicly available, in case you want to parallelize something manually but still benefit from the built-in algorithm for splitting a range into blocks. * `BS_thread_pool_test.cpp`: * Added plenty of new tests for the new features described above. * Fixed a bug in `count_unique_threads()` that caused it to get stuck on certain systems. * `dual_println()` now also flushes the stream using `BS::endl`, so that if the test gets stuck, the log file will still contain everything up to that point. (Note: It is a common misconception that `std::endl` and `'\n'` are interchangeable. `std::endl` not only prints a newline character, it also flushes the stream, which is not always desirable, as it may reduce performance.) * The performance test has been modified as follows: * Instead of generating random vectors using `std::mersenne_twister_engine`, which proved to be inconsistent across different compilers and systems, the test now generates each element via an arbitrarily-chosen numerical operation. In my testing, this provided much more consistent results. * Instead of using a hard-coded vector size, a suitable vector size is now determined dynamically at runtime. * Instead of using `parallelize_loop()`, the test now uses the new `push_loop()` function to squeeze out a bit more performance. * Instead of setting the test parameters to achieve a fixed single-threaded mean execution time of 300 ms, the test now aims to achieve a fixed multithreaded mean execution time of 50 ms when the number of blocks is equal to the number of threads. 
This allows for more reliable results on very fast CPUs with a very large number of threads, where the mean execution time when using all the threads could previously be below a statistically significant value. * The number of vectors is now restricted to be a multiple of the number of threads, so that the blocks are always all of the same size. * `README.md`: * Added instructions and examples for the new features described above. * Rewrote the documentation for `parallelize_loop()` to make it clearer. ### v3.1.0 (2022-07-13) * `BS_thread_pool.hpp`: * Fixed an issue where `wait_for_tasks()` would sometimes get stuck if `push_task()` was executed immediately before `wait_for_tasks()`. * Both the thread pool constructor and the `reset()` member function now determine the number of threads to use in the pool as follows. If the parameter is a positive number, then the pool will be created with this number of threads. If the parameter is non-positive, or a parameter was not supplied, then the pool will be created with the total number of hardware threads available, as obtained from `std::thread::hardware_concurrency()`. If the latter returns a non-positive number for some reason, then the pool will be created with just one thread. See [#51](https://github.com/bshoshany/thread-pool/issues/51) and [#52](https://github.com/bshoshany/thread-pool/issues/52). * Added the `[[nodiscard]]` attribute to classes and class members, in order to warn the user when accidentally discarding an important return value, such as a future or the return value of a function with no useful side-effects. For example, if you use `submit()` and don't save the future it returns, the compiler will now generate a warning. (If a future is not needed, then you should use `push_task()` instead.) * Removed the `explicit` specifier from all constructors, as it prevented the default constructor from being used with static class members. See [#48](https://github.com/bshoshany/thread-pool/issues/48). 
* `BS_thread_pool_test.cpp`: * Improved `count_unique_threads()` using condition variables, to ensure that each thread in the pool runs at least one task regardless of how fast it takes to run the tasks. * When appropriate, `check()` now explicitly reports what the obtained result was and what it was expected to be. * `check_task_monitoring()` and `check_pausing()` now explicitly report the results of the monitoring at each step. * Changed all instances of `std::vector<std::atomic<bool>>` to `std::unique_ptr<std::atomic<bool>[]>`. See [#44](https://github.com/bshoshany/thread-pool/issues/44). * Converted a few more C-style casts to C++ cast expressions. * `README.md`: * Added instructions for using this package with the [Conan](https://conan.io/) C/C++ package manager. Please refer to [this package's page on ConanCenter](https://conan.io/center/bshoshany-thread-pool) to learn how to use Conan to include this package in your project with various build systems. * If you found this project useful, please consider [starring it on GitHub](https://github.com/bshoshany/thread-pool/stargazers)! This allows me to see how many people are using my code, and motivates me to keep working to improve it. ### v3.0.0 (2022-05-30) * This is a major new release with many changes and improvements! Please note that code written using previous releases will need to be slightly modified to work with the new release. The changes needed to migrate to the new API are explicitly indicated below for your convenience. * Breaking changes to the library header file: * The header file has been renamed to `BS_thread_pool.hpp` to avoid potential conflict with other thread pool libraries. * **API migration:** The library must now be included by invoking `#include "BS_thread_pool.hpp"`. * All the definitions in the library, including the `thread_pool` class and the helper classes, are now located in the namespace `BS`.
This namespace will also be used for my other C++ projects, and is intended to ensure consistency between my projects while avoiding potential name conflicts with other libraries. * **API migration:** The thread pool class should now be invoked as `BS::thread_pool`. Alternatively, it is possible to employ `using BS::thread_pool` or even `using namespace BS` and then invoke `thread_pool` directly. Same for the `BS::synced_stream` and `BS::timer` helper classes. * The macro `THREAD_POOL_VERSION`, which contains the version number and release date of the library, has been renamed to `BS_THREAD_POOL_VERSION` to avoid potential conflicts. * **API migration:** The version must now be read from the macro `BS_THREAD_POOL_VERSION`. * The public member `sleep_duration` has been removed. The thread pool now uses condition variables instead of sleep to facilitate waiting. This significantly improves performance (by 10%-50% in my testing), drastically decreases idle CPU utilization, and eliminates the need to set an optimal sleep time. This was a highly-requested change; see [issue #1](https://github.com/bshoshany/thread-pool/issues/1), [issue #12](https://github.com/bshoshany/thread-pool/issues/12), and [pull request #23](https://github.com/bshoshany/thread-pool/pull/23). * **API migration:** Remove any code that relates to the public member `sleep_duration`. * The template specializations for `submit()` have been merged. Now instead of two versions, one for functions with a return value and one for functions without a return value, there is just one version, which can accept any function. This makes the code more compact (and elegant). If a function with no return value is submitted, an `std::future<void>` is returned (the previous version returned an `std::future<bool>`). * **API migration:** To wait for a task with no return value, simply call `wait()` or `get()` on the corresponding `std::future<void>`.
* `parallelize_loop()` now returns a future in the form of a new `BS::multi_future` helper class template. The member function `wait()` of this future allows waiting until all of the loop's blocks finish executing. In previous versions, calling `parallelize_loop()` both parallelized the loop and waited for the blocks to finish; now it is possible to do other stuff while the loop executes. * **API migration:** Since `parallelize_loop()` no longer automatically blocks, you should either store the result in a `BS::multi_future` object and call its `wait()` member function, or simply call `parallelize_loop().wait()` to reproduce the old behavior. * Non-breaking changes to the library header file: * It is now possible to use `parallelize_loop()` with functions that have return values and get these values from all blocks at once through the `get()` member function of the `BS::multi_future`. * The template specializations for `push_task()` have been merged. Now instead of two versions, one for functions with arguments and one for functions without arguments, there is just one version, which can accept any function. * Constructors have been made `explicit`. See [issue #28](https://github.com/bshoshany/thread-pool/issues/28). * `submit()` now uses `std::make_shared` instead of `new` to create the shared pointer. This means only one memory allocation is performed instead of two, which should improve performance. In addition, all unique pointers are now created using `std::make_unique`. * A new helper class template, `BS::multi_future`, has been added. It's basically just a wrapper around `std::vector<std::future<T>>`. This class is used by the new implementation of `parallelize_loop()` to allow waiting for the entire loop, consisting of multiple tasks with their corresponding futures, to finish executing. * `BS::multi_future` can also be used independently to handle multiple futures at once.
For example, you can now keep track of several groups of tasks by storing their futures inside separate `BS::multi_future` objects and use either `wait()` to wait for all tasks in a specific group to finish or `get()` to get an `std::vector` with the return values of every task in the group. * Integer types are now chosen in a smarter way to improve portability, allow for better compatibility with 32-bit systems, and prevent potential conversion errors. * Added a new type, `BS::concurrency_t`, equal to the return type of `std::thread::hardware_concurrency()`. This is probably pointless, since the C++ standard requires this to be `unsigned int`, but it seems to me to make the code slightly more portable, in case some non-conforming compiler chooses to use a different integer type. * C-style casts have been converted to C++ cast expressions for added clarity. * Miscellaneous minor optimizations and style improvements. * Changes to the test program: * The program has been renamed to `BS_thread_pool_test.cpp` to avoid potential conflict with other thread pool libraries. * The program now returns `EXIT_FAILURE` if any of the tests failed, for automation purposes. See [pull request #42](https://github.com/bshoshany/thread-pool/pull/42). * Fixed incorrect check order in `check_task_monitoring()`. See [pull request #43](https://github.com/bshoshany/thread-pool/pull/43). * Added a new test for `parallelize_loop()` with a return value. * Improved some of the tests to make them more reliable. For example, `count_unique_threads()` now uses futures (stored in a `BS::multi_future` object). * The program now uses `std::vector` instead of matrices, for both consistency checks and benchmarks, in order to simplify the code and considerably reduce its length. * The benchmarks have been simplified. There's now only one test: filling a specific number of vectors of fixed size with random values. 
This may be replaced with something more practical in a future release, but at least on the systems I've tested on, it does demonstrate a very significant multithreading speedup. * In addition to multithreaded tests with different numbers of tasks, the benchmark now also includes a single-threaded test. This allows for more accurate benchmarks compared to previous versions, as the (slight) parallelization overhead is now taken into account when calculating the maximum speedup. * The program decides how many vectors to use for benchmarking by testing how many are needed to reach a target duration in the single-threaded test. This ensures that the test takes approximately the same amount of time on different systems, and is thus more consistent and portable. * Miscellaneous minor optimizations and style improvements. * Changes to `README.md`: * Many sections have been rewritten and/or polished. * Explanations and examples of all the new features have been added. * Added an acknowledgements section. * Miscellaneous changes: * Added a `CITATION.bib` file (in BibTeX format) to the GitHub repository. You can use it to easily cite this package if you use it in any research papers. * Added a `CITATION.cff` file (in YAML format) to the GitHub repository. This should add [an option to get a citation in different formats](https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/creating-a-repository-on-github/about-citation-files) directly from GitHub repository by clicking on "cite this repository" on the sidebar to the right. * Added templates for GitHub issues and pull requests. ### v2.0.0 (2021-08-14) * From now on, version numbers will adhere to the [Semantic Versioning](https://semver.org/) specification in the format **major.minor.patch**. * A file named `thread_pool_test.cpp` has been added to the package. It will perform automated tests of all aspects of the package, and benchmark some multithreaded matrix operations.
Please run it on your system and [submit a bug report](https://github.com/bshoshany/thread-pool/issues) if any of the tests fail. In addition, the code is thoroughly documented, and is meant to serve as an extensive example of how to properly use the package. * The package is now available through [vcpkg](https://github.com/microsoft/vcpkg). Instructions for how to install it have been added to `README.md`. See [this pull request](https://github.com/bshoshany/thread-pool/pull/18). * The package now defines a macro `THREAD_POOL_VERSION`, which returns the version number and release date of the thread pool library as a string. * `parallelize_loop()` has undergone some major changes (and is now incompatible with v1.x): * The second argument is now the index **after** the last index, instead of the last index itself. This is more consistent with C++ conventions (e.g. standard library algorithms) where the range is always `[first, last)`. For example, for an array with `n` indices, instead of `parallelize_loop(0, n - 1, ...)` you should now write `parallelize_loop(0, n, ...)`. * The `loop` function is now only called once per block, instead of once per index, as was the case before. This should provide a performance boost due to significantly reducing the number of function calls, and it also allows you to conserve resources by using them only once per block instead of once per index (an example can be found in the `random_matrix_generator` class in `thread_pool_test.cpp`). It also means that `loop` now takes two arguments: the first index in the block and the index after the last index in the block. Thus, `loop(start, end)` should typically involve a loop of the form `for (T i = start; i < end; i++)`. * The first and last indices can now be of two different integer types. Previously, `parallelize_loop(0, i, ...)` did not work if `i` was not an `int`, because `0` was interpreted as `int`, and the two arguments had to be of the same type. 
Therefore, one had to use casting, e.g. `parallelize_loop((size_t)0, i)`, to make it work. Now this is no longer necessary; the common type is inferred automatically using `std::common_type_t`. ### v1.9 (2021-07-29) * Fixed a bug in `reset()` which caused it to create the wrong number of threads. ### v1.8 (2021-07-28) * The version history has become too long to be included in `README.md`, so I moved it to a separate file, `CHANGELOG.md`. * A button to open this repository directly in Visual Studio Code has been added to the badges in `README.md`. * An internal variable named `promise` has been renamed to `task_promise` to avoid any potential errors in case the user invokes `using namespace std`. * `submit()` now catches exceptions thrown by the submitted task and forwards them to the future. See [this issue](https://github.com/bshoshany/thread-pool/issues/14). * Eliminated compiler warnings that appeared when using the `-Weffc++` flag in GCC. See [this pull request](https://github.com/bshoshany/thread-pool/pull/17). ### v1.7 (2021-06-02) * Fixed a bug in `parallelize_loop()` which prevented it from actually running loops in parallel, see [this issue](https://github.com/bshoshany/thread-pool/issues/11). ### v1.6 (2021-05-26) * Since MSVC does not interpret `and` as `&&` by default, the previous release did not compile with MSVC unless the `/permissive-` or `/Za` compiler flags were used. This has been fixed in this version, and the code now successfully compiles with GCC, Clang, and MSVC. See [this pull request](https://github.com/bshoshany/thread-pool/pull/10). ### v1.5 (2021-05-07) * This library now has a DOI for citation purposes. Information on how to cite it in publications has been added to the source code and to `README.md`. * Added GitHub badges to `README.md`. 
### v1.4 (2021-05-05) * Added three new public member functions to monitor the tasks submitted to the pool: * `get_tasks_queued()` gets the number of tasks currently waiting in the queue to be executed by the threads. * `get_tasks_running()` gets the number of tasks currently being executed by the threads. * `get_tasks_total()` gets the total number of unfinished tasks - either still in the queue, or running in a thread. * Note that `get_tasks_running() == get_tasks_total() - get_tasks_queued()`. * Renamed the private member variable `tasks_waiting` to `tasks_total` to make its purpose clearer. * Added an option to temporarily pause the workers: * When public member variable `paused` is set to `true`, the workers temporarily stop popping new tasks out of the queue, although any tasks already executed will keep running until they are done. Set to `false` again to resume popping tasks. * While the workers are paused, `wait_for_tasks()` will wait for the running tasks instead of all tasks (otherwise it would wait forever). * By utilizing the new pausing mechanism, `reset()` can now change the number of threads on-the-fly while there are still tasks waiting in the queue. The new thread pool will resume executing tasks from the queue once it is created. * `parallelize_loop()` and `wait_for_tasks()` now have the same behavior as the worker function with regards to waiting for tasks to complete. If the relevant tasks are not yet complete, then before checking again, they will sleep for `sleep_duration` microseconds, unless that variable is set to zero, in which case they will call `std::this_thread::yield()`. This should improve performance and reduce CPU usage. * Merged [this commit](https://github.com/bshoshany/thread-pool/pull/8): Fixed weird error when using MSVC and including `windows.h`. * The `README.md` file has been reorganized and expanded. 
### v1.3 (2021-05-03) * Fixed [this issue](https://github.com/bshoshany/thread-pool/issues/3): Removed `std::move` from the `return` statement in `push_task()`. This previously generated a `-Wpessimizing-move` warning in Clang. The assembly code generated by the compiler seems to be the same before and after this change, presumably because the compiler eliminates the `std::move` automatically, but this change gets rid of the Clang warning. * Fixed [this issue](https://github.com/bshoshany/thread-pool/issues/5): Removed a debugging message printed to `std::cout`, which was left in the code by mistake. * Fixed [this issue](https://github.com/bshoshany/thread-pool/issues/6): `parallelize_loop()` no longer sends references for the variables `start` and `stop` when calling `push_task()`, which may lead to undefined behavior. * A companion paper is now published at arXiv:2105.00613, including additional information such as performance tests on systems with up to 80 hardware threads. The `README.md` has been updated, and it is now roughly identical in content to the paper. ### v1.2 (2021-04-29) * The worker function, which controls the execution of tasks by each thread, now sleeps by default instead of yielding. Previously, when the worker could not find any tasks in the queue, it called `std::this_thread::yield()` and then tried again. However, this caused the workers to have high CPU usage when idle, [as reported by some users](https://github.com/bshoshany/thread-pool/issues/1). Now, when the worker function cannot find a task to run, it instead sleeps for a duration given by the public member variable `sleep_duration` (in microseconds) before checking the queue again. The default value is `1000` microseconds, which I found to be optimal in terms of both CPU usage and performance, but your own optimal value may be different. 
* If the constructor is called with an argument of zero for the number of threads, then the default value, `std::thread::hardware_concurrency()`, is used instead. * Added a simple helper class, `timer`, which can be used to measure execution time for benchmarking purposes. * Improved and expanded the documentation. ### v1.1 (2021-04-24) * Cosmetic changes only. Fixed a typo in the Doxygen comments and added a link to the GitHub repository. ### v1.0 (2021-01-15) * Initial release. thread-pool-5.1.0/CITATION.bib000066400000000000000000000007431512633616700156410ustar00rootroot00000000000000@article{Shoshany2024_ThreadPool, archiveprefix = {arXiv}, author = {Barak Shoshany}, doi = {10.1016/j.softx.2024.101687}, eprint = {2105.00613}, journal = {SoftwareX}, pages = {101687}, title = {{A C++17 Thread Pool for High-Performance Scientific Computing}}, url = {https://www.sciencedirect.com/science/article/pii/S235271102400058X}, volume = {26}, year = {2024} } thread-pool-5.1.0/CITATION.cff000066400000000000000000000014631512633616700156430ustar00rootroot00000000000000authors: - email: baraksh@gmail.com family-names: Shoshany given-names: Barak orcid: https://orcid.org/0000-0003-2222-127X cff-version: 1.2.0 doi: 10.1016/j.softx.2024.101687 license: MIT message: If you use this library in published research, please cite it as follows. 
preferred-citation: authors: - family-names: Shoshany given-names: Barak doi: 10.1016/j.softx.2024.101687 journal: SoftwareX start: 101687 title: A C++17 Thread Pool for High-Performance Scientific Computing type: article url: https://www.sciencedirect.com/science/article/pii/S235271102400058X volume: 26 year: 2024 repository-code: https://github.com/bshoshany/thread-pool title: A C++17 Thread Pool for High-Performance Scientific Computing type: software url: https://github.com/bshoshany/thread-pool thread-pool-5.1.0/LICENSE.txt000066400000000000000000000020641512633616700155720ustar00rootroot00000000000000MIT License Copyright (c) 2021-2026 Barak Shoshany Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
thread-pool-5.1.0/README.md000066400000000000000000007134131512633616700152350ustar00rootroot00000000000000[![Author: Barak Shoshany](https://img.shields.io/badge/author-Barak_Shoshany-009933)](https://baraksh.com/) [![DOI: 10.1016/j.softx.2024.101687](https://img.shields.io/badge/DOI-10.1016%2Fj.softx.2024.101687-b31b1b)](https://doi.org/10.1016/j.softx.2024.101687) [![arXiv:2105.00613](https://img.shields.io/badge/arXiv-2105.00613-b31b1b)](https://arxiv.org/abs/2105.00613) [![License: MIT](https://img.shields.io/github/license/bshoshany/thread-pool)](https://github.com/bshoshany/thread-pool/blob/master/LICENSE.txt) [![Language: C++17 / C++20 / C++23](https://img.shields.io/badge/Language-C%2B%2B17%20%2F%20C%2B%2B20%20%2F%20C%2B%2B23-yellow)](https://cppreference.com/) [![GitHub stars](https://img.shields.io/github/stars/bshoshany/thread-pool?style=flat&color=009999)](https://github.com/bshoshany/thread-pool/stargazers) [![GitHub forks](https://img.shields.io/github/forks/bshoshany/thread-pool?style=flat&color=009999)](https://github.com/bshoshany/thread-pool/forks) [![GitHub release](https://img.shields.io/github/v/release/bshoshany/thread-pool?color=660099)](https://github.com/bshoshany/thread-pool/releases) [![Vcpkg version](https://img.shields.io/vcpkg/v/bshoshany-thread-pool?color=6600ff)](https://vcpkg.io/) [![Meson WrapDB](https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmesonbuild%2Fwrapdb%2Fmaster%2Freleases.json&query=%24%5B%22bshoshany-thread-pool%22%5D.versions%5B0%5D&label=wrapdb&color=6600ff)](https://mesonbuild.com/Wrapdb-projects.html) [![Conan version](https://img.shields.io/conan/v/bshoshany-thread-pool?color=6600ff)](https://conan.io/center/recipes/bshoshany-thread-pool) [![Open in Visual Studio Code](https://img.shields.io/badge/Open_in_Visual_Studio_Code-007acc)](https://vscode.dev/github/bshoshany/thread-pool) # `BS::thread_pool`: a fast, lightweight, modern, and easy-to-use C++17 / C++20 / C++23 thread 
pool library By **Barak Shoshany**\ Email: <baraksh@gmail.com>\ Website: <https://baraksh.com/>\ GitHub: <https://github.com/bshoshany> This is the complete documentation for **v5.1.0** of the library, released on **2026-01-03**. * [Introduction](#introduction) * [Motivation](#motivation) * [Overview of features](#overview-of-features) * [Getting started](#getting-started) * [Installing the library](#installing-the-library) * [Compiling and compatibility](#compiling-and-compatibility) * [Constructors](#constructors) * [Getting and resetting the number of threads in the pool](#getting-and-resetting-the-number-of-threads-in-the-pool) * [Submitting tasks to the queue](#submitting-tasks-to-the-queue) * [Submitting tasks with no arguments and receiving a future](#submitting-tasks-with-no-arguments-and-receiving-a-future) * [Submitting tasks with arguments and receiving a future](#submitting-tasks-with-arguments-and-receiving-a-future) * [Detaching and waiting for tasks](#detaching-and-waiting-for-tasks) * [Waiting for submitted or detached tasks with a timeout](#waiting-for-submitted-or-detached-tasks-with-a-timeout) * [Class member functions as tasks](#class-member-functions-as-tasks) * [Parallelizing loops](#parallelizing-loops) * [Automatic parallelization of loops](#automatic-parallelization-of-loops) * [Optimizing the number of blocks](#optimizing-the-number-of-blocks) * [Common index types](#common-index-types) * [Parallelizing loops without futures](#parallelizing-loops-without-futures) * [Parallelizing individual indices vs.
blocks](#parallelizing-individual-indices-vs-blocks) * [Loops with return values](#loops-with-return-values) * [Parallelizing sequences](#parallelizing-sequences) * [More about `BS::multi_future`](#more-about-bsmulti_future) * [Submitting tasks in bulk without a loop](#submitting-tasks-in-bulk-without-a-loop) * [Utility classes](#utility-classes) * [Synchronizing printing to a stream with `BS::synced_stream`](#synchronizing-printing-to-a-stream-with-bssynced_stream) * [Managing tasks](#managing-tasks) * [Monitoring the tasks](#monitoring-the-tasks) * [Purging tasks](#purging-tasks) * [Exception handling](#exception-handling) * [Getting information about the current thread](#getting-information-about-the-current-thread) * [Thread initialization functions](#thread-initialization-functions) * [Thread cleanup functions](#thread-cleanup-functions) * [Passing task arguments by constant reference](#passing-task-arguments-by-constant-reference) * [Optional features](#optional-features) * [Enabling features](#enabling-features) * [Setting task priority](#setting-task-priority) * [Pausing the pool](#pausing-the-pool) * [Avoiding wait deadlocks](#avoiding-wait-deadlocks) * [Native extensions](#native-extensions) * [Enabling the native extensions](#enabling-the-native-extensions) * [Setting thread priority](#setting-thread-priority) * [Setting thread affinity](#setting-thread-affinity) * [Setting thread names](#setting-thread-names) * [Setting process priority](#setting-process-priority) * [Setting process affinity](#setting-process-affinity) * [Accessing native thread handles](#accessing-native-thread-handles) * [Testing the library](#testing-the-library) * [Automated tests](#automated-tests) * [Performance tests](#performance-tests) * [Finding the version of the library](#finding-the-version-of-the-library) * [Importing the library as a C++20 module](#importing-the-library-as-a-c20-module) * [Compiling the module](#compiling-the-module) * [Compiling with `compile_cpp.py` 
using `import BS.thread_pool`](#compiling-with-compile_cpppy-using-import-bsthread_pool) * [Compiling with Clang using `import BS.thread_pool`](#compiling-with-clang-using-import-bsthread_pool) * [Compiling with GCC using `import BS.thread_pool`](#compiling-with-gcc-using-import-bsthread_pool) * [Compiling with MSVC using `import BS.thread_pool`](#compiling-with-msvc-using-import-bsthread_pool) * [Compiling with CMake using `import BS.thread_pool`](#compiling-with-cmake-using-import-bsthread_pool) * [Importing the C++23 Standard Library as a module](#importing-the-c23-standard-library-as-a-module) * [Enabling `import std`](#enabling-import-std) * [Compiling with `compile_cpp.py` using `import std`](#compiling-with-compile_cpppy-using-import-std) * [Compiling with Clang and LLVM libc++ using `import std`](#compiling-with-clang-and-llvm-libc-using-import-std) * [Compiling with GCC and GNU libstdc++ using `import std`](#compiling-with-gcc-and-gnu-libstdc-using-import-std) * [Compiling with MSVC and Microsoft STL using `import std`](#compiling-with-msvc-and-microsoft-stl-using-import-std) * [Compiling with CMake using `import std`](#compiling-with-cmake-using-import-std) * [Installing the library using package managers](#installing-the-library-using-package-managers) * [Installing using vcpkg](#installing-using-vcpkg) * [Installing using Meson](#installing-using-meson) * [Installing using Conan](#installing-using-conan) * [Installing using CMake with CPM](#installing-using-cmake-with-cpm) * [Installing using CMake with `FetchContent`](#installing-using-cmake-with-fetchcontent) * [Complete library reference](#complete-library-reference) * [The `BS::thread_pool` class template](#the-bsthread_pool-class-template) * [Optional features and the template parameter](#optional-features-and-the-template-parameter) * [The `BS::this_thread` class](#the-bsthis_thread-class) * [The native extensions](#the-native-extensions) * [The `BS::multi_future` class](#the-bsmulti_future-class) 
* [The `BS::synced_stream` class](#the-bssynced_stream-class) * [The `BS::version` class](#the-bsversion-class) * [Diagnostic variables](#diagnostic-variables) * [All names exported by the C++20 module](#all-names-exported-by-the-c20-module) * [Development tools](#development-tools) * [The `compile_cpp.py` script](#the-compile_cpppy-script) * [Visual Studio Code tasks](#visual-studio-code-tasks) * [About the project](#about-the-project) * [Bug reports and feature requests](#bug-reports-and-feature-requests) * [Contribution and pull request policy](#contribution-and-pull-request-policy) * [Starring the repository](#starring-the-repository) * [Acknowledgements](#acknowledgements) * [Copyright and citing](#copyright-and-citing) * [About the author](#about-the-author) * [Learning more about C++](#learning-more-about-c) * [Other projects to check out](#other-projects-to-check-out) ## Introduction ### Motivation Multithreading is essential for modern high-performance computing. Since C++11, the C++ standard library has included built-in low-level multithreading support using constructs such as `std::thread`. However, `std::thread` creates a new thread each time it is called, which can have a significant performance overhead. Furthermore, it is possible to create more threads than the hardware can handle simultaneously, potentially resulting in a substantial slowdown. The library presented here contains a C++ thread pool class, `BS::thread_pool`, which avoids these issues by creating a fixed pool of threads once and for all, and then continuously reusing the same threads to perform different tasks throughout the lifetime of the program. By default, the number of threads in the pool is equal to the maximum number of threads that the hardware can run in parallel. The user submits tasks to be executed into a queue. Whenever a thread becomes available, it retrieves the next task from the queue and executes it. 
The pool optionally produces an `std::future` for each task, which allows the user to wait for the task to finish executing and/or obtain its eventual return value, if applicable. Threads and tasks are autonomously managed by the pool in the background, without requiring any input from the user aside from submitting the desired tasks. The design of this library is guided by four important principles. First, *compactness*: the entire library consists of just one self-contained header file, with no other components or dependencies. Second, *portability*: the library only utilizes the C++ standard library, without relying on any compiler extensions or 3rd-party libraries, and is therefore compatible with any modern standards-conforming C++ compiler on any platform, as long as it supports C++17 or later. Third, *ease of use*: the library is extensively documented, and programmers of any level should be able to use it right out of the box. The fourth and final guiding principle is *performance*: each and every line of code in this library was carefully designed with maximum performance in mind, and performance was tested and verified on a variety of compilers and platforms. Indeed, the library was originally designed for use in the author's own computationally-intensive scientific computing projects, running both on high-end desktop/laptop computers and high-performance computing nodes. Among the available C++ thread pool libraries, `BS::thread_pool` occupies the crucial middle ground between small bare-bones thread pool classes that offer rudimentary functionality and are only suitable for simple programs, and very large libraries that offer many advanced features but consist of multiple components and dependencies and involve complex APIs that require a substantial time investment to learn. 
`BS::thread_pool` was designed for users who want a simple and lightweight header-only library that is easy to learn and use, and can be readily incorporated into existing or new projects, but do not want to compromise on performance or functionality. Obtaining the library is quick and easy; it can be downloaded manually from [the GitHub repository](https://github.com/bshoshany/thread-pool), or installed automatically using a variety of package managers and build systems. The library can be imported either as a traditional [header-only library](#installing-the-library), or as a modern [C++20 module](#importing-the-library-as-a-c20-module). `BS::thread_pool` has undergone extensive testing on multiple platforms and is actively used by thousands of C++ developers worldwide for a wide range of applications, from scientific computing to game development. ### Overview of features * **Fast:** * Built from scratch with [maximum performance](#performance-tests) in mind. * Suitable for use in high-performance computing nodes with a very large number of CPU cores. * Reusing threads avoids the overhead of creating and destroying them for individual tasks. * A task queue ensures that there are never more tasks running in parallel than is allowed by the hardware. * All optional features can be selectively turned on to ensure minimal overhead. * **Lightweight:** * Single header file: simply [`#include "BS_thread_pool.hpp"`](#installing-the-library) and you're all set! * Header-only: no need to install or build the library. * Self-contained: no external requirements or dependencies. * Portable: uses only the C++ standard library, and works with any C++17-compliant compiler on any platform. * Only 536 lines of code, including all optional features and utility classes (excluding comments, blank lines, lines containing only a single brace, C++17/20 polyfills, and native extensions). 
* **Modern:** * Fully supports C++17, C++20, and C++23, taking advantage of the latest language features when available for maximum performance, reliability, and usability. * In C++20, the library can be imported as a C++20 module using [`import BS.thread_pool`](#importing-the-library-as-a-c20-module), with many benefits, such as faster compilation times and avoiding namespace pollution. * In C++23, the library can import the C++ Standard Library as a module using [`import std`](#importing-the-c23-standard-library-as-a-module) on supported compilers and platforms. * Makes use of modern C++ programming practices for readability, maintainability, performance, safety, portability, and reliability. * **Easy to use:** * Very simple operation, using only a handful of member functions for basic use, with many additional member functions, classes, and functions for more advanced use. * Every task submitted to the queue using [`submit_task()`](#submitting-tasks-to-the-queue) automatically generates an `std::future`, which can be used to wait for the task to finish executing, obtain its eventual return value, and/or catch any thrown exceptions. * Loops can be automatically parallelized into any number of tasks using [`submit_loop()`](#parallelizing-loops), which returns a [`BS::multi_future`](#more-about-bsmulti_future) that can be used to track the execution of all parallel tasks at once. * If futures are not needed, tasks may be submitted using [`detach_task()`](#detaching-and-waiting-for-tasks), and loops can be parallelized using [`detach_loop()`](#parallelizing-loops-without-futures) - sacrificing convenience for even greater performance. In that case, `wait()`, `wait_for()`, and `wait_until()` can be used to wait for all the tasks in the queue to complete. 
* Extremely thorough and detailed documentation, with numerous examples, is available in the library's [`README.md` file](https://github.com/bshoshany/thread-pool/blob/master/README.md), with a total of 3,486 lines and 26,700 words! * The code is thoroughly documented using Doxygen comments - not only the interface, but also the implementation, in case the user would like to make modifications. * Optionally, the included Python script [`compile_cpp.py`](#the-compile_cpppy-script) can be used to easily compile any programs that are using the library, with full support for C++20 modules and C++23 Standard Library modules where applicable. * **Additional features:** * Get the current thread count of the pool using [`get_thread_count()`](#getting-and-resetting-the-number-of-threads-in-the-pool). * Change the number of threads in the pool safely and on-the-fly as needed using [`reset()`](#getting-and-resetting-the-number-of-threads-in-the-pool). * Monitor the number of queued and/or running tasks using [`get_tasks_queued()`, `get_tasks_running()`, and `get_tasks_total()`](#monitoring-the-tasks). * Purge all tasks currently waiting in the queue with [`purge()`](#purging-tasks). * Run an [initialization function](#thread-initialization-functions) in each thread before it starts to execute any submitted tasks, by passing it to the `BS::thread_pool` constructor. * Run a cleanup function in each thread right before it is destroyed, using [`set_cleanup_func()`](#thread-cleanup-functions). * Assume lower-level control of parallelized loops using [`detach_blocks()` and `submit_blocks()`](#parallelizing-individual-indices-vs-blocks). * Parallelize a sequence of tasks enumerated by indices to the queue using [`detach_sequence()` and `submit_sequence()`](#parallelizing-sequences). * Submit tasks in bulk from a container or iterator range using [`detach_bulk()` and `submit_bulk()`](#submitting-tasks-in-bulk-without-a-loop). 
* Get [information about the current thread](#getting-information-about-the-current-thread): the pool index using `BS::this_thread::get_index()` and a pointer to the owning pool using `BS::this_thread::get_pool()`. * Get the unique thread IDs for all threads in the pool using [`get_thread_ids()`](#getting-and-resetting-the-number-of-threads-in-the-pool). * Synchronize output to one or more streams from multiple threads in parallel using the [`BS::synced_stream`](#synchronizing-printing-to-a-stream-with-bssynced_stream) utility class. * **Optional features:** * [Optional features](#enabling-features) can be enabled by passing a bitmask template parameter to the `BS::thread_pool` class template. * Assign a priority to each task using the optional [task priority](#setting-task-priority) feature. The priority, in the range -128 to +127, is passed as the last argument to all `submit` and `detach` member functions. Tasks with higher priorities will be executed first. * Freely pause and resume the pool using `pause()`, `unpause()`, and `is_paused()` with the optional [pausing](#pausing-the-pool) feature. When paused, threads do not retrieve new tasks out of the queue. * Avoid deadlocks using the optional [wait deadlock checks](#avoiding-wait-deadlocks) feature. If a deadlock is detected while waiting for tasks, the pool will throw the exception `BS::wait_deadlock`. * **Native extensions:** * The library includes optional [native extensions](#native-extensions), which contain non-portable features using the operating system's native API, enabled by defining the macro `BS_THREAD_POOL_NATIVE_EXTENSIONS` at compilation time. This feature should work on most Windows, Linux, and macOS systems. * Use [`BS::this_thread::get_os_thread_priority()` and `BS::this_thread::set_os_thread_priority()`](#setting-thread-priority) to get and set the priority of the current thread. 
* Use [`BS::this_thread::get_os_thread_affinity()` and `BS::this_thread::set_os_thread_affinity()`](#setting-thread-affinity) to get and set the processor affinity of the current thread. * Use [`BS::this_thread::get_os_thread_name()` and `BS::this_thread::set_os_thread_name()`](#setting-thread-names) to get and set the name of the current thread. * Use [`BS::get_os_process_priority()` and `BS::set_os_process_priority()`](#setting-process-priority) to get and set the priority of the current process. * Use [`BS::get_os_process_affinity()` and `BS::set_os_process_affinity()`](#setting-process-affinity) to get and set the processor affinity of the current process. * Get the implementation-defined thread handles for all threads in the pool using [`get_native_handles()`](#accessing-native-thread-handles). * **Well-tested:** * The included test program [`BS_thread_pool_test.cpp`](#testing-the-library) performs hundreds of automated tests, and also serves as a comprehensive example of how to properly use the library. * The test program also performs [benchmarks](#performance-tests) using a highly-optimized multithreaded algorithm which generates a plot of the Mandelbrot set. * The included Python script [`compile_cpp.py`](#the-compile_cpppy-script) provides a portable way to automatically run the tests with all available compilers with one command. * [Compatibility](#compiling-and-compatibility) is comprehensively tested on the latest versions of Windows, Ubuntu, and macOS, using Clang, GCC, and MSVC. * Under continuous and active development. Bug reports and feature requests are welcome, and should be made via [GitHub issues](https://github.com/bshoshany/thread-pool/issues). 
## Getting started ### Installing the library To install `BS::thread_pool`, simply download the [latest release](https://github.com/bshoshany/thread-pool/releases) from [the GitHub repository](https://github.com/bshoshany/thread-pool), place the header file `BS_thread_pool.hpp` from the `include` folder in the desired folder, and include it in your program: ```cpp #include "BS_thread_pool.hpp" ``` The thread pool will now be accessible via the `BS::thread_pool` class. For an even quicker installation, you can download the header file itself directly [at this URL](https://raw.githubusercontent.com/bshoshany/thread-pool/master/include/BS_thread_pool.hpp); no additional files are required, as the library is a single-header library. This library is also available on various package managers and build systems, including [vcpkg](https://vcpkg.io/), [Conan](https://conan.io/), [Meson](https://mesonbuild.com/), and [CMake](https://cmake.org/). Please [see below](#installing-the-library-using-package-managers) for more details. If C++20 features are available, the library can also be imported as a C++20 module, in which case `#include "BS_thread_pool.hpp"` should be replaced with `import BS.thread_pool;`. This requires one additional file, and the module must be compiled before it can be used; please see detailed instructions [below](#importing-the-library-as-a-c20-module). ### Compiling and compatibility This library officially supports C++17, C++20, and C++23. If compiled with C++20 and/or C++23 support, the library will make use of newly available features for maximum performance and usability. However, the library is fully compatible with C++17, and should successfully compile on any C++17 standard-compliant compiler, on all operating systems and architectures for which such a compiler is available. 
Compatibility was verified using the bundled test program [`BS_thread_pool_test.cpp`](#testing-the-library), compiled using the bundled Python script [`compile_cpp.py`](#the-compile_cpppy-script) with native extensions enabled, importing the library [as a C++20 module](#importing-the-library-as-a-c20-module) where applicable, and importing the [C++23 Standard Library as a module](#importing-the-c23-standard-library-as-a-module) where applicable, on a 24-core (8P+16E) / 32-thread Intel i9-13900K CPU, using the following compilers, C++ standard libraries, and platforms: * Windows 11 25H2 build 26200.7462: * [Clang](https://clang.llvm.org/) v21.1.8 with LLVM libc++ v21.1.8 ([MSYS2 build](https://www.msys2.org/)) * [GCC](https://gcc.gnu.org/) v15.2.0 with GNU libstdc++ v15 (20250808) ([MSYS2 build](https://www.msys2.org/)) * [MSVC](https://docs.microsoft.com/en-us/cpp/) v19.50.35721 with Microsoft STL v145 (202508). * Ubuntu 25.10: * [Clang](https://clang.llvm.org/) v21.1.8 with LLVM libc++ v21.1.8 * [GCC](https://gcc.gnu.org/) v15.2.0 with GNU libstdc++ v15 (20250917) * macOS 15.1 build 24B83: * [Clang](https://clang.llvm.org/) v21.1.8 with LLVM libc++ v21.1.8 ([Homebrew build](https://formulae.brew.sh/formula/llvm)) * Note: Apple Clang is currently not officially supported, as it does not support C++20 modules. As this library requires C++17 features, the code must be compiled with C++17 support: * For Clang or GCC, use the `-std=c++17` flag. * For MSVC, use `/std:c++17`, and also `/permissive-` to ensure standards conformance. For maximum performance, it is recommended to compile with all available compiler optimizations: * For Clang or GCC, use the `-O3` flag. * For MSVC, use `/O2`. 
As an example, to compile the test program `BS_thread_pool_test.cpp` with compiler optimizations, first create the `build` folder using `mkdir build`, and then run the following command in the root folder of the repository: * Windows: * GCC: `g++ -std=c++17 -O3 -I include tests/BS_thread_pool_test.cpp -o build/BS_thread_pool_test.exe` * Clang: `clang++ -std=c++17 -O3 -I include tests/BS_thread_pool_test.cpp -o build/BS_thread_pool_test.exe` * MSVC: `cl /std:c++17 /permissive- /O2 /EHsc /I include tests/BS_thread_pool_test.cpp /Fo:build/BS_thread_pool_test.obj /Fe:build/BS_thread_pool_test.exe` (in the Visual Studio Developer PowerShell for your CPU architecture) * Linux/macOS: * GCC: `g++ -std=c++17 -O3 -I include tests/BS_thread_pool_test.cpp -o build/BS_thread_pool_test` * Clang: `clang++ -std=c++17 -O3 -I include tests/BS_thread_pool_test.cpp -o build/BS_thread_pool_test` If your compiler and codebase support C++20 and/or C++23, it is recommended to enable them in order to allow the thread pool library access to the latest features: * For Clang or GCC, use the `-std=c++20` or `-std=c++23` flag. * For MSVC, use `/std:c++20` for C++20 or `/std:c++latest` for C++23. In addition, if C++20 features are available, the library can be imported as a module; instructions for doing so are provided [below](#importing-the-library-as-a-c20-module). ### Constructors The default constructor creates a thread pool with as many threads as the hardware can handle concurrently, as reported by the implementation via `std::thread::hardware_concurrency()`. This is usually determined by the number of cores in the CPU. If a core is hyperthreaded, it will count as two threads. For example: ```cpp // Constructs a thread pool with as many threads as are available in the hardware. BS::thread_pool pool; ``` Optionally, a number of threads different from the hardware concurrency can be specified as an argument to the constructor. 
However, note that adding more threads than the hardware can handle will **not** improve performance, and in fact will most likely hinder it. This option exists in order to allow using **fewer** threads than the hardware concurrency, in cases where you wish to leave some threads available for other processes. For example: ```cpp // Constructs a thread pool with only 12 threads. BS::thread_pool pool(12); ``` Usually, when the thread pool is used, a program's main thread should only submit tasks to the thread pool and wait for them to finish, and should not perform any computationally intensive tasks on its own. If this is the case, it is recommended to use the default value for the number of threads. This ensures that all the threads available in the hardware will be put to work while the main thread waits. However, if the main thread also performs computationally intensive tasks, it may be beneficial to use one fewer thread than the hardware concurrency, leaving one hardware thread available for the main thread. Furthermore, if more than one thread pool is used in the program simultaneously, the total number of threads across all pools should not exceed the hardware concurrency. Note: If the [native extensions](#native-extensions) are enabled, a pool created with the default constructor will only use the number of threads available to the process, as obtained from [`BS::get_os_process_affinity()`](#setting-process-affinity), which can be less than the number of hardware threads. ### Getting and resetting the number of threads in the pool The member function `get_thread_count()` returns the number of threads in the pool. This will be equal to `std::thread::hardware_concurrency()` if the default constructor was used (or to [`BS::get_os_process_affinity()`](#setting-process-affinity) if the [native extensions](#native-extensions) are enabled). 
It is generally unnecessary to change the number of threads in the pool after it has been created, since the whole point of a thread pool is that you only create the threads once. However, if needed, this can be done, safely and on-the-fly, using the `reset()` member function. `reset()` will wait for all tasks to be completed, both those that are currently running in the threads and those that are still waiting in the queue. Then it will destroy the thread pool and create a new one with the desired new number of threads, as specified in the function's argument (if no argument is given, it behaves like the default constructor), with an empty task queue. If pausing is enabled ([see below](#pausing-the-pool)), `reset()` will only wait for tasks that are currently running before destroying the pool; once the pool is reset, it will then resume executing the tasks that remained in the queue and any newly submitted tasks. If the pool was paused before resetting it, the new pool will be paused as well. `reset()` can also be used to change the thread initialization function ([see below](#thread-initialization-functions)). The member function `get_thread_ids()` returns a vector containing the unique identifiers for each of the pool's threads, as obtained by `std::thread::get_id()`. These values are not so useful on their own, but can be used to identify and distinguish between threads, or for allocating resources. ## Submitting tasks to the queue ### Submitting tasks with no arguments and receiving a future In this section we will learn how to submit a task with no arguments, but potentially with a return value, to the queue. Once a task has been submitted, it will be executed as soon as a thread becomes available. Tasks are executed in the order that they were submitted (first-in, first-out), unless task priority is enabled ([see below](#setting-task-priority)). 
For example, if the pool has 8 threads and an empty queue, and we submitted 16 tasks, then we should expect the first 8 tasks to be executed in parallel, with the remaining tasks being picked up by the threads one by one as each thread finishes executing its first task, until no tasks are left in the queue. The member function `submit_task()` is used to submit tasks to the queue. It takes exactly one input, the task to submit. This task must be a function with no arguments, but it can have a return value. `submit_task()` returns an `std::future` associated with the task. If the submitted task has a return value of type `T`, then the future will be of type `std::future<T>`, and will be set to the task's return value when the task finishes its execution. If the submitted task does not have a return value, then the future will be an `std::future<void>`, which will not contain any value, but may still be used to wait for the task to finish. To wait until the task finishes, use the member function `wait()` of the future. To obtain the return value, use the member function `get()`, which will also automatically wait for the task to finish if it hasn't yet. Here is a simple example: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <future> // std::future #include <iostream> // std::cout int the_answer() { return 42; } int main() { BS::thread_pool pool; std::future<int> my_future = pool.submit_task(the_answer); std::cout << my_future.get() << '\n'; } ``` In this example we submitted the function `the_answer()`, which returns an `int`. The member function `submit_task()` of the pool therefore returned an `std::future<int>`. We then used the `get()` member function of the future to get the return value, and printed it out. In addition to submitting a pre-defined function, we can also use a [lambda expression](https://en.cppreference.com/w/cpp/language/lambda) to quickly define the task on-the-fly.
Rewriting the previous example in terms of a lambda expression, we get: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <future> // std::future #include <iostream> // std::cout int main() { BS::thread_pool pool; std::future<int> my_future = pool.submit_task([]{ return 42; }); std::cout << my_future.get() << '\n'; } ``` Here, the lambda expression `[]{ return 42; }` has two parts: 1. An empty capture clause, denoted by `[]`. This signifies to the compiler that a lambda expression is being defined. 2. A code block `{ return 42; }` that simply returns the value `42`. It is generally simpler and faster to submit lambda expressions rather than pre-defined functions, especially due to the ability to capture local variables, which we will discuss in the next section. Of course, tasks do not have to return values. In the following example, we submit a function with no return value and then use the future to wait for it to finish executing: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <chrono> // std::chrono #include <future> // std::future #include <iostream> // std::cout #include <thread> // std::this_thread int main() { BS::thread_pool pool; const std::future<void> my_future = pool.submit_task( [] { std::this_thread::sleep_for(std::chrono::milliseconds(500)); }); std::cout << "Waiting for the task to complete... "; my_future.wait(); std::cout << "Done." << '\n'; } ``` Here we split the lambda into multiple lines to make it more readable. The command `std::this_thread::sleep_for(std::chrono::milliseconds(500))` instructs the task to simply sleep for 500 milliseconds, simulating a computationally-intensive task. ### Submitting tasks with arguments and receiving a future As stated in the previous section, tasks submitted using `submit_task()` cannot have any arguments. However, it is easy to submit tasks with arguments either by wrapping the function in a lambda or using lambda captures directly.
The following is an example of submitting a pre-defined function with arguments by wrapping it in a lambda: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <future> // std::future #include <iostream> // std::cout double multiply(const double lhs, const double rhs) { return lhs * rhs; } int main() { BS::thread_pool pool; std::future<double> my_future = pool.submit_task( [] { return multiply(6, 7); }); std::cout << my_future.get() << '\n'; } ``` As you can see, to pass the arguments to `multiply()` we simply called `multiply(6, 7)` explicitly inside a lambda. If the arguments are not literals, we can use the lambda capture clause to capture the arguments from the local scope: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <future> // std::future #include <iostream> // std::cout double multiply(const double lhs, const double rhs) { return lhs * rhs; } int main() { BS::thread_pool pool; constexpr double first = 6; constexpr double second = 7; std::future<double> my_future = pool.submit_task( [first, second] { return multiply(first, second); }); std::cout << my_future.get() << '\n'; } ``` We could even get rid of the `multiply()` function entirely and just put everything inside a lambda, if desired: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <future> // std::future #include <iostream> // std::cout int main() { BS::thread_pool pool; constexpr double first = 6; constexpr double second = 7; std::future<double> my_future = pool.submit_task( [first, second] { return first * second; }); std::cout << my_future.get() << '\n'; } ``` ### Detaching and waiting for tasks Usually, it is best to submit a task to the queue using `submit_task()`. This allows you to wait for the task to finish and/or get its return value later. However, sometimes a future is not needed, for example when you just want to "set and forget" a certain task, or if the task already communicates with the main thread or with other tasks without using futures, such as via condition variables.
In such cases, you may wish to avoid the overhead involved in assigning a future to the task, in order to increase performance. This is called "detaching" the task, as the task detaches from the main thread and runs independently. Detaching tasks is done using the `detach_task()` member function, which allows you to detach a task to the queue without generating a future for it. As with `submit_task()`, the task must have no arguments, but you can pass arguments by wrapping it in a lambda, as shown in the previous section. However, tasks executed via `detach_task()` cannot have a return value, as there would be no way for the main thread to retrieve that value. Since `detach_task()` does not return a future, there is no built-in way for the user to know when the task finishes executing. You must manually ensure that the task finishes executing before trying to use anything that depends on its output. Otherwise, bad things will happen! `BS::thread_pool` provides the member function `wait()` to facilitate waiting for all the tasks in the queue to complete, whether they were detached or submitted with a future. The `wait()` member function works similarly to the `wait()` member function of `std::future`. Consider, for example, the following code: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <chrono> // std::chrono #include <iostream> // std::cout #include <thread> // std::this_thread int main() { BS::thread_pool pool; int result = 0; pool.detach_task( [&result] { std::this_thread::sleep_for(std::chrono::milliseconds(100)); result = 42; }); std::cout << result << '\n'; } ``` This program first defines a local variable named `result` and initializes it to `0`. It then detaches a task in the form of a lambda expression. Note that the lambda captures `result` **by reference**, as indicated by the `&` in front of it. This means that the task can modify `result`, and any such modification will be reflected in the main thread.
The task changes `result` to `42`, but it first sleeps for 100 milliseconds. When the main thread prints out the value of `result`, the task has not yet had time to modify its value, since it is still sleeping. Therefore, the program will actually print out the initial value `0`, which is not what we want. To wait for the task to complete, we must use the `wait()` member function after detaching it: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <chrono> // std::chrono #include <iostream> // std::cout #include <thread> // std::this_thread int main() { BS::thread_pool pool; int result = 0; pool.detach_task( [&result] { std::this_thread::sleep_for(std::chrono::milliseconds(100)); result = 42; }); pool.wait(); std::cout << result << '\n'; } ``` Now the program will print out the value `42`, as expected. Note, however, that `wait()` will wait for **all** the tasks in the queue, including any other tasks that were potentially submitted before or after the one we care about. If we want to wait just for **one** task, `submit_task()` would be a better choice. ### Waiting for submitted or detached tasks with a timeout Sometimes you may wish to wait for the tasks to complete, but only for a certain amount of time, or until a specific point in time. For example, if the tasks have not yet completed after some time, you may wish to let the user know that there is a delay. For tasks submitted with futures using `submit_task()`, this can be achieved using two member functions of `std::future`: * `wait_for()` waits for the task to be completed, but stops waiting after the specified duration, given as an argument of type `std::chrono::duration`, has passed. * `wait_until()` waits for the task to be completed, but stops waiting after the specified time point, given as an argument of type `std::chrono::time_point`, has been reached.
In both cases, the functions will return `std::future_status::ready` if the future is ready, meaning the task is finished and its return value, if any, has been obtained. However, they will return `std::future_status::timeout` if the future is not yet ready when the timeout has expired. Here is an example: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include // std::chrono #include // std::future #include // std::cout #include // std::this_thread int main() { BS::thread_pool pool; const std::future my_future = pool.submit_task( [] { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); std::cout << "Task done!\n"; }); while (true) { if (my_future.wait_for(std::chrono::milliseconds(200)) != std::future_status::ready) std::cout << "Sorry, the task is not done yet.\n"; else break; } } ``` The output should look similar to this: ```none Sorry, the task is not done yet. Sorry, the task is not done yet. Sorry, the task is not done yet. Sorry, the task is not done yet. Task done! ``` For detached tasks, since we do not have futures for them, we cannot use this method. However, `BS::thread_pool` has two member functions, also named `wait_for()` and `wait_until()`, which similarly wait for a specified duration or until a specified time point, but do so for **all** tasks (whether submitted or detached). Instead of an `std::future_status`, the thread pool's wait functions return `true` if all tasks finished running, or `false` if the duration expired or the time point was reached but some tasks are still running. 
Here is the same example as above, using `detach_task()` and `pool.wait_for()`: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include // std::chrono #include // std::cout #include // std::this_thread int main() { BS::thread_pool pool; pool.detach_task( [] { std::this_thread::sleep_for(std::chrono::milliseconds(1000)); std::cout << "Task done!\n"; }); while (true) { if (!pool.wait_for(std::chrono::milliseconds(200))) std::cout << "Sorry, the task is not done yet.\n"; else break; } } ``` ### Class member functions as tasks Let us consider the following program: ```cpp #include // std::boolalpha, std::cout class flag_class { public: [[nodiscard]] bool get_flag() const { return flag; } void set_flag(const bool arg) { flag = arg; } private: bool flag = false; }; int main() { flag_class flag_object; flag_object.set_flag(true); std::cout << std::boolalpha << flag_object.get_flag() << '\n'; } ``` This program creates a new object `flag_object` of the class `flag_class`, sets the flag to `true` using the setter member function `set_flag()`, and then prints out the flag's value using the getter member function `get_flag()`. What if we want to submit the member function `set_flag()` as a task to the thread pool? We can simply wrap the entire statement `flag_object.set_flag(true);` in a lambda, and pass `flag_object` to the lambda by reference, as in the following example: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include // std::boolalpha, std::cout class flag_class { public: [[nodiscard]] bool get_flag() const { return flag; } void set_flag(const bool arg) { flag = arg; } private: bool flag = false; }; int main() { BS::thread_pool pool; flag_class flag_object; pool.submit_task( [&flag_object] { flag_object.set_flag(true); }) .wait(); std::cout << std::boolalpha << flag_object.get_flag() << '\n'; } ``` Of course, this will also work with `detach_task()`, if we call `wait()` on the pool itself instead of on the returned future. 
Note that in this example, instead of getting a future from `submit_task()` and then waiting for that future, we simply called `wait()` on that future straight away. This is a common way of waiting for a task to complete if we have nothing else to do in the meantime. Note also that we passed `flag_object` by reference to the lambda, since we want to set the flag on that same object, not a copy of it. Another thing you might want to do is call a member function from within the object itself, that is, from another member function. This follows a similar syntax, except that you must also capture `this` (i.e. a pointer to the current object) in the lambda. Here is an example: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include // std::boolalpha, std::cout BS::thread_pool pool; class flag_class { public: [[nodiscard]] bool get_flag() const { return flag; } void set_flag(const bool arg) { flag = arg; } void set_flag_to_true() { pool.submit_task( [this] { set_flag(true); }) .wait(); } private: bool flag = false; }; int main() { flag_class flag_object; flag_object.set_flag_to_true(); std::cout << std::boolalpha << flag_object.get_flag() << '\n'; } ``` Note that in this example we defined the thread pool as a global object, so that it is accessible outside the `main()` function. Although we could have, in theory, passed a reference to the thread pool in our call to `set_flag_to_true()`, that would be very cumbersome to do if multiple different functions need to use the same thread pool. Defining the thread pool as a global object is common practice, as it allows all functions to access the same thread pool without having to pass it around as an argument. ## Parallelizing loops ### Automatic parallelization of loops One of the most common and effective methods of parallelization is splitting a loop into smaller sub-loops and running them in parallel. 
It is most effective in "embarrassingly parallel" computations, such as vector or matrix operations, where each iteration of the loop is completely independent of every other iteration. For example, if we are summing up two vectors of 1000 elements each, and we have 10 threads, we could split the summation into 10 blocks of 100 elements each, and run all the blocks in parallel, potentially increasing performance by up to a factor of 10. `BS::thread_pool` can automatically parallelize loops, making it very easy to implement many parallel algorithms without having to worry about the details. To see how this works, consider the following generic loop: ```cpp for (T i = start; i < end; ++i) loop(i); ``` where: * `T` is any signed or unsigned integer type. * The loop is over the range `[start, end)`, i.e. inclusive of `start` but exclusive of `end`. * `loop()` is an operation performed for each loop index `i`, such as modifying an array with `end - start` elements. This loop may be automatically parallelized and submitted to the thread pool's queue using the member function `submit_loop()`, which has the following syntax: ```cpp pool.submit_loop(start, end, loop, num_blocks); ``` where: * `start` is the first index in the range. * `end` is the index after the last index in the range, such that the full range is `[start, end)`. In other words, the loop will be equivalent to the generic loop above, but parallelized. Note that if `end <= start`, nothing will happen; the loop cannot go backwards. * `loop()` is the function that should run in every iteration of the loop. It must take exactly one argument, the loop index. It cannot have a return value, as it will be executed multiple times by each task, so a return value would not make sense. * `num_blocks` is the number of blocks of the form `[a, b)` to split the loop into. For example, if the range is `[0, 9)` and there are 3 blocks, then the blocks will be the ranges `[0, 3)`, `[3, 6)`, and `[6, 9)`. 
This argument can be omitted, in which case the number of blocks will be the number of threads in the pool. The thread pool's internal algorithm ensures that each of the blocks has one of two sizes, differing by 1, with the larger blocks always first, so that the tasks are as evenly distributed as possible, to optimize performance. For example, if the range `[0, 100)` is split into 15 blocks, the result will be 10 blocks of size 7, which will be submitted first, and 5 blocks of size 6. Each block will be submitted to the thread pool's queue as a separate task. Therefore, a loop that is split into 3 blocks will be split into 3 individual tasks, which may run in parallel. If there is only one block, then the entire loop will run as one task, and no parallelization will take place. To parallelize the generic loop above, we use the following commands: ```cpp BS::multi_future<void> loop_future = pool.submit_loop(start, end, loop, num_blocks); loop_future.wait(); ``` `submit_loop()` returns an object of the helper class [`BS::multi_future`](#more-about-bsmulti_future). This is essentially a specialization of `std::vector<std::future<T>>` with additional member functions. Each of the `num_blocks` blocks will have an `std::future<void>` assigned to it, and all these futures will be stored inside the returned `BS::multi_future<void>`. When `loop_future.wait()` is called, the main thread will wait until **all** tasks generated by `submit_loop()` finish executing, and **only** those tasks - not any other tasks that also happen to be in the queue. This is essentially the role of the `BS::multi_future` class: to wait for a specific **group of tasks**, in this case the tasks running the loop blocks. 
As a simple example, the following code calculates and prints a table of squares of all integers from 0 to 99: ```cpp #include <cstddef> // std::size_t #include <iomanip> // std::setw #include <iostream> // std::cout int main() { constexpr std::size_t max = 100; std::size_t squares[max]; for (std::size_t i = 0; i < max; ++i) squares[i] = i * i; for (std::size_t i = 0; i < max; ++i) std::cout << std::setw(2) << i << "^2 = " << std::setw(4) << squares[i] << ((i % 5 != 4) ? " | " : "\n"); } ``` We can parallelize it as follows: ```cpp #include "BS_thread_pool.hpp" // BS::multi_future, BS::thread_pool #include <cstddef> // std::size_t #include <iomanip> // std::setw #include <iostream> // std::cout int main() { BS::thread_pool pool(10); constexpr std::size_t max = 100; std::size_t squares[max]; const BS::multi_future<void> loop_future = pool.submit_loop(0, max, [&squares](const std::size_t i) { squares[i] = i * i; }); loop_future.wait(); for (std::size_t i = 0; i < max; ++i) std::cout << std::setw(2) << i << "^2 = " << std::setw(4) << squares[i] << ((i % 5 != 4) ? " | " : "\n"); } ``` Since there are 10 threads, and we omitted the `num_blocks` argument, the loop will be divided into 10 blocks, each calculating 10 squares. As a side note, notice that here we parallelized the calculation of the squares, but we did not parallelize printing the results. This is for two reasons: 1. We want to print out the squares in ascending order, and we have no guarantee that the blocks will be executed in the correct order. This is very important; you must never expect that the parallelized loop will execute in the same order as the non-parallelized loop. 2. If we did print out the squares from within the parallel tasks, we would get a huge mess, since all 10 blocks would print to the standard output at once. [Later](#synchronizing-printing-to-a-stream-with-bssynced_stream) we will see how to synchronize printing to a stream from multiple tasks at the same time. 
### Optimizing the number of blocks The most important factor to consider when parallelizing loops is the number of blocks `num_blocks` to split the loop into. Naively, it may seem that the number of blocks should simply be equal to the number of threads in the pool, but that is usually **not** the optimal choice. Inevitably, some blocks will finish before other blocks; if there is only one block per thread, then any threads that have already finished executing their blocks will remain idle until the rest of the blocks are done, wasting many CPU cycles. It is therefore generally better to use a larger number of blocks than the number of threads, to ensure that all threads work at maximum capacity. On the other hand, parallelization with too many blocks will eventually suffer from diminishing returns due to increased overhead. A good rule of thumb is to use a number of blocks equal to the square of the number of threads, but this is not necessarily the optimal number in all cases. In the end, the optimal number of blocks will always depend on the specific algorithm being parallelized and the total number of indices in the loop, and may differ between different compilers, operating systems, and hardware configurations. For best performance, it is strongly recommended to do your own benchmarks to find the optimal number of blocks for your particular use case; see the [benchmarks code in the bundled test program](#performance-tests) for an example of how to do this. Finally, note that the discussion here only pertains to situations where the parallelized loop is the only thing running in the pool. If there are many other tasks running in parallel from other sources, then you probably do not need to worry about idle time, since the threads will be kept busy by the other tasks anyway. ### Common index types Let us now consider a subtlety regarding the types of the start and end indices. 
In the example [above](#automatic-parallelization-of-loops), the start index is `0`, which is of type `int`, while the end index is `max`, which is of type `std::size_t`. These two types are not compatible, as they differ both in signedness and (on a 64-bit system) in bit width. In such cases, `submit_loop()` uses a custom type trait `BS::common_index_type` to determine the common type of the indices. The common index type of two signed integers or two unsigned integers is the larger of the two types, while the common index type of a signed and an unsigned integer is a signed integer that can hold the full ranges of both integers. (This is in contrast to [`std::common_type`](https://en.cppreference.com/w/cpp/types/common_type), which would choose the unsigned integer in the latter case, causing a loop with a negative start index and an unsigned end index to fail due to integer overflow.) The exception to this rule is when one of the integers is a 64-bit unsigned integer, and the other is a signed integer (of any bit width), since there is no fundamental signed type that can hold the full ranges of both integers. In this case, we choose a 64-bit unsigned integer as the common index type, since the most common scenario where this might happen is when the indices go from `0` to an index of type `std::size_t` - as in our example above. However, it is important to note that this will fail if the first index is in fact negative. Therefore, **only** in the edge case where one index is a negative integer and the other is of an unsigned 64-bit integer type such as `std::size_t`, the user must cast both indices explicitly to the desired common type. In all other cases, this is handled automatically behind the scenes using `BS::common_index_type`. ### Parallelizing loops without futures Just as in the case of [`detach_task()`](#detaching-and-waiting-for-tasks) vs. 
[`submit_task()`](#submitting-tasks-with-no-arguments-and-receiving-a-future), sometimes you may want to parallelize a loop, but you don't need it to return a `BS::multi_future`. In this case, you can save the overhead of generating the futures (which can be significant, depending on the number of blocks) by using `detach_loop()` instead of `submit_loop()`, with the same arguments. For example, we could detach the loop of squares example above as follows: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include // std::size_t #include // std::setw #include // std::cout int main() { BS::thread_pool pool(10); constexpr std::size_t max = 100; std::size_t squares[max]; pool.detach_loop(0, max, [&squares](const std::size_t i) { squares[i] = i * i; }); pool.wait(); for (std::size_t i = 0; i < max; ++i) std::cout << std::setw(2) << i << "^2 = " << std::setw(4) << squares[i] << ((i % 5 != 4) ? " | " : "\n"); } ``` **Warning:** Since `detach_loop()` does not return a `BS::multi_future`, there is no built-in way for the user to know when the loop finishes executing. You must use either [`wait()`](#detaching-and-waiting-for-tasks) as we did here, or some other method such as condition variables, to ensure that the loop finishes executing before trying to use anything that depends on its output. Otherwise, bad things will happen! If the loop is the only thing running in the pool, then generally `detach_loop()` followed by `wait()` is the optimal choice in terms of performance. ### Parallelizing individual indices vs. blocks We have seen that `detach_loop()` and `submit_loop()` execute the function `loop(i)` for each index `i` in the loop. However, behind the scenes, the loop is split into blocks, and each block executes the `loop()` function multiple times. 
Each block has an internal loop of the form (where `T` is the type of the indices): ```cpp for (T i = start; i < end; ++i) loop(i); ``` The `start` and `end` indices of each block are determined automatically by the pool. For example, in the previous section, the loop from 0 to 100 was split into 10 blocks of 10 indices each: `start = 0` to `end = 10`, `start = 10` to `end = 20`, and so on; the blocks are not inclusive of the last index, since the `for` loop has the condition `i < end` and not `i <= end`. However, this also means that the `loop()` function is executed multiple times per block. This generates additional overhead due to the multiple function calls. For short loops, this should not affect performance. However, for very long loops, with millions of indices, the performance cost may be significant. For this reason, the thread pool library provides two additional member functions for parallelizing loops: `detach_blocks()` and `submit_blocks()`. While `detach_loop()` and `submit_loop()` execute a function `loop(i)` once per index but multiple times per block, `detach_blocks()` and `submit_blocks()` execute a function `block(start, end)` only once per block. The main advantage of this method is increased performance, but the main disadvantage is slightly more complicated code. In particular, the user must define the loop from `start` to `end` manually within each block, ensuring that all the indices in the block are handled. 
Here is the previous example again, this time using `detach_blocks()`: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include // std::size_t #include // std::setw #include // std::cout int main() { BS::thread_pool pool(10); constexpr std::size_t max = 100; std::size_t squares[max]; pool.detach_blocks(0, max, [&squares](const std::size_t start, const std::size_t end) { for (std::size_t i = start; i < end; ++i) squares[i] = i * i; }); pool.wait(); for (std::size_t i = 0; i < max; ++i) std::cout << std::setw(2) << i << "^2 = " << std::setw(4) << squares[i] << ((i % 5 != 4) ? " | " : "\n"); } ``` Note how the block function takes two arguments, and includes the internal loop. Also, since we are using `detach_blocks()`, we must wait for the loop to finish executing using `wait()`. Alternatively, we could have used `submit_blocks()` and waited on the returned `BS::multi_future` object. Generally, compiler optimizations should be able to make `detach_loop()` and `submit_loop()` perform roughly the same as `detach_blocks()` and `submit_blocks()`. However, `detach_blocks()` and `submit_blocks()` are always going to be inherently faster, at the cost of being slightly more complicated to use. In addition, having low-level control of each block can allow for further optimizations, such as allocating resources per block instead of per index. As usual, you should perform your own benchmarks to see which option works best for your particular use case. ### Loops with return values As mentioned above, unlike `submit_task()`, the member function `submit_loop()` only takes loop functions with no return value. The reason is that each block is running the loop function multiple times, so a return value would not make sense. In contrast, `submit_blocks()` allows the block function to have a return value, as each block can return a unique value. 
The block function will be executed once for each block, but the blocks are managed by the thread pool, with the user only able to select the number of blocks, but not the range of each block. Therefore, there is limited usefulness in returning one value per block. However, for cases where this is desired, such as for summation or some sorting algorithms, `submit_blocks()` does accept functions with return values, in which case it returns a `BS::multi_future<T>` object where `T` is the type of the return value. Here's an example of a function template summing all elements of type `T` in a given range: ```cpp #include "BS_thread_pool.hpp" // BS::multi_future, BS::thread_pool #include <cstdint> // std::uint64_t #include <future> // std::future #include <iostream> // std::cout BS::thread_pool pool; template <typename T> T sum(T min, T max) { BS::multi_future<T> loop_future = pool.submit_blocks( min, max + 1, [](const T start, const T end) { T block_total = 0; for (T i = start; i < end; ++i) block_total += i; return block_total; }, 100); T result = 0; for (std::future<T>& future : loop_future) result += future.get(); return result; } int main() { std::cout << sum<std::uint64_t>(1, 1'000'000); } ``` Note that we needed to specify the type `T` explicitly as `std::uint64_t`, that is, an unsigned 64-bit integer, as the result, 500,000,500,000, would not fit in a 32-bit integer. Here we used the fact that `BS::multi_future<T>` is a specialization of `std::vector<std::future<T>>`, so we can use a range-based `for` loop to iterate over the futures, and use the `get()` member function of each future to get its value. The values of the futures will be the partial sums from each block, so when we add them up, we will get the total sum. Note that we divided the loop into 100 blocks, so there will be 100 futures in total, each with the partial sum of 10,000 numbers. 
The range-based `for` loop will likely start before the parallelized loop has finished executing, and each time it calls `get()` on a future, it will get the value of that future if it is ready, or it will wait until the future is ready and then get the value. This increases performance, since we can start summing the results without waiting for the entire loop to finish executing first - we only need to wait for individual blocks. If we did want to wait until the entire loop finishes before summing the results, we could have used the `get()` member function of the `BS::multi_future<T>` object itself, which returns an `std::vector<T>` with the values obtained from each future. In that case, the sum could be obtained after calling `submit_blocks()`, for example using `std::reduce`, as follows: ```cpp #include "BS_thread_pool.hpp" // BS::multi_future, BS::thread_pool #include <cstdint> // std::uint64_t #include <iostream> // std::cout #include <numeric> // std::reduce #include <vector> // std::vector BS::thread_pool pool; template <typename T> T sum(T min, T max) { BS::multi_future<T> loop_future = pool.submit_blocks( min, max + 1, [](const T start, const T end) { T block_total = 0; for (T i = start; i < end; ++i) block_total += i; return block_total; }, 100); std::vector<T> partial_sums = loop_future.get(); T result = std::reduce(partial_sums.begin(), partial_sums.end()); return result; } int main() { std::cout << sum<std::uint64_t>(1, 1'000'000); } ``` ### Parallelizing sequences The member functions `detach_loop()`, `submit_loop()`, `detach_blocks()`, and `submit_blocks()` parallelize a loop by splitting it into blocks, and submitting each block as an individual task to the queue, with each such task iterating over all the indices in the corresponding block's range, which can be numerous. However, sometimes we have a loop with a small number of indices, or more generally, a sequence of tasks enumerated by some index. In such cases, we can avoid the overhead of splitting into blocks and simply submit each individual index as its own independent task to the pool's queue. 
This can be done with `detach_sequence()` and `submit_sequence()`. The syntax of these functions is similar to `detach_loop()` and `submit_loop()`, except that they don't have the `num_blocks` argument at the end. The sequence function must take only one argument, the index. As usual, `detach_sequence()` detaches the tasks and does not return a future, so you must use `wait()` if you need to wait for the entire sequence to finish executing, while `submit_sequence()` returns a `BS::multi_future`. If the tasks in the sequence return values, then the futures will contain those values, otherwise they will be `void` futures. Here is a simple example, where each task in the sequence calculates the factorial of its index: ```cpp #include "BS_thread_pool.hpp" // BS::multi_future, BS::thread_pool #include // std::uint64_t #include // std::cout #include // std::vector std::uint64_t factorial(const std::uint64_t n) { std::uint64_t result = 1; for (std::uint64_t i = 2; i <= n; ++i) result *= i; return result; } int main() { BS::thread_pool pool; constexpr std::uint64_t max = 20; BS::multi_future sequence_future = pool.submit_sequence(0, max + 1, factorial); std::vector factorials = sequence_future.get(); for (std::uint64_t i = 0; i < max + 1; ++i) std::cout << i << "! = " << factorials[i] << '\n'; } ``` Note how the factorials of each index are stored in the `BS::multi_future`, and can be obtained as a vector using `get()`; each element of the vector is equal to the factorial of the element's index, calculated by its own individual task in the sequence. **Warning:** Since each index in the sequence will be submitted as a separate task, `detach_sequence()` and `submit_sequence()` should only be used if the number of indices is small (say, within 1-2 orders of magnitude of the number of threads), and each index performs a substantial computation on its own. 
If you submit a sequence of 1 million indices, each performing a 1 ms calculation, the overhead of submitting each index as a separate task would far outweigh the benefits of parallelization. ### More about `BS::multi_future` The helper class `BS::multi_future`, which we have been using throughout this section, provides a convenient way to collect and access groups of futures. While a `BS::multi_future` object is created automatically by the pool when parallelizing loops, you can also use it to store futures manually, such as those obtained from `submit_task()` or by other means. `BS::multi_future<T>` is a specialization of `std::vector<std::future<T>>`, so it should be used in a similar way: * When you create a new `BS::multi_future` object, either use the default constructor to create an empty object and add futures to it later, or pass the desired number of futures to the constructor in advance. * Use the `[]` operator to access the future at a specific index, or the `push_back()` member function to append a new future to the list. (If the number of futures is known in advance, you should use `reserve()` to allocate memory for all of them first, and only then `push_back()` the individual futures, otherwise memory will have to be reallocated multiple times, which is very inefficient.) * The `size()` member function tells you how many futures are currently stored in the object. However, `BS::multi_future` also has additional member functions that are aimed specifically at handling futures: * Once all the futures are stored, you can use `wait()` to wait for all of them at once or `get()` to get an `std::vector<T>` with the results from all of them. * You can check how many futures are ready using `ready_count()`. * You can check if all the stored futures are valid using `valid()`. * You can wait for all the stored futures for a specific duration with `wait_for()` or wait until a specific time with `wait_until()`. 
These functions return `true` if all futures have been waited for before the duration expired or the time point was reached, and `false` otherwise. Aside from using `BS::multi_future` to track the execution of parallelized loops, it can also be used, for example, whenever you have several different groups of tasks and you want to track the execution of each group individually. ### Submitting tasks in bulk without a loop Sometimes, you may have a large number of tasks to submit to the thread pool, which are not part of a loop or sequence. In such cases, you can use `detach_bulk()` or `submit_bulk()` to submit all the tasks at once. As usual, `detach_bulk()` simply detaches the tasks, while `submit_bulk()` returns a `BS::multi_future`. The two functions can be used in one of two ways: 1. By passing a container of callable objects. They must have no arguments; to submit functions with arguments, enclose them in lambda expressions. In the case of `submit_bulk()`, the callables may have return values, which will be stored in the returned `BS::multi_future`, but they must all return the same type. 2. By passing an iterator range, similarly to the standard library algorithms. This can be used, for example, to submit only a subset of tasks from a larger container. 
The following example demonstrates how to use `submit_bulk()` with a container: ```cpp #include "BS_thread_pool.hpp" // BS::multi_future, BS::thread_pool #include <functional> // std::function #include <iostream> // std::cout #include <string> // std::string #include <vector> // std::vector int main() { BS::thread_pool pool; std::vector<std::function<std::string()>> tasks; tasks.emplace_back([] { return "Do something."; }); tasks.emplace_back([] { return "Do something else."; }); tasks.emplace_back([] { return "Do another thing."; }); BS::multi_future<std::string> results = pool.submit_bulk(tasks); for (const std::string& result : results.get()) std::cout << result << '\n'; } ``` ## Utility classes ### Synchronizing printing to a stream with `BS::synced_stream` When printing to an output stream from multiple threads in parallel, the output may become garbled. For example, try running this code: ```cpp #include "BS_thread_pool.hpp" // BS::thread_pool #include <iostream> // std::cout BS::thread_pool pool; int main() { pool.submit_sequence(0, 5, [](const unsigned int i) { std::cout << "Task no. " << i << " executing.\n"; }) .wait(); } ``` The output will be a mess similar to this: ```none Task no. Task no. Task no. 3 executing. 0 executing. Task no. 41 executing. Task no. 2 executing. executing. ``` The reason is that, although each **individual** insertion to `std::cout` is thread-safe, there is no mechanism in place to ensure subsequent insertions from the same thread are printed contiguously. The thread pool utility class `BS::synced_stream` is designed to eliminate such synchronization issues. The stream to print to should be passed as a constructor argument. If no argument is supplied, `std::cout` will be used: ```cpp // Construct a synced stream that will print to std::cout. BS::synced_stream sync_out; // Construct a synced stream that will print to the output stream my_stream. 
BS::synced_stream sync_out(my_stream); ``` The member function `print()` takes an arbitrary number of arguments, which are inserted into the stream one by one, in the order they were given. `println()` does the same, but also prints a newline character `\n` at the end, for convenience. A mutex is used to synchronize this process, so that any other calls to `print()` or `println()` using the same `BS::synced_stream` object must wait until the previous call has finished. As an example, this code: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool BS::synced_stream sync_out; BS::thread_pool pool; int main() { pool.submit_sequence(0, 5, [](const unsigned int i) { sync_out.println("Task no. ", i, " executing."); }) .wait(); } ``` will print out: ```none Task no. 0 executing. Task no. 1 executing. Task no. 2 executing. Task no. 3 executing. Task no. 4 executing. ``` **Warning:** Always create the `BS::synced_stream` object **before** the `BS::thread_pool` object, as we did in this example. When the `BS::thread_pool` object goes out of scope, it waits for the remaining tasks to be executed. If the `BS::synced_stream` object goes out of scope before the `BS::thread_pool` object, then any tasks using the `BS::synced_stream` will crash. Since objects are destructed in the opposite order of construction, creating the `BS::synced_stream` object before the `BS::thread_pool` object ensures that the `BS::synced_stream` is always available to the tasks, even while the pool is destructing. Most stream manipulators defined in the headers `<ios>` and `<iomanip>`, such as `std::setw` (set the character width of the next output), `std::setprecision` (set the precision of floating point numbers), and `std::fixed` (display floating point numbers with a fixed number of digits), can be passed as arguments to `print()` and `println()`, and will have the same effect as inserting them to the associated stream. 
The only exceptions are the flushing manipulators `std::endl` and `std::flush`, which will not work because the compiler will not be able to figure out which template specializations to use. Instead, use `BS::synced_stream::endl` and `BS::synced_stream::flush`. Here is an example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include // std::sqrt #include // std::setprecision, std::setw #include // std::fixed BS::synced_stream sync_out; BS::thread_pool pool; int main() { sync_out.print(std::setprecision(10), std::fixed); pool.submit_sequence(0, 16, [](const unsigned int i) { sync_out.print("The square root of ", std::setw(2), i, " is ", std::sqrt(i), ".", BS::synced_stream::endl); }) .wait(); } ``` Note, however, that `BS::synced_stream::endl` should only be used if flushing is desired; otherwise, a newline character should be used instead. As with `std::endl`, using `BS::synced_stream::endl` too often will cause a performance hit, as it will force the stream to flush the buffer every time it is called. If desired, `BS::synced_stream` can also synchronize printing into more than one stream at a time. To facilitate this, we can pass a list of output streams to the constructor. For example, the following program will print the same output to both `std::cout` and a log file: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include // std::ofstream #include // std::cout BS::thread_pool pool; int main() { std::ofstream log_file("task.log"); BS::synced_stream sync_out(std::cout, log_file); pool.submit_sequence(0, 5, [&sync_out](const unsigned int i) { sync_out.println("Task no. ", i, " executing."); }) .wait(); } ``` Note that we must wait on the future before the `main()` function ends, as otherwise the log file may be destructed before the tasks finish executing. If we used `detach_sequence()`, which does not return a future, we would have to call `pool.wait()` in the last line. 
In this example we did not create the `BS::synced_stream` as a global object, since we wanted to pass the log file as a stream to the constructor. However, it is also possible to add streams to or remove streams from an existing `BS::synced_stream` object using the member functions `add_stream()` and `remove_stream()`. For example, in the following program, we create a `BS::synced_stream` global object with the default constructor, so that it prints to `std::cout`, but then we change our minds, remove `std::cout` from the list of streams, and add a log file instead: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include // std::ofstream #include // std::cout BS::synced_stream sync_out; BS::thread_pool pool; int main() { std::ofstream log_file("task.log"); sync_out.remove_stream(std::cout); sync_out.add_stream(log_file); pool.submit_sequence(0, 5, [](const unsigned int i) { sync_out.println("Task no. ", i, " executing."); }) .wait(); } ``` It is common practice to create a global `BS::synced_stream` object, so that it can be accessed from anywhere in the program, without having to pass it to every function that might want to print something to the stream. However, if you also have a global `BS::thread_pool` object, you must always make sure to define the global `BS::synced_stream` object **before** the global `BS::thread_pool` object, for the reasons explained in the warning above. Internally, `BS::synced_stream` keeps the streams in an `std::vector`. The order in which the streams are added is also the order in which they will be printed to. For more precise control, you can use the member function `get_streams()` to get a reference to this vector, and manipulate it directly as you see fit. ## Managing tasks ### Monitoring the tasks Sometimes you may wish to monitor what is happening with the tasks you submitted to the pool. 
This may be done using these three member functions: * `get_tasks_queued()` gets the number of tasks currently waiting in the queue to be executed. * `get_tasks_running()` gets the number of tasks currently being executed by the threads. * `get_tasks_total()` gets the total number of unfinished tasks: either still in the queue, or being executed by a thread. Note that `get_tasks_total() == get_tasks_queued() + get_tasks_running()`. These functions are demonstrated in the following program: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include // std::chrono #include // std::this_thread BS::synced_stream sync_out; BS::thread_pool pool(4); void sleep_half_second(const unsigned int i) { std::this_thread::sleep_for(std::chrono::milliseconds(500)); sync_out.println("Task ", i, " done."); } void monitor_tasks() { sync_out.println(pool.get_tasks_total(), " tasks total, ", pool.get_tasks_running(), " tasks running, ", pool.get_tasks_queued(), " tasks queued."); } int main() { pool.wait(); pool.detach_sequence(0, 12, sleep_half_second); monitor_tasks(); std::this_thread::sleep_for(std::chrono::milliseconds(750)); monitor_tasks(); std::this_thread::sleep_for(std::chrono::milliseconds(500)); monitor_tasks(); std::this_thread::sleep_for(std::chrono::milliseconds(500)); monitor_tasks(); pool.wait(); } ``` Assuming you have at least 4 hardware threads (so that 4 tasks can run concurrently), the output should be similar to: ```none 12 tasks total, 0 tasks running, 12 tasks queued. Task 0 done. Task 1 done. Task 2 done. Task 3 done. 8 tasks total, 4 tasks running, 4 tasks queued. Task 4 done. Task 5 done. Task 6 done. Task 7 done. 4 tasks total, 4 tasks running, 0 tasks queued. Task 8 done. Task 9 done. Task 10 done. Task 11 done. 0 tasks total, 0 tasks running, 0 tasks queued. 
``` The reason we called `pool.wait()` in the beginning is that when the thread pool is created, an initialization task runs in each thread, so if we don't wait, the first line would say there are 16 tasks in total, including the 4 initialization tasks. See [below](#thread-initialization-functions) for more details. Of course, we also called `pool.wait()` at the end to ensure that all tasks have finished executing before the program ends. ### Purging tasks Consider a situation where the user cancels a multithreaded operation while it is still ongoing. Perhaps the operation was split into multiple tasks, and half of the tasks are currently being executed by the pool's threads, but the other half are still waiting in the queue. The thread pool cannot terminate the tasks that are already running, as C++ does not provide that functionality (and in any case, abruptly terminating a task while it's running could have extremely bad consequences, such as memory leaks and data corruption). However, the tasks that are still waiting in the queue can be purged using the `purge()` member function. Once `purge()` is called, any tasks still waiting in the queue will be discarded, and will never be executed by the threads. Please note that there is no way to restore the purged tasks; they are gone forever! Consider for example the following program: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include // std::chrono #include // std::this_thread BS::synced_stream sync_out; BS::thread_pool pool(4); int main() { pool.detach_sequence(0, 8, [](const unsigned int i) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); sync_out.println("Task ", i, " done."); }); std::this_thread::sleep_for(std::chrono::milliseconds(50)); pool.purge(); pool.wait(); } ``` The program submits 8 tasks to the queue. Each task waits 100 milliseconds and then prints a message. 
The thread pool has 4 threads, so it will execute the first 4 tasks in parallel, and then the remaining 4. We wait 50 milliseconds, to ensure that the first 4 tasks have all started running. Then we call `purge()` to purge the remaining 4 tasks. As a result, these tasks never get executed. However, since the first 4 tasks are still running when `purge()` is called, they will finish uninterrupted; `purge()` only discards tasks that have not yet started running. The output of the program therefore only contains the messages from the first 4 tasks: ```none Task 0 done. Task 1 done. Task 2 done. Task 3 done. ``` Please note that, as explained above, the thread pool cannot terminate running tasks on its own. If you need to do that, you must incorporate a mechanism into the task itself that will terminate the task safely. For example, you could create an atomic flag that the task checks periodically, terminating itself if the flag is set. Here is a simple example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include <atomic> // std::atomic #include <chrono> // std::chrono #include <thread> // std::this_thread BS::synced_stream sync_out; BS::thread_pool pool(4); int main() { std::atomic<bool> stop_flag = false; pool.detach_sequence(0, 8, [&stop_flag](const unsigned int i) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); if (stop_flag) return; sync_out.println("Task ", i, " done."); }); std::this_thread::sleep_for(std::chrono::milliseconds(50)); stop_flag = true; pool.purge(); pool.wait(); } ``` This program will not print out any output, as the tasks will terminate themselves prematurely when `stop_flag` is set to `true`. In this case, we did not have to call `purge()`, but by doing so we prevented the other 4 tasks from being executed for no reason. ### Exception handling `submit_task()` catches any exceptions thrown by the submitted task and forwards them to the corresponding future. They can then be caught when invoking the `get()` member function of the future. 
For example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include <exception> // std::exception #include <future> // std::future #include <stdexcept> // std::runtime_error BS::synced_stream sync_out; BS::thread_pool pool; double inverse(const double x) { if (x == 0) throw std::runtime_error("Division by zero!"); return 1 / x; } int main() { constexpr double num = 0; std::future<double> my_future = pool.submit_task( [num] { return inverse(num); }); try { const double result = my_future.get(); sync_out.println("The inverse of ", num, " is ", result, "."); } catch (const std::exception& e) { sync_out.println("Caught exception: ", e.what()); } } ``` The output will be: ```none Caught exception: Division by zero! ``` However, if you change `num` to any non-zero number, no exceptions will be thrown and the inverse will be printed. It is important to note that `wait()` does not throw any exceptions; only `get()` does. Therefore, even if your task does not return anything, i.e. your future is an `std::future<void>`, you must still use `get()` on the future obtained from it if you want to catch exceptions thrown by it. Here is an example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include <exception> // std::exception #include <future> // std::future #include <stdexcept> // std::runtime_error BS::synced_stream sync_out; BS::thread_pool pool; void print_inverse(const double x) { if (x == 0) throw std::runtime_error("Division by zero!"); sync_out.println("The inverse of ", x, " is ", 1 / x, "."); } int main() { constexpr double num = 0; std::future<void> my_future = pool.submit_task( [num] { print_inverse(num); }); try { my_future.get(); } catch (const std::exception& e) { sync_out.println("Caught exception: ", e.what()); } } ``` When using `BS::multi_future` to handle multiple futures at once, exception handling works the same way: if any of the futures may throw exceptions, you may catch these exceptions when calling `get()`, even in the case of `BS::multi_future<void>`. 
Note that if you use `detach_task()`, or any other `detach` member function, there is no way to catch exceptions thrown by the task, as a future will not be returned. In such cases, all exceptions thrown by the task will be silently ignored, to prevent program termination. If you need to catch exceptions in a detached task, you must do so within the task itself, as in this example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include <exception> // std::exception #include <stdexcept> // std::runtime_error BS::synced_stream sync_out; BS::thread_pool pool; double inverse(const double x) { if (x == 0) throw std::runtime_error("Division by zero!"); return 1 / x; } int main() { constexpr double num = 0; pool.detach_task( [num] { try { const double result = inverse(num); sync_out.println("The inverse of ", num, " is ", result, "."); } catch (const std::exception& e) { sync_out.println("Caught exception: ", e.what()); } }); pool.wait(); } ``` If exceptions are explicitly disabled in your codebase, or if the feature-test macro `__cpp_exceptions` is undefined for any other reason, exception handling will be automatically disabled in the thread pool. ### Getting information about the current thread The class `BS::this_thread` provides functionality analogous to `std::this_thread`, that is, it allows a thread to reference itself. It contains the following static member functions: * `BS::this_thread::get_index()` can be used to get the index of the current thread as an `std::optional<std::size_t>` object. * If this thread belongs to a `BS::thread_pool` object, the return value will be an index in the range `[0, N)` where `N == BS::thread_pool::get_thread_count()`. * Otherwise, for example if this thread is the main thread or an independent thread not in any pools, `std::nullopt` will be returned. * `BS::this_thread::get_pool()` can be used to get a pointer to the thread pool that owns the current thread as an `std::optional<void*>` object. 
* If this thread belongs to a `BS::thread_pool` object, the return value will be a `void` pointer to that object. * Otherwise, `std::nullopt` will be returned. An [`std::optional`](https://en.cppreference.com/w/cpp/utility/optional) is an object that may or may not have a value. [`std::nullopt`](https://en.cppreference.com/w/cpp/utility/optional/nullopt) is a placeholder which indicates that the object does not have a value. To access an `std::optional`, you should first use [`std::optional::has_value()`](https://en.cppreference.com/w/cpp/utility/optional/operator_bool) to check if it contains a value, and if so, use [`std::optional::value()`](https://en.cppreference.com/w/cpp/utility/optional/value) to obtain that value. A shortcut for `if (x.has_value())` is `if (x)`, and a shortcut for `x.value()` is `*x`. The reason that `BS::this_thread::get_pool()` returns a `void*` is that `BS::thread_pool` is a template. Once you obtain the pool pointer, you must cast it to the desired instantiation of the template if you want to use any member functions. Note that you have to cast it to the correct type; if you cast a pointer to a `BS::light_thread_pool` into a pointer to a `BS::priority_thread_pool`, for example, your program will have undefined behavior. (Please see the [optional features](#optional-features) section for more information about the template parameters and aliases.) Here is an example illustrating all of the above: ```cpp #include "BS_thread_pool.hpp" // BS::light_thread_pool, BS::synced_stream, BS::this_thread #include <atomic> // std::atomic #include <cstddef> // std::size_t #include <optional> // std::optional #include <thread> // std::thread BS::synced_stream sync_out; BS::light_thread_pool p1; BS::light_thread_pool p2; std::atomic<char> ltr = 'A'; void check_this_thread(const char letter) { const std::optional<void*> my_pool = BS::this_thread::get_pool(); const std::optional<std::size_t> my_index = BS::this_thread::get_index(); if (my_pool && my_index) { const std::size_t pool_number = *my_pool == &p1 ? 
1 : 2; sync_out.println("Task ", letter, " is being executed by thread #", *my_index, " of pool #", pool_number, '.'); static_cast<BS::light_thread_pool*>(*my_pool)->detach_task( [letter] { sync_out.println("-> Task ", ltr++, " was submitted by task ", letter, " using detach_task()."); }); } else { sync_out.println("Task ", letter, " is being executed by an independent thread, not in any thread pools."); std::thread( [letter] { sync_out.println("-> Task ", ltr++, " was submitted by task ", letter, " using a detached std::thread."); }) .detach(); } } int main() { p1.submit_task( [] { check_this_thread(ltr++); }) .wait(); p2.submit_task( [] { check_this_thread(ltr++); }) .wait(); std::thread( [] { check_this_thread(ltr++); }) .join(); } ``` The output of this program will be similar to: ```none Task A is being executed by thread #3 of pool #1. -> Task B was submitted by task A using detach_task(). Task C is being executed by thread #7 of pool #2. -> Task D was submitted by task C using detach_task(). Task E is being executed by an independent thread, not in any thread pools. -> Task F was submitted by task E using a detached std::thread. ``` In this example, we execute the task `check_this_thread()` in three different ways: 1. By submitting it from the thread pool `p1`. 2. By submitting it from the thread pool `p2`. 3. By submitting it from an independent `std::thread`. The task calls `BS::this_thread::get_pool()` and `BS::this_thread::get_index()` and receives two `std::optional` objects, `my_pool` and `my_index`. If both have a value (that is, evaluate to `true`), then the task knows it is running in a thread pool. The actual values are then obtained by "dereferencing" them: the pool pointer is `*my_pool`, and the thread index is `*my_index`. The task deduces which pool it is running in by comparing the pointer `*my_pool` to the addresses of the pools `p1` and `p2`. It also gets the index of the thread from `*my_index`. 
Finally, it detaches an additional task (without waiting for it, as that might cause a deadlock!) from its own pool by first casting the `void*` pointer to the correct type, which in this case is `BS::light_thread_pool*`, and then calling the `detach_task()` member function of that specific pool. If `my_pool` and `my_index` do not have values (that is, evaluate to `false`), then the task knows it is running in an independent thread. In this case, it detaches the additional task using another independent thread. ### Thread initialization functions Sometimes, it is necessary to initialize the threads before they run any tasks. This can be done by submitting a proper initialization function to the `BS::thread_pool` constructor or to `reset()`, either as the only argument or as the second argument after the desired number of threads. The thread initialization function must have no return value. It can either take one argument, the thread index of type `std::size_t`, or zero arguments. In the latter case, the function can use `BS::this_thread::get_index()` to find the thread index. In addition, the function can use `BS::this_thread::get_pool()` to find which pool its thread belongs to. The initialization functions are effectively submitted as a set of special tasks, one per thread, which bypass the queue, but still count towards the number of running tasks. This means `get_tasks_total()` and `get_tasks_running()` will report that these tasks are running if they are checked immediately after the pool is initialized. This is done so that the user has the option to either wait for the initialization functions to finish, by calling `wait()` on the pool, or just keep going. 
In either case, the initialization functions will always finish running before any tasks are executed by the corresponding thread, so there is no reason to wait for them to finish unless they have some side-effects that affect the main thread, or if they must finish running on **all** the threads before the pool starts executing any tasks. Here is a simple example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include // std::mt19937_64, std::random_device BS::synced_stream sync_out; thread_local std::mt19937_64 twister; int main() { BS::thread_pool pool( [] { twister.seed(std::random_device()()); }); pool.submit_sequence(0, 4, [](int) { sync_out.println("I generated a random number: ", twister()); }) .wait(); } ``` In this example, we create a `thread_local` Mersenne twister engine, meaning that each thread has its own independent engine. However, if we do not seed the engine, each thread would generate the exact same sequence of pseudo-random numbers. To remedy this, we pass an initialization function to the `BS::thread_pool` constructor which seeds the twister in each thread with the (hopefully) non-deterministic random number generator `std::random_device`. Note that the lambda function we passed to `submit_sequence()` has the signature `[](int)`, with an unnamed `int` argument, as it does not make use of the sequence index, which will be a number in the range `[0, 4)`. This is an easy way to simply submit the same task multiple times. **Warning:** Thread initialization functions must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function. ### Thread cleanup functions Similarly to the thread initialization function, it is also possible to provide the pool with a cleanup function to run in each thread right before it is destroyed, which will happen when the pool is destructed or reset. 
Like the initialization function, the cleanup function must have no return value, and can either take one argument, the thread index of type `std::size_t`, or zero arguments. Each pool can have its own cleanup function, which is specified using the member function `set_cleanup_func()`. Here is an example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::this_thread, BS::thread_pool #include // std::chrono #include // std::size_t #include // std::ofstream #include // std::to_string #include // std::this_thread thread_local std::ofstream log_file; thread_local BS::synced_stream sync_out(log_file); constexpr std::size_t threads = 4; int main() { BS::thread_pool pool(threads, [](const std::size_t idx) { log_file.open("thread_" + std::to_string(idx) + ".log"); }); pool.set_cleanup_func( [] { log_file.close(); }); pool.submit_sequence(0, threads * 10, [](const std::size_t idx) { std::this_thread::sleep_for(std::chrono::milliseconds(50)); sync_out.println("Task ", idx, " is running on thread ", *BS::this_thread::get_index(), '.'); }) .wait(); } ``` In this example, we create 4 threads, each of which has a separate thread-local `BS::synced_stream` object writing to its own log file of the form `thread_N.log` where `N` is the thread index. The initialization function, passed as an argument to the constructor, opens the log file. The cleanup function, set using `set_cleanup_func()`, closes the log file. We submit 40 tasks to the queue using `submit_sequence()`, each of which prints a message to the log file indicating which thread it is running on. When the `main()` function exits and `pool` is destroyed, the cleanup function is called for each thread, ensuring that the log files are closed properly. **Warning:** As with initialization functions, thread cleanup functions must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function. 
### Passing task arguments by constant reference In C++, it is often crucial to pass function arguments by reference or constant reference, instead of by value. This allows the function to access the object being passed directly, rather than creating a new copy of the object. We have already seen [above](#detaching-and-waiting-for-tasks) that submitting an argument by reference is a simple matter of capturing it with a `&` in the lambda capture list. To submit by **constant** reference, we can use `std::as_const()` as in the following example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include // std::as_const BS::synced_stream sync_out; void increment(int& x) { ++x; } void print(const int& x) { sync_out.println(x); } int main() { BS::thread_pool pool; int n = 0; pool.submit_task( [&n] { increment(n); }) .wait(); pool.submit_task( [&n = std::as_const(n)] { print(n); }) .wait(); } ``` The `increment()` function takes a **reference** to an integer, and increments that integer. Passing the argument by reference guarantees that `n` itself, in the scope of `main()`, will be incremented - rather than a copy of it in the scope of `increment()`. Similarly, the `print()` function takes a **constant reference** to an integer, and prints that integer. Passing the argument by constant reference guarantees that the variable will not be accidentally modified by the function, even though we are accessing `n` itself, rather than a copy. If we replace `print()` with `increment()`, the program won't compile, as `increment()` cannot take constant references. Generally, it is not really necessary to pass arguments by constant reference, but it is more "correct" to do so, if we would like to guarantee that the variable being referenced is indeed never modified. ## Optional features ### Enabling features The thread pool has some optional features, which are disabled by default to minimize overhead. 
They can be enabled by passing the appropriate template parameter to the `BS::thread_pool` class when creating the pool. The template parameter is a bitmask, so you can enable several features at once by combining them with the bitwise OR operator `|`. The bitmask flags are members of the `BS::tp` enumeration class: * `BS::tp::priority` enables [task priority](#setting-task-priority). * `BS::tp::pause` enables [pausing the pool](#pausing-the-pool). * `BS::tp::wait_deadlock_checks` enables [wait deadlock checks](#avoiding-wait-deadlocks). * The default is `BS::tp::none`, which disables all optional features. For example, to enable both task priority and pausing the pool, the thread pool object should be created like this: ```cpp BS::thread_pool<BS::tp::priority | BS::tp::pause> pool; ``` Convenience aliases are defined as follows: * `BS::light_thread_pool` disables all optional features (equivalent to `BS::thread_pool` with the default template parameter, that is, `BS::thread_pool<BS::tp::none>`). * `BS::priority_thread_pool` enables task priority (equivalent to `BS::thread_pool<BS::tp::priority>`). * `BS::pause_thread_pool` enables pausing the pool (equivalent to `BS::thread_pool<BS::tp::pause>`). * `BS::wdc_thread_pool` enables wait deadlock checks (equivalent to `BS::thread_pool<BS::tp::wait_deadlock_checks>`). There are no aliases with multiple features enabled; if this is desired, you must either pass the template parameter explicitly or define your own alias, and use the bitwise OR operator as shown above. Note that, since optional features are enabled separately for each `BS::thread_pool` object, you can have multiple pools with different features enabled in the same program. For example, you can have one `BS::light_thread_pool` for tasks that do not need to be prioritized, and a separate `BS::priority_thread_pool` for tasks that do. ### Setting task priority Turning on the `BS::tp::priority` flag in the template parameter to `BS::thread_pool` enables task priority. 
In addition, the library defines the convenience alias `BS::priority_thread_pool`, which is equivalent to `BS::thread_pool<BS::tp::priority>`. When this feature is enabled, the static member `priority_enabled` will be set to `true`. The priority of a task or group of tasks may then be specified as an additional argument (at the end of the argument list) to `detach_task()`, `submit_task()`, `detach_blocks()`, `submit_blocks()`, `detach_loop()`, `submit_loop()`, `detach_sequence()`, `submit_sequence()`, `detach_bulk()`, and `submit_bulk()`. If the priority is not specified, the default value will be 0. The priority is a number of type `BS::priority_t`, which is a signed 8-bit integer, so it can have any value between -128 and +127. The tasks will be executed in priority order from highest to lowest. If priority is assigned to the block/loop/sequence parallelization functions, which submit multiple tasks, then all of these tasks will have the same priority. The enumeration `BS::pr` contains some pre-defined priorities for users who wish to avoid magic numbers and enjoy better future-proofing. In order of decreasing priority, the pre-defined priorities are: `BS::pr::highest`, `BS::pr::high`, `BS::pr::normal`, `BS::pr::low`, and `BS::pr::lowest`. Here is a simple example: ```cpp #include "BS_thread_pool.hpp" // BS::priority_thread_pool, BS::synced_stream BS::synced_stream sync_out; BS::priority_thread_pool pool(1); int main() { pool.detach_task( [] { sync_out.println("This task will execute third."); }, BS::pr::normal); pool.detach_task( [] { sync_out.println("This task will execute fifth."); }, BS::pr::lowest); pool.detach_task( [] { sync_out.println("This task will execute second."); }, BS::pr::high); pool.detach_task( [] { sync_out.println("This task will execute first."); }, BS::pr::highest); pool.detach_task( [] { sync_out.println("This task will execute fourth."); }, BS::pr::low); } ``` This program will print out the tasks in the correct priority order. 
Note that for simplicity, we used a pool with just one thread, so the tasks will run one at a time. In a pool with 5 or more threads, all 5 tasks will actually run more or less at the same time, because, for example, the task with the second-highest priority will be picked up by another thread while the task with the highest priority is still running. Of course, this is just a pedagogical example. In a realistic use case we may want, for example, to submit tasks that must be completed immediately with high priority so they skip over other tasks already in the queue, or background non-urgent tasks with low priority so they evaluate only after higher-priority tasks are done. Task priority is facilitated using [`std::priority_queue`](https://en.cppreference.com/w/cpp/container/priority_queue), which has O(log n) complexity for storing new tasks, but only O(1) complexity for retrieving the next (i.e. highest-priority) task. This is in contrast with [`std::queue`](https://en.cppreference.com/w/cpp/container/queue), used if priority is disabled, which both stores and retrieves with O(1) complexity. Due to this, enabling the priority queue can incur a very slight decrease in performance, depending on the specific use case, which is why this feature is disabled by default. In other words, you gain functionality, but pay for it in performance. However, the difference in performance is never substantial, and compiler optimizations can often reduce it to a negligible amount. Lastly, please note that when using the priority queue, tasks will not necessarily be executed in the same order they were submitted, **even if they all have the same priority**. This is due to the implementation of `std::priority_queue` as a [binary heap](https://en.wikipedia.org/wiki/Binary_heap), which means tasks are stored as a binary tree instead of sequentially. ### Pausing the pool Turning on the `BS::tp::pause` flag in the template parameter to `BS::thread_pool` enables pausing the pool. 
In addition, the library defines the convenience alias `BS::pause_thread_pool`, which is equivalent to `BS::thread_pool<BS::tp::pause>`. When this feature is enabled, the static member `pause_enabled` will be set to `true`. This feature enables the member functions `pause()`, `unpause()`, and `is_paused()`. When you call `pause()`, the workers will temporarily stop retrieving new tasks out of the queue. However, any tasks already executing will keep running until they are done, since the thread pool has no control over the internal code of your tasks. If you need to pause a task in the middle of its execution, you must do that manually by programming your own pause mechanism into the task itself. To resume retrieving tasks, call `unpause()`. To check whether the pool is currently paused, call `is_paused()`. Here is an example: ```cpp #include "BS_thread_pool.hpp" // BS::pause_thread_pool, BS::synced_stream #include <chrono> // std::chrono #include <thread> // std::this_thread BS::synced_stream sync_out; BS::pause_thread_pool pool(4); void sleep_half_second(const unsigned int i) { std::this_thread::sleep_for(std::chrono::milliseconds(500)); sync_out.println("Task ", i, " done."); } void check_if_paused() { if (pool.is_paused()) sync_out.println("Pool paused."); else sync_out.println("Pool unpaused."); } int main() { pool.detach_sequence(0, 8, sleep_half_second); sync_out.println("Submitted 8 tasks."); std::this_thread::sleep_for(std::chrono::milliseconds(250)); pool.pause(); check_if_paused(); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); sync_out.println("Still paused..."); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); pool.detach_sequence(8, 12, sleep_half_second); sync_out.println("Submitted 4 more tasks."); sync_out.println("Still paused..."); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); pool.unpause(); check_if_paused(); } ``` Assuming you have at least 4 hardware threads, the output should be similar to: ```none Submitted 8 tasks. Pool paused. 
Task 0 done. Task 1 done. Task 2 done. Task 3 done. Still paused... Submitted 4 more tasks. Still paused... Pool unpaused. Task 4 done. Task 5 done. Task 6 done. Task 7 done. Task 8 done. Task 9 done. Task 10 done. Task 11 done. ``` In this example, we initially submit a total of 8 tasks to the queue. The first 4 tasks start running immediately (only 4, since the pool has 4 threads). We wait for 250ms, and then pause. The tasks that are already running (for 500ms) will keep running until they finished; pausing has no effect on currently running tasks. However, the other 4 tasks will not be executed yet. While the pool is paused, we submit 4 more tasks to the queue, but they just wait at the end of the queue. When we unpause, the remaining 4 initial tasks are executed, followed by the 4 new tasks. While the workers are paused, `wait()` will wait only for the running tasks instead of all tasks (otherwise it would wait forever). This is demonstrated by the following program: ```cpp #include "BS_thread_pool.hpp" // BS::pause_thread_pool, BS::synced_stream #include // std::chrono #include // std::this_thread BS::synced_stream sync_out; BS::pause_thread_pool pool(4); void sleep_half_second(const unsigned int i) { std::this_thread::sleep_for(std::chrono::milliseconds(500)); sync_out.println("Task ", i, " done."); } void check_if_paused() { if (pool.is_paused()) sync_out.println("Pool paused."); else sync_out.println("Pool unpaused."); } int main() { pool.detach_sequence(0, 8, sleep_half_second); sync_out.println("Submitted 8 tasks. Waiting for them to complete."); pool.wait(); pool.detach_sequence(8, 20, sleep_half_second); sync_out.println("Submitted 12 more tasks."); std::this_thread::sleep_for(std::chrono::milliseconds(250)); pool.pause(); check_if_paused(); sync_out.println("Waiting for the ", pool.get_tasks_running(), " running tasks to complete."); pool.wait(); sync_out.println("All running tasks completed. 
", pool.get_tasks_queued(), " tasks still queued."); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); sync_out.println("Still paused..."); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); sync_out.println("Still paused..."); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); pool.unpause(); check_if_paused(); std::this_thread::sleep_for(std::chrono::milliseconds(250)); sync_out.println("Waiting for the remaining ", pool.get_tasks_total(), " tasks (", pool.get_tasks_running(), " running and ", pool.get_tasks_queued(), " queued) to complete."); pool.wait(); sync_out.println("All tasks completed."); } ``` The output should be similar to: ```none Submitted 8 tasks. Waiting for them to complete. Task 0 done. Task 1 done. Task 2 done. Task 3 done. Task 4 done. Task 5 done. Task 6 done. Task 7 done. Submitted 12 more tasks. Pool paused. Waiting for the 4 running tasks to complete. Task 8 done. Task 9 done. Task 10 done. Task 11 done. All running tasks completed. 8 tasks still queued. Still paused... Still paused... Pool unpaused. Waiting for the remaining 8 tasks (4 running and 4 queued) to complete. Task 12 done. Task 13 done. Task 14 done. Task 15 done. Task 16 done. Task 17 done. Task 18 done. Task 19 done. All tasks completed. ``` The first `wait()`, which was called while the pool was not paused, waited for all 8 tasks, both running and queued. The second `wait()`, which was called after pausing the pool, only waited for the 4 running tasks, while the other 8 tasks remained queued, and were not executed since the pool was paused. Finally, the third `wait()`, which was called after unpausing the pool, waited for the remaining 8 tasks, both running and queued. Note that pausing the pool adds additional checks to the waiting and worker functions, which have a very small but non-zero overhead. This is why this feature is disabled by default. 
**Warning:** If the thread pool is destroyed while paused, any tasks still in the queue will never be executed! ### Avoiding wait deadlocks Turning on the `BS::tp::wait_deadlock_checks` flag in the template parameter to `BS::thread_pool` enables wait deadlock checks. In addition, the library defines the convenience alias `BS::wdc_thread_pool`, which is equivalent to `BS::thread_pool`. When this feature is enabled, the static member `wait_deadlock_checks_enabled` will be set to `true`. To understand why this feature is useful, consider the following program: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool BS::synced_stream sync_out; BS::thread_pool pool; int main() { pool.detach_task( [] { pool.wait(); sync_out.println("Done waiting."); }); } ``` This program creates a thread pool, and then detaches a task that waits for tasks in the same thread pool to complete. If you run this program, it will never print the message "Done waiting", because the task will wait for **itself** to complete. This causes a **deadlock**, and the program will wait forever. Usually, in simple programs, this will never happen. However, in more complicated programs, perhaps ones running multiple thread pools in parallel, wait deadlocks could potentially occur. In such cases, wait deadlock checks may be useful. If enabled, `wait()`, `wait_for()`, and `wait_until()` will check whether the user tried to call them from within a thread of the same pool, and if so, they will throw the exception `BS::wait_deadlock` instead of waiting. 
Here is an example: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::wdc_thread_pool BS::synced_stream sync_out; BS::wdc_thread_pool pool; int main() { pool.detach_task( [] { try { pool.wait(); sync_out.println("Done waiting."); } catch (const BS::wait_deadlock&) { sync_out.println("Error: Deadlock!"); } }); } ``` This time, `wait()` will detect the deadlock, and will throw an exception, causing the output to be "Error: Deadlock!". Wait deadlock checks are disabled by default because wait deadlocks are not something that happens often, and the check adds a small but non-zero overhead every time `wait()`, `wait_for()`, or `wait_until()` is called. Note that if the feature-test macro `__cpp_exceptions` is undefined, wait deadlock checks will be automatically disabled, and trying to compile a program which creates a pool with the `BS::tp::wait_deadlock_checks` flag enabled will result in a compilation error. ## Native extensions ### Enabling the native extensions While portability is one of the guiding principle for developing this library, non-portable features such as setting the thread priority using the operating system's native API can be very useful. Therefore, the library includes native extensions - which are disabled by default, as they are not portable. (Note that as long as the native extensions are disabled, the library is 100% standard C++.) The native extensions may be enabled by defining the macro `BS_THREAD_POOL_NATIVE_EXTENSIONS` at compilation time. If including the library as a header file, the macro must be defined before `#include "BS_thread_pool.hpp"`. Note that even if the macro is defined, the native extensions are disabled automatically if a supported operating system (Windows, Linux, or macOS) is not detected. If importing the library [as a C++20 module](#importing-the-library-as-a-c20-module), defining the macro before importing the module will not work, as modules cannot access macros defined in the program that imported them. 
Instead, you must define the macro as a compiler flag: `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` for Clang and GCC or `/D BS_THREAD_POOL_NATIVE_EXTENSIONS` for MSVC. [The test program](#testing-the-library) only tests the native extensions if the macro `BS_THREAD_POOL_NATIVE_EXTENSIONS` is defined at compilation time. If importing the library [as a C++20 module](#importing-the-library-as-a-c20-module), please ensure that the macro is also enabled when compiling the module. The `constexpr` flag `BS::thread_pool_native_extensions` indicates whether the thread pool library was compiled with native extensions enabled. Note that the flag will be `false` if `BS_THREAD_POOL_NATIVE_EXTENSIONS` is defined but the operating system is unsupported. **Warning:** Please note that the native extensions have only been tested on the operating systems listed above under [compiling and compatibility](#compiling-and-compatibility). They have not been tested on older versions of these operating systems, other Linux distributions, or any other operating systems, and are therefore not guaranteed to work on every system. If you encounter any issues, please report them on [the GitHub repository](https://github.com/bshoshany/thread-pool). ### Setting thread priority The thread pool's native extensions provide the ability to set a thread's priority using the operating system's native API. Please note that this is **not** the same as [setting a task's priority](#setting-task-priority), which is a feature of the thread pool's queue, unrelated to the pool's threads themselves. Task priority controls which tasks are executed first, while thread priority (roughly) controls how much CPU time a thread gets compared to other threads. In addition, you can use the native extensions to set the priority of any thread (such as a thread created using `std::thread`), not just a pool thread. 
For performance-critical applications, you may wish to increase the thread priority, while for applications that should run in the background, you may wish to decrease it. As priority is handled very differently on different operating systems, the thread pool library provides an abstraction layer over the native APIs, in the form of the enumeration class `BS::os_thread_priority`, which has the following 7 members: * `BS::os_thread_priority::idle` * `BS::os_thread_priority::lowest` * `BS::os_thread_priority::below_normal` * `BS::os_thread_priority::normal` * `BS::os_thread_priority::above_normal` * `BS::os_thread_priority::highest` * `BS::os_thread_priority::realtime` On Windows, these pre-defined priorities map 1-to-1 with [the thread priority values defined by the Windows API](https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadpriority) (with `realtime` mapping to time critical priority). On Linux and macOS, thread priorities are a lot more complicated, so these pre-defined priorities are mapped to the parameters available in the native API. On Linux (with POSIX threads), thread priority is determined by three factors: [scheduling policy](https://www.man7.org/linux/man-pages/man3/pthread_setschedparam.3.html), priority value, and ["nice" value](https://www.man7.org/linux/man-pages/man2/setpriority.2.html). The thread pool library's abstraction layer distills these factors into the above pre-defined levels, for simplicity and portability. The total number of possible combinations of parameters is much larger, but allowing more fine-grained control would not be portable, and in any case it would have limited use. For the precise mapping, please refer to the source code itself (in the header file `BS_thread_pool.hpp`). On macOS, the thread pool library will also use POSIX threads, but unlike Linux, the "nice" value is per-process, not per-thread (in compliance with the POSIX standard). 
However, macOS does allow more freedom with respect to the available range of priorities. Again, for the precise details of the mapping, please refer to the source code itself. Most users do not need to worry about the specifics of how thread priority is handled on different operating systems. The abstraction layer provided by the thread pool library is meant to make everything as simple and portable as possible. However, it is important to note that only Windows allows a non-privileged user to set a thread's priority to a higher value. On Linux and macOS, a non-privileged user can only set a thread's priority to a lower value, and only root can set a higher value; also, confusingly, if a user decreased the priority of their thread from normal to a lower priority, they cannot increase it back to normal without root privileges, even though normal was the thread's initial priority. Thread priority is managed using two static member functions of the `BS::this_thread` class: * `BS::this_thread::get_os_thread_priority()` gets the current thread's priority. It returns an object of type `std::optional`. If the returned object does not contain a value, then either the priority could not be determined, or it is not one of the pre-defined values listed above. * `BS::this_thread::set_os_thread_priority()` sets the current thread's priority. It returns `true` if the priority was set successfully, or `false` otherwise. Usually, `false` means that the user does not have the necessary permissions to set the desired priority. Increasing or decreasing the priority of all the threads in a pool can be done most easily using an [initialization function](#thread-initialization-functions). 
Here is an example: ```cpp #define BS_THREAD_POOL_NATIVE_EXTENSIONS #include "BS_thread_pool.hpp" // BS::os_thread_priority, BS::synced_stream, BS::this_thread, BS::thread_pool #include // std::size_t #include // std::map #include // std::optional #include // std::string BS::synced_stream sync_out; BS::os_thread_priority target = BS::os_thread_priority::highest; const std::map os_thread_priority_map = {{BS::os_thread_priority::idle, "idle"}, {BS::os_thread_priority::lowest, "lowest"}, {BS::os_thread_priority::below_normal, "below_normal"}, {BS::os_thread_priority::normal, "normal"}, {BS::os_thread_priority::above_normal, "above_normal"}, {BS::os_thread_priority::highest, "highest"}, {BS::os_thread_priority::realtime, "realtime"}}; std::string os_thread_priority_name(const BS::os_thread_priority priority) { const std::map::const_iterator it = os_thread_priority_map.find(priority); return (it != os_thread_priority_map.end()) ? it->second : "unknown"; } void set_priority(const std::size_t idx) { const std::optional get_result = BS::this_thread::get_os_thread_priority(); if (get_result) sync_out.println("The OS thread priority of thread ", idx, " is currently set to '", os_thread_priority_name(*get_result), "'."); else sync_out.println("Error: Failed to get the OS thread priority of thread ", idx, '!'); const bool set_result = BS::this_thread::set_os_thread_priority(target); sync_out.println(set_result ? "Successfully" : "Error: Failed to", " set the OS priority of thread ", idx, " to '", os_thread_priority_name(target), "'."); } int main() { BS::thread_pool pool(4, set_priority); } ``` On Linux or macOS, please ensure that you run this example as root using `sudo`, otherwise it will fail. In this example we used an initialization function `set_priority()` to first print the initial priority of each thread (which should be "normal") and then set the priority of each thread to "highest". 
`os_thread_priority_name()` is a helper function to convert a `BS::os_thread_priority` value to a human-readable string. ### Setting thread affinity The thread pool's native extensions allow the user to set a thread's processor affinity using the operating system's native API. Processor affinity, sometimes called "pinning", controls which logical processors a thread is allowed to run on. Generally, a non-hyperthreaded core corresponds to one logical processor, and a hyperthreaded core corresponds to two logical processors. This can be useful for performance optimization, as it can reduce cache misses. However, it can also degrade performance, sometimes severely, since the thread will not run at all until its assigned cores are available. Therefore, it is usually better to let the operating system's scheduler manage thread affinities on its own, except in very specific cases. Please note that setting thread affinity works on Windows and Linux, but not on macOS and Android, as the native API does not allow it. As affinity is handled differently on different operating systems, the thread pool library provides an abstraction layer over the native APIs. In this abstraction layer, affinity is controlled using an `std::vector` where each element corresponds to a logical processor. Thread affinity is managed using two static member functions of the `BS::this_thread` class: * `BS::this_thread::get_os_thread_affinity()` gets the current thread's affinity. It returns an object of type `std::optional>`. If the returned object does not contain a value, then the affinity could not be determined. On macOS and Android, this function always returns `std::nullopt`. * `BS::this_thread::set_os_thread_affinity()` sets the current thread's affinity. It returns `true` if the affinity was set successfully, or `false` otherwise. On macOS and Android, this function always returns `false`. 
Note that the thread affinity must be a subset of the process affinity (as obtained using [`BS::get_os_process_affinity()`](#setting-process-affinity)) for the containing process of a thread. Setting thread affinity can significantly increase performance if multiple threads are accessing the same data, as the data can be kept in the local cache of the specific core that the threads are running on. This is illustrated in the following program: ```cpp #define BS_THREAD_POOL_NATIVE_EXTENSIONS #include "BS_thread_pool.hpp" // BS::synced_stream, BS::this_thread #include // std::atomic #include // std::chrono #include // std::uint64_t #include // std::thread #include // std::vector void do_test(const bool pin_threads) { BS::synced_stream sync_out; constexpr std::uint64_t num_increments = 10'000'000; sync_out.println(pin_threads ? "With " : "Without", " thread pinning:"); std::atomic counter = 0; auto worker = [&counter, pin_threads] { if (pin_threads) { std::vector affinity(std::thread::hardware_concurrency(), false); affinity[0] = true; BS::this_thread::set_os_thread_affinity(affinity); } for (std::uint64_t i = 0; i < num_increments; ++i) ++counter; }; const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); std::thread thread1(worker); std::thread thread2(worker); thread1.join(); thread2.join(); const std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); sync_out.println("Final count: ", counter, ", execution time: ", (std::chrono::duration_cast(end - start)).count(), " ms."); } int main() { do_test(false); do_test(true); } ``` The output should be similar to: ```none Without thread pinning: Final count: 20000000, execution time: 160 ms. With thread pinning: Final count: 20000000, execution time: 68 ms. ``` In this program, we create two threads, each of which increments an atomic counter 10 million times. 
First, we do this without thread pinning; in this case, since the OS will most likely run the threads on two different cores, the state of the atomic variable will need to be synchronized between the two cores, which will incur a performance penalty. Then, we do this with thread pinning, using `BS::this_thread::set_os_thread_affinity()` to set the affinity of each thread to core 0 by passing a vector with `true` at index 0 and `false` at all other indices. In this case, the atomic variable will be kept in the local cache of core 0, which will increase performance. **Warning:** Setting the affinity of threads in a pool is almost never a good idea! When you submit a task to a thread pool, you have no control over which thread it will actually run in. The main benefit of thread affinity is to reduce cache misses, but there is no way to guarantee that tasks accessing the same data will run on the same core if they are submitted to a pool. In fact, setting the affinity of the pool threads will almost certainly decrease performance, sometimes substantially, as the operating system's scheduler will be prevented from assigning threads to cores in the most optimal way. The most common use case for `BS::this_thread::set_os_thread_affinity()` is to set the affinity of individual threads created independently of any pool, for example using `std::thread`. ### Setting thread names The thread pool's native extensions permit setting a thread's name using the operating system's native API. This can be useful for debugging, as the names of the threads will be visible in the debugger (for example, in the Call Stack on Visual Studio Code). As with other features of the native extensions, the thread pool library provides an abstraction layer over the native APIs, consisting of the following two static member functions of the `BS::this_thread` class: * `BS::this_thread::get_os_thread_name()` gets the current thread's name. It returns an object of type `std::optional`. 
If the returned object does not contain a value, then the name could not be determined. * `BS::this_thread::set_os_thread_name()` sets the current thread's name. It returns `true` if the name was set successfully, or `false` otherwise. Note that on Linux thread names are limited to 16 characters, including the null terminator. This feature is illustrated by the following program: ```cpp #define BS_THREAD_POOL_NATIVE_EXTENSIONS #include "BS_thread_pool.hpp" // BS::synced_stream, BS::this_thread, BS::thread_pool #include // std::size_t #include // std::optional #include // std::string, std::to_string BS::synced_stream sync_out; void set_name(const std::size_t idx) { const std::string name = "Thread " + std::to_string(idx); const bool result = BS::this_thread::set_os_thread_name(name); sync_out.println(result ? "Successfully" : "Error: Failed to", " set the name of thread ", idx, " to '", name, "'."); } void get_name() { const std::optional result = BS::this_thread::get_os_thread_name(); if (result) sync_out.println("This thread's name is set to '", *result, "'."); else sync_out.println("Error: Failed to get this thread's name!"); } int main() { const bool result = BS::this_thread::set_os_thread_name("Main Thread"); sync_out.println(result ? "Successfully" : "Error: Failed to", " set the name of the main thread."); BS::thread_pool pool(4, set_name); pool.wait(); // Place a breakpoint here to see the thread names in the debugger. pool.submit_task(get_name).wait(); } ``` If you place a breakpoint on the indicated line, you will be able to see the names of the threads in the debugger. The main thread will be named "Main Thread", while the 4 pool threads will be named "Thread 0" to "Thread 3". In the last line, a random thread's name will be read and printed out. 
### Setting process priority Although not directly related to multithreading, `BS::thread_pool`'s native extensions also provide the ability to set the entire process's priority using the operating system's native API. As with thread priority, the thread pool library provides an abstraction layer over the native APIs, in the form of the enumeration class `BS::os_process_priority`, which has the following 6 members: * `BS::os_process_priority::idle` * `BS::os_process_priority::below_normal` * `BS::os_process_priority::normal` * `BS::os_process_priority::above_normal` * `BS::os_process_priority::high` * `BS::os_process_priority::realtime` On Windows, these pre-defined priorities map 1-to-1 with [the process priority classes defined by the Windows API](https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setpriorityclass). On Linux and macOS, process priorities are mapped to ["nice" values](https://www.man7.org/linux/man-pages/man2/setpriority.2.html), as given by the actual values of the enumeration members (note that lower numbers correspond to higher priorities). Process priority is managed using two functions: * `BS::get_os_process_priority()` gets the process's priority. It returns an object of type `std::optional`. If the returned object does not contain a value, then either the priority could not be determined, or it is not one of the pre-defined values listed above. * `BS::set_os_process_priority()` sets the process's priority. It returns `true` if the priority was set successfully, or `false` otherwise. Usually, `false` means that the user does not have the necessary permissions to set the desired priority. 
This is demonstrated by the following program: ```cpp #define BS_THREAD_POOL_NATIVE_EXTENSIONS #include "BS_thread_pool.hpp" // BS::get_os_process_priority, BS::os_process_priority, BS::set_os_process_priority, BS::synced_stream #include // std::map #include // std::optional #include // std::string BS::synced_stream sync_out; BS::os_process_priority target = BS::os_process_priority::high; const std::map os_process_priority_map = {{BS::os_process_priority::idle, "idle"}, {BS::os_process_priority::below_normal, "below_normal"}, {BS::os_process_priority::normal, "normal"}, {BS::os_process_priority::above_normal, "above_normal"}, {BS::os_process_priority::high, "high"}, {BS::os_process_priority::realtime, "realtime"}}; std::string os_process_priority_name(const BS::os_process_priority priority) { const std::map::const_iterator it = os_process_priority_map.find(priority); return (it != os_process_priority_map.end()) ? it->second : "unknown"; } int main() { const std::optional get_result = BS::get_os_process_priority(); if (get_result) sync_out.println("The OS process priority is currently set to '", os_process_priority_name(*get_result), "'."); else sync_out.println("Error: Failed to get the OS process priority!"); const bool set_result = BS::set_os_process_priority(target); sync_out.println(set_result ? "Successfully" : "Error: Failed to", " set the OS process priority to '", os_process_priority_name(target), "'."); } ``` On Linux or macOS, please ensure that you run this example as root using `sudo`, otherwise it will fail. (Note that here we didn't actually need to use `BS::synced_stream`, since we are not using the thread pool, and only the main thread prints to the stream; we used it only for consistency with other examples.) ### Setting process affinity The thread pool's native extensions also allow the user to set the entire process's processor affinity using the operating system's native API. 
This works on Windows and Linux, but not on macOS, as the native API does not allow it. As with thread affinity, the thread pool library provides an abstraction layer over the native APIs, in the form of an `std::vector` where each element corresponds to a logical processor. Process affinity is managed using two functions: * `BS::get_os_process_affinity()` gets the process's affinity. It returns an object of type `std::optional>`. If the returned object does not contain a value, then the affinity could not be determined. On macOS, this function always returns `std::nullopt`. * `BS::set_os_process_affinity()` sets the process's affinity. It returns `true` if the affinity was set successfully, or `false` otherwise. On macOS, this function always returns `false`. Note that by counting the number of elements set to `true` in `BS::get_os_process_affinity()`, you can determine how many logical processors are available to the process. If the native extensions are enabled, a pool created with the default constructor will use this method to determine the number of threads available to the process, which can be less than the number of hardware threads, and use this as the default number of pool threads. 
This is demonstrated by the following program: ```cpp #define BS_THREAD_POOL_NATIVE_EXTENSIONS #include "BS_thread_pool.hpp" // BS::get_os_process_affinity(), BS::set_os_process_affinity, BS::synced_stream, BS::thread_pool #include // std::count #include // std::optional #include // std::thread #include // std::vector BS::synced_stream sync_out; int main() { sync_out.println("Total hardware threads: ", std::thread::hardware_concurrency()); BS::thread_pool pool1; sync_out.println("Threads in first pool: ", pool1.get_thread_count()); const bool success = BS::set_os_process_affinity({true, true, true}); if (success) { const std::optional> affinity = BS::get_os_process_affinity(); if (affinity) { sync_out.println("Total threads now available to the process: ", std::count(affinity->begin(), affinity->end(), true)); BS::thread_pool pool2; sync_out.println("Threads in second pool: ", pool2.get_thread_count()); return 0; } } sync_out.println("ERROR: Failed to set or get process affinity."); } ``` Assuming that the program was executed without setting the affinity of the process beforehand (e.g. using `taskset` on Linux), `pool1` will be created with the total number of hardware threads. However, we then manually set the affinity of the process so that only the first 3 logical processors are enabled (by passing a vector with 3 `true` elements and all other elements `false`). Therefore, `pool2` will be created with only 3 threads. If, for example, 32 hardware threads are available in total, the output will be: ```none Total hardware threads: 32 Threads in first pool: 32 Total threads now available to the process: 3 Threads in second pool: 3 ``` ### Accessing native thread handles If the native extensions are enabled, the `BS::thread_pool` class gains the member function `get_native_handles()`, which returns a vector containing the underlying implementation-defined thread handles for each of the pool's threads. 
These can then be used in an implementation-specific way to manage the threads at the OS level. Here is a quick example: ```cpp #define BS_THREAD_POOL_NATIVE_EXTENSIONS #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool #include // std::thread #include // std::vector BS::synced_stream sync_out; BS::thread_pool pool(4); int main() { std::vector handles = pool.get_native_handles(); for (std::size_t i = 0; i < handles.size(); ++i) sync_out.println("Thread ", i, " native handle: ", handles[i]); } ``` The output will depend on your compiler and operating system. Here is an example: ```none Thread 0 native handle: 00000000000000AC Thread 1 native handle: 00000000000000B0 Thread 2 native handle: 00000000000000B4 Thread 3 native handle: 00000000000000B8 ``` **Warning:** Please note that any code written using the native handles directly will **not** be portable. As detailed above, the thread pool's native extensions define abstraction layers for several commonly used thread operations, which are portable on supported platforms, and are therefore strongly preferred over non-portable operations. The native handles are made available for users who need to perform operations that are not covered by these abstraction layers. ## Testing the library ### Automated tests The file `BS_thread_pool_test.cpp` in the `tests` folder of [the GitHub repository](https://github.com/bshoshany/thread-pool) will perform automated tests of all aspects of the library. In addition, the code is meant to serve as an extensive example of how to properly use the library. The test program also takes the following command line arguments: * `help`: Show a help message and exit. Any other arguments will be ignored. * `stdout`: Print to the standard output. * `log`: Print to a log file. It will have the same name as the executable, with a suffix `-yyyy-mm-dd_hh.mm.ss.log` based on the current date and time. * `tests`: Perform standard tests. * `deadlock`: Perform long deadlock tests. 
* `benchmarks`: Perform full Mandelbrot set benchmarks. * `plot`: Perform quick Mandelbrot set benchmarks. * `save`: Save the Mandelbrot set image to a file. If no options are entered, the default is `benchmarks log stdout tests`. If the file `default_args.txt` exists in the same folder or the parent folder, the test program reads the default arguments from it (space separated in a single line). Command line arguments can still override these defaults. This is useful when debugging. The following macros can be defined during compilation (using the `-D` flag in Clang and GCC or `/D` in MSVC) to enable additional features: * `BS_THREAD_POOL_TEST_IMPORT_MODULE`: Import the thread pool library [as a C++20 module](#importing-the-library-as-a-c20-module). Note that the module must be compiled beforehand, as explained in the relevant section. * `BS_THREAD_POOL_NATIVE_EXTENSIONS`: Test the [native extensions](#native-extensions). If importing the library as a C++20 module, ensure that the library was compiled with the same macro. The bundled [`compile_cpp.py` script](#the-compile_cpppy-script), if run with `python scripts/compile_cpp.py tests/BS_thread_pool_test.cpp --run --try-all --type=release --verbose`, will automatically detect if Clang, GCC, and/or MSVC are available, and compile and run the test program using each available compiler 3 times: 1. With C++17 support. 2. With C++20 support, using `import BS.thread_pool`. 3. With C++23 support, using `import BS.thread_pool`, and using `import std`. If any of the tests fail, please [submit a bug report](https://github.com/bshoshany/thread-pool/issues) including the exact specifications of your system (OS, CPU, compiler, etc.) and the generated log file. However, please note that **only the latest versions of each compiler are supported**. By default, the test program prints colored output using ANSI escape codes for better readability. This can be disabled by setting the `NO_COLOR` environment variable. 
### Performance tests `BS_thread_pool_test.cpp` also performs benchmarks, using a highly-optimized multithreaded algorithm which generates a plot of the [Mandelbrot set](https://en.wikipedia.org/wiki/Mandelbrot_set), utilizing a normalized iteration count algorithm and linear interpolation to create smooth coloring. If tests are enabled, the benchmarks will only be performed if all of the tests pass. These benchmarks are heavily CPU-intensive, which results in a high speedup factor due to multithreading, ideally utilizing every core and thread to their fullest extent. This makes them useful for optimizing the library, since they are more sensitive to the thread pool's own performance than to other factors such as memory or cache. The full benchmarks are enabled using the command line argument `benchmarks`, which is enabled by default. The command line argument `plot` can be used to just plot the Mandelbrot set once, either instead of or in addition to doing the full benchmarks. This will plot the largest possible image that can be plotted in 5 seconds, and only measure the performance in pixels/ms for the entire plot. The test program prints out the Mandelbrot set it generates, downsampled to fit in a terminal window (at 120 character width). This will be in 24-bit color in the terminal, and in monochrome using Unicode blocks in the log file. If the `NO_COLOR` environment variable is set, the terminal output will also be in monochrome. (Note: On Windows Terminal, ensure that `adjustIndistinguishableColors` is disabled in the settings, otherwise the plot will not be displayed correctly.) If you want to see the plot in full resolution, pass the `save` command line argument, and the plot will be saved to `BS_thread_pool_benchmark_mandelbrot.bmp` (it's a BMP file to avoid having to depend on 3rd-party libraries). This is off by default, since that file can get quite large. 
The program determines the optimal resolution of the Mandelbrot plot by testing how many pixels are needed to reach a certain target duration when parallelizing the loop using a number of tasks equal to the number of threads. This ensures that the benchmarks take approximately the same amount of time (per thread) on all systems, and are thus more consistent and portable. Once the appropriate resolution has been determined, the program plots the Mandelbrot set. For more details about the algorithm used, please see the source code for `BS_thread_pool_test.cpp`. This operation is performed both single-threaded and multithreaded, with the multithreaded computation spread across multiple tasks submitted to the pool. Multithreaded tests are performed with increasingly higher task counts, while keeping the number of threads in the pool equal to the hardware concurrency for optimal performance. Each test is repeated multiple times, with the run times averaged over all runs of the same test. The program keeps increasing the number of tasks by a factor of 2 until diminishing returns are encountered. The run times of the tests are compared, and the maximum speedup obtained compared to the single-threaded test is calculated. If the [native extensions](#native-extensions) are enabled, the program will try to increase the priority of both the process itself and all the threads in the pool to the highest possible value, to prevent other processes from interfering with the benchmarks. Therefore, to obtain the most reliable benchmarks, it is recommended to run the tests as a privileged user, especially on Linux or macOS where only root can increase the priority. As an example, here are the results of the benchmarks running on a 24-core (8P+16E) / 32-thread Intel i9-13900K CPU. The tests were compiled using MSVC in C++23 mode, to obtain maximum performance using the latest C++23 features. Compiler optimizations were enabled using the `/O2` flag. 
The benchmarks were run 5 times, and the result with the median speedup was as follows: ```none Generating a 3927x3927 plot of the Mandelbrot set... Each test will be repeated 30 times to collect reliable statistics. 1 task: [..............................] (single-threaded) -> Mean: 500.8 ms, standard deviation: 1.3 ms, speed: 1026.5 pixels/ms. 8 tasks: [..............................] -> Mean: 146.0 ms, standard deviation: 0.3 ms, speed: 3520.9 pixels/ms. 16 tasks: [..............................] -> Mean: 82.2 ms, standard deviation: 1.5 ms, speed: 6256.1 pixels/ms. 32 tasks: [..............................] -> Mean: 49.8 ms, standard deviation: 1.2 ms, speed: 10322.2 pixels/ms. 64 tasks: [..............................] -> Mean: 26.9 ms, standard deviation: 1.2 ms, speed: 19109.5 pixels/ms. 128 tasks: [..............................] -> Mean: 22.8 ms, standard deviation: 0.9 ms, speed: 22545.8 pixels/ms. 256 tasks: [..............................] -> Mean: 21.4 ms, standard deviation: 0.5 ms, speed: 24058.2 pixels/ms. 512 tasks: [..............................] -> Mean: 20.7 ms, standard deviation: 0.6 ms, speed: 24833.1 pixels/ms. 1024 tasks: [..............................] -> Mean: 21.0 ms, standard deviation: 0.4 ms, speed: 24478.3 pixels/ms. Maximum speedup obtained by multithreading vs. single-threading: 24.2x, using 512 tasks. ``` This CPU has 24 cores, of which 8 are fast (5.40 GHz max) performance cores with hyperthreading (thus providing 16 threads in total), and 16 are slower (4.30 GHz max) efficiency cores without hyperthreading, for a total of 32 threads. Due to the hybrid architecture, it is not trivial to calculate the theoretical maximum speedup. However, we can get a rough estimate by noticing that the E-cores are about 20% slower than the P-cores, and that hyperthreading is generally known to provide around a 30% speedup. Thus, the estimated theoretical speedup (compared to a single P-core) is 8 × 1.3 + 16 × 0.8 = 23.2x. 
The actual median speedup obtained, 24.2x, is 4.3% above this estimate, which indicates that the thread pool provides optimal performance and allows the Mandelbrot plot algorithm to take full advantage of the CPU's capabilities. It should also be noted that even though the available number of hardware threads is 32, the maximum possible speedup is achieved not with 32 tasks, but with 512 tasks - half the square of the number of hardware threads. The reason for this is that splitting the job into more tasks than threads eliminates thread idle time, as explained [above](#optimizing-the-number-of-blocks). However, at 1024 tasks we encounter diminishing returns, as the overhead of submitting the tasks to the pool starts to outweigh the benefits of parallelization. ### Finding the version of the library Starting with v5.0.0, the thread pool library defines the `constexpr` object `BS::thread_pool_version`, which can be used to check the version of the library at compilation time. This object is of type `BS::version`, with members `major`, `minor`, and `patch`, and all comparison operators defined as `constexpr`. It also has a `to_string()` member function and an `operator<<` overload for easy printing at runtime (used by the [test program](#testing-the-library)). Since `BS::thread_pool_version` is a `constexpr` object, it can be used in any context where a `constexpr` object is allowed, such as `static_assert()` and `if constexpr`. For example, the following program will fail to compile if the version is not 5.1.0 or higher: ```cpp #include "BS_thread_pool.hpp" static_assert(BS::thread_pool_version >= BS::version(5, 1, 0), "This program requires version 5.1.0 or later of the BS::thread_pool library."); int main() { // ... 
} ``` As another example, the following program will print the version of the library (this will implicitly use the `<<` operator of `BS::version`) and then conditionally compile one of two branches of code depending on the version of the library: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool BS::synced_stream sync_out; BS::thread_pool pool; int main() { sync_out.println("Detected BS::thread_pool v", BS::thread_pool_version, '.'); if constexpr (BS::thread_pool_version <= BS::version(5, 1, 0)) { // Do something supported by BS::thread_pool v5.1.0 or earlier. } else { // Do something supported by newer versions of BS::thread_pool after v5.1.0. } } ``` `BS::thread_pool_version` was introduced in v5.0.0, and it is the preferred way to do version checking. However, for backwards compatibility, if you are not sure if you are going to get v4 or v5 of the library, you can check the version using the following preprocessor macros, which were introduced in v4.0.1: * `BS_THREAD_POOL_VERSION_MAJOR` - indicates the major version. * `BS_THREAD_POOL_VERSION_MINOR` - indicates the minor version. * `BS_THREAD_POOL_VERSION_PATCH` - indicates the patch version. These macros allow for conditional inclusion of code using `#if` directives. As an example, the member function [`set_cleanup_func()`](#thread-cleanup-functions) was introduced in v5.0.0. Therefore, if the major version number is 5 or higher, we can use this function; otherwise, we must find some other way to do the cleanup: ```cpp #include "BS_thread_pool.hpp" // BS::synced_stream, BS::thread_pool BS::synced_stream sync_out; BS::thread_pool pool; int main() { #if BS_THREAD_POOL_VERSION_MAJOR >= 5 pool.set_cleanup_func( [] { sync_out.println("Doing cleanup..."); }); #else // Do the cleanup in some other way. 
#endif } ``` However, please note that if the library is imported [as a C++20 module](#importing-the-library-as-a-c20-module), these macros will not be available, since macros cannot be exported from a module. In this case, you must use `BS::thread_pool_version` instead. (Indeed, this is exactly why it was introduced in the first place.) ## Importing the library as a C++20 module ### Compiling the module If C++20 features are available, the library can be imported as a C++20 module using `import BS.thread_pool`. This is the officially recommended way to use the library, as it has many benefits, such as faster compilation times, better encapsulation, no namespace pollution, no include order issues, easier maintainability, simpler dependency management, and more. The `constexpr` flag `BS::thread_pool_module` indicates whether the thread pool library was compiled as a module. For more information on C++20 modules, please see [cppreference.com](https://en.cppreference.com/w/cpp/language/modules). The module file itself is `BS.thread_pool.cppm`, located in the `modules` folder, and it is just a thin wrapper around the header file `BS_thread_pool.hpp`. The C++20 standard does not provide a way for one file to be used as both a module and a header file, so both files are needed in order to compile the library as a module. (However, to use the library as a header file, only `BS_thread_pool.hpp` is needed.) Note that the header file `BS_thread_pool.hpp` has an underscore `_` following `BS`, for backwards compatibility with older versions of the library. However, the module file `BS.thread_pool.cppm` has a dot `.` following `BS`, to conform with the C++20 module naming convention, where dots represent hierarchy; all modules written by the author of this library will use the `BS.` prefix. This feature has been tested with the latest versions of Clang, GCC, and MSVC. 
Unfortunately, at the time of writing, C++20 modules are still not fully implemented in all compilers, and each compiler implements them differently. The easiest way to compile the module itself, as well as any programs that import it, is using the `compile_cpp.py` Python script provided in [the GitHub repository](https://github.com/bshoshany/thread-pool), which will automatically figure out the appropriate flags for each compiler. Please see the [next section](#compiling-with-compile_cpppy-using-import-bsthread_pool) for more information. However, if you prefer to compile manually, the module must first be compiled into a binary file, in a format specific to each compiler, as described in the following sections. After the module has been compiled once, this binary file (plus an object file, in MSVC) is the only file needed to import the library; the `.cppm` and `.hpp` files are no longer needed. However, any program using the module must be compiled with a flag indicating to the compiler where to find that binary file. Once the module is compiled, it can be imported using `import BS.thread_pool`. In all the examples above, you can simply replace `#include "BS_thread_pool.hpp"` with `import BS.thread_pool;` in order to import the library as a module. The only exception is the [native extensions](#native-extensions), which are enabled in the examples using a macro; as explained in that section, the macro must be defined as a compiler flag, as modules cannot access macros defined in the program that imported them.
Here is a quick example: ```cpp import BS.thread_pool; BS::synced_stream sync_out; BS::thread_pool pool; int main() { pool.submit_task( [] { sync_out.println("Thread pool library successfully imported using C++20 modules!"); }) .wait(); } ``` Below we will provide the commands for compiling the library as a module and then compiling [the test program](#testing-the-library) `BS_thread_pool_test.cpp` using this module, with Clang, GCC, and MSVC, as well as with CMake. In [the GitHub repository](https://github.com/bshoshany/thread-pool), the relevant files are organized as follows: ``` ├── README.md <- this documentation file ├── include │ └── BS_thread_pool.hpp <- the header file ├── modules │ └── BS.thread_pool.cppm <- the module file ├── scripts │ └── compile_cpp.py <- the compile script (optional) └── tests └── BS_thread_pool_test.cpp <- the test program ``` In the following examples, it is assumed that the commands are executed in the root directory of the repository (the one that contains `README.md`). The compiled files will be placed in a `build` subdirectory, which should be created beforehand. ### Compiling with `compile_cpp.py` using `import BS.thread_pool` The bundled Python script [`compile_cpp.py`](#the-compile_cpppy-script) can be used to easily compile any programs that import the library as a module. The script will automatically figure out the appropriate flags for each compiler, so you do not have to worry about the details. For example, to compile the test program `BS_thread_pool_test.cpp` and have it import the `BS.thread_pool` module, simply run the following command in the root folder of the repository: ```bash python scripts/compile_cpp.py tests/BS_thread_pool_test.cpp -s=c++20 -i=include -t=release -m="BS.thread_pool=modules/BS.thread_pool.cppm,include/BS_thread_pool.hpp" -o=build/BS_thread_pool_test -d=BS_THREAD_POOL_TEST_IMPORT_MODULE -v ``` Please see [below](#the-compile_cpppy-script) for an explanation of the command line arguments. 
The `-d` argument defines the macro `BS_THREAD_POOL_TEST_IMPORT_MODULE`, which is used to indicate to the test program that it needs to import the library as a module instead of including the header file. **Note that this macro is only used by the test program; it is not needed when you compile your own programs.** To enable the [native extensions](#native-extensions), you should also add `-d=BS_THREAD_POOL_NATIVE_EXTENSIONS` to define the required macro. To use C++23, replace `-s=c++20` with `-s=c++23`. Since we used `-t=release`, optimization flags will be added automatically. If you now type `build/BS_thread_pool_test`, the test program will run; you can also add the argument `-r` to run it automatically after compilation. If the module was successfully imported, the test program will print the message: ```none Thread pool library imported using: import BS.thread_pool (C++20 modules). ``` For further customization, it is recommended to create a `compile_cpp.yaml` file as explained [below](#the-compile_cpppy-script). ### Compiling with Clang using `import BS.thread_pool` Note: The following instructions have only been tested using Clang v21.1.8, the latest version at the time of writing, and may not work with older versions of the compiler. To compile the module file `BS.thread_pool.cppm` with Clang, first create the `build` folder using `mkdir build`, and then run the following command in the root folder of the repository: ```bash clang++ modules/BS.thread_pool.cppm --precompile -std=c++20 -I include -o build/BS.thread_pool.pcm ``` Here is a breakdown of the compiler arguments: * `modules/BS.thread_pool.cppm`: The module file to compile. Note that it will include the file `include/BS_thread_pool.hpp` automatically. * `--precompile`: Do not run the linker, only compile the module. * `-std=c++20`: Use the C++20 standard. For C++23, use `-std=c++23`. 
* `-I include`: Add the `include` folder to the include path, so that the module can find the header file `BS_thread_pool.hpp`. * `-o build/BS.thread_pool.pcm`: Output the compiled module to `build/BS.thread_pool.pcm`. The extension `.pcm` is used by Clang for precompiled modules. Note that to enable the [native extensions](#native-extensions), you should add `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` to define the required macro. Once the module is compiled, you can compile the test program as follows: ```bash clang++ tests/BS_thread_pool_test.cpp -fmodule-file="BS.thread_pool=build/BS.thread_pool.pcm" -std=c++20 -o build/BS_thread_pool_test -D BS_THREAD_POOL_TEST_IMPORT_MODULE ``` Here is a breakdown of the compiler arguments: * `tests/BS_thread_pool_test.cpp`: The program to compile. * `-fmodule-file="BS.thread_pool=build/BS.thread_pool.pcm"`: Specify that the module `BS.thread_pool` is located in the file `build/BS.thread_pool.pcm`. * `-std=c++20`: Same as above. * `-o build/BS_thread_pool_test`: Output the compiled program to `build/BS_thread_pool_test` (or `build/BS_thread_pool_test.exe` on Windows). * `-D BS_THREAD_POOL_TEST_IMPORT_MODULE`: Define the macro `BS_THREAD_POOL_TEST_IMPORT_MODULE`, which is used to indicate to the test program that it needs to import the library as a module instead of including the header file. **Note that this macro is only used by the test program; it is not needed when you compile your own programs.** Again, you should add `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to test the native extensions. You do not need to use the `-I` flag, since the header file is not needed, only the `.pcm` file. If you now type `build/BS_thread_pool_test`, the test program will run. If the module was successfully imported, the test program will print the message: ```none Thread pool library imported using: import BS.thread_pool (C++20 modules). 
``` Of course, you should add warning, debugging, optimization, and other compiler flags to the commands above as needed. For more information about using C++20 modules with Clang, please see [the official documentation](https://clang.llvm.org/docs/StandardCPlusPlusModules.html). **Note:** At the time of writing, there is a bug in Clang with libc++ where using `std::jthread` in a C++20 module causes a compilation error. As a workaround, until the bug is fixed, the thread pool library automatically falls back to `std::thread` if it detects that Clang and libc++ are being used together with C++20 modules. This workaround can be disabled by defining `BS_THREAD_POOL_DISABLE_WORKAROUNDS` when compiling the module. **Note:** On macOS, Apple Clang does not support C++20 modules. Please either install the latest version of LLVM Clang using [Homebrew](https://formulae.brew.sh/formula/llvm), or include the library as a header file. ### Compiling with GCC using `import BS.thread_pool` Note: The following instructions have only been tested using GCC v15.2.0, the latest version at the time of writing, and may not work with older versions of the compiler. To compile the module file `BS.thread_pool.cppm` with GCC, first create the `build` folder using `mkdir build`, and then run the following command in the root folder of the repository: ```bash g++ -x c++ modules/BS.thread_pool.cppm -c "-fmodule-mapper=|@g++-mapper-server -r build" -fmodule-only -fmodules -std=c++20 -I include ``` Here is a breakdown of the compiler arguments: * `-x c++`: Treat the input file as a C++ file. This is necessary because the file has the `.cppm` extension, which is not recognized by GCC. * `modules/BS.thread_pool.cppm`: The module file to compile. Note that it will include the file `include/BS_thread_pool.hpp` automatically. * `-c`: Do not run the linker, only compile the module. 
* `"-fmodule-mapper=|@g++-mapper-server -r build"`: Specify to the module mapper that the compiled module should be placed in the `build` folder. This will create a file `build/BS.thread_pool.gcm`. The extension `.gcm` is used by GCC for compiled modules. * `-fmodule-only`: Do not create an object file for the module. * `-fmodules`: Enable C++20 modules. * `-std=c++20`: Use the C++20 standard. For C++23, use `-std=c++23`. * `-I include`: Add the `include` folder to the include path, so that the module can find the header file `BS_thread_pool.hpp`. Note that to enable the [native extensions](#native-extensions), you should add `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` to define the required macro. Once the module is compiled, you can compile the test program as follows: ```bash g++ tests/BS_thread_pool_test.cpp "-fmodule-mapper=|@g++-mapper-server -r build" -fmodules -std=c++20 -o build/BS_thread_pool_test -D BS_THREAD_POOL_TEST_IMPORT_MODULE ``` Here is a breakdown of the compiler arguments: * `tests/BS_thread_pool_test.cpp`: The program to compile. * `"-fmodule-mapper=|@g++-mapper-server -r build"`: Specify to the module mapper that the compiled module can be found in the `build` folder. It will look for the file `build/BS.thread_pool.gcm`. * `-fmodules`, `-std=c++20`: Same as above. * `-o build/BS_thread_pool_test`: Output the compiled program to `build/BS_thread_pool_test` (or `build/BS_thread_pool_test.exe` on Windows). * `-D BS_THREAD_POOL_TEST_IMPORT_MODULE`: Define the macro `BS_THREAD_POOL_TEST_IMPORT_MODULE`, which is used to indicate to the test program that it needs to import the library as a module instead of including the header file. **Note that this macro is only used by the test program; it is not needed when you compile your own programs.** Again, you should add `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to test the native extensions. You do not need to use the `-I` flag, since the header file is not needed, only the `.gcm` file. 
If you now type `build/BS_thread_pool_test`, the test program will run. If the module was successfully imported, the test program will print the message: ```none Thread pool library imported using: import BS.thread_pool (C++20 modules). ``` Of course, you should add warning, debugging, optimization, and other compiler flags to the commands above as needed. For more information about using C++20 modules with GCC, please see [the official documentation](https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Modules.html). ### Compiling with MSVC using `import BS.thread_pool` Note: The following instructions have only been tested using MSVC v19.50.35721, the latest version at the time of writing, and may not work with older versions of the compiler. To compile the module file `BS.thread_pool.cppm` with MSVC, first open the Visual Studio Developer PowerShell for the appropriate CPU architecture. For example, on Visual Studio 2026, for x64 architecture, execute the following command in PowerShell in the root folder of the repository: ```pwsh & 'C:\Program Files\Microsoft Visual Studio\18\Community\Common7\Tools\Launch-VsDevShell.ps1' -Arch amd64 -HostArch amd64 -SkipAutomaticLocation ``` For ARM64, replace `amd64` with `arm64`. (Do not use the "Developer PowerShell for VS" Start Menu shortcut, as it may not use the correct CPU architecture by default.) Create the `build` folder using `mkdir build`, and then run the following command in the root folder of the repository: ```pwsh cl modules/BS.thread_pool.cppm /c /EHsc /interface /nologo /permissive- /std:c++20 /TP /Zc:__cplusplus /I include /ifcOutput build/BS.thread_pool.ifc /Fo:build/BS.thread_pool.obj ``` Here is a breakdown of the compiler arguments: * `modules/BS.thread_pool.cppm`: The module file to compile. Note that it will include the file `include/BS_thread_pool.hpp` automatically. * `/c`: Do not run the linker, only compile the module. * `/EHsc`: Enable C++ exceptions. 
* `/interface`: Treat the input file as a module interface unit. This is needed because MSVC expects the `.ixx` extension for the module file, but this library uses the `.cppm` extension. * `/nologo`: Do not display the compiler's banner. * `/permissive-`: Disable permissive behaviors, that is, enforce strict C++ standard conformance. * `/std:c++20`: Use the C++20 standard. For C++23, use `/std:c++latest`. * `/TP`: Treat the input file as a C++ file. This is necessary because the file has the `.cppm` extension, which is not recognized by MSVC. * `/Zc:__cplusplus`: Make the `__cplusplus` preprocessor macro correctly reflect the C++ standard being used. * `/I include`: Add the `include` folder to the include path, so that the module can find the header file `BS_thread_pool.hpp`. * `/ifcOutput build/BS.thread_pool.ifc`: Output the compiled module to `build/BS.thread_pool.ifc`. The extension `.ifc` is used by MSVC for module interface files. * `/Fo:build/BS.thread_pool.obj`: Output the compiled object file to `build/BS.thread_pool.obj`. Note that to enable the [native extensions](#native-extensions), you should add `/D BS_THREAD_POOL_NATIVE_EXTENSIONS` to define the required macro. Once the module is compiled, you can compile the test program as follows: ```pwsh cl tests/BS_thread_pool_test.cpp build/BS.thread_pool.obj /reference BS.thread_pool=build/BS.thread_pool.ifc /EHsc /nologo /permissive- /std:c++20 /Zc:__cplusplus /Fo:build/BS_thread_pool_test.obj /Fe:build/BS_thread_pool_test.exe /D BS_THREAD_POOL_TEST_IMPORT_MODULE ``` Here is a breakdown of the compiler arguments: * `tests/BS_thread_pool_test.cpp`: The program to compile. * `build/BS.thread_pool.obj`: The module object file to link to the program. * `/reference BS.thread_pool=build/BS.thread_pool.ifc`: Specify that the module `BS.thread_pool` is located in the file `build/BS.thread_pool.ifc`. * `/EHsc`, `/nologo`, `/permissive-`, `/std:c++20`, `/Zc:__cplusplus`: Same as above. 
* `/Fo:build/BS_thread_pool_test.obj`: Output the compiled object file to `build/BS_thread_pool_test.obj`. * `/Fe:build/BS_thread_pool_test.exe`: Output the compiled program to `build/BS_thread_pool_test.exe`. * `/D BS_THREAD_POOL_TEST_IMPORT_MODULE`: Define the macro `BS_THREAD_POOL_TEST_IMPORT_MODULE`, which is used to indicate to the test program that it needs to import the library as a module instead of including the header file. **Note that this macro is only used by the test program; it is not needed when you compile your own programs.** Again, you should add `/D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to test the native extensions. You do not need to use the `/I` flag, since the header file is not needed, only the `.obj` and `.ifc` files. If you now type `build/BS_thread_pool_test`, the test program will run. If the module was successfully imported, the test program will print the message: ```none Thread pool library imported using: import BS.thread_pool (C++20 modules). ``` Of course, you should add warning, debugging, optimization, and other compiler flags to the commands above as needed. For more information about using C++20 modules with MSVC, please see [this blog post](https://devblogs.microsoft.com/cppblog/using-cpp-modules-in-msvc-from-the-command-line-part-1/). ### Compiling with CMake using `import BS.thread_pool` Note: The following instructions have only been tested using CMake v4.2.1, the latest version at the time of writing, and may not work with older versions. Also, modules are currently not supported by CMake with all generators; please see the CMake documentation for more information. If you are using [CMake](https://cmake.org/), you can use `target_sources()` with `CXX_MODULES` to include the module file `BS.thread_pool.cppm`. CMake will then automatically compile the module and link it to your program. 
Here is an example of a `CMakeLists.txt` file that can be used to build the test program and import the thread pool library as a module: ```cmake cmake_minimum_required(VERSION 4.2.1) project(BS_thread_pool_test LANGUAGES CXX) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(MSVC) add_compile_options(/permissive- /Zc:__cplusplus) endif() add_library(BS_thread_pool) target_sources(BS_thread_pool PRIVATE FILE_SET CXX_MODULES FILES modules/BS.thread_pool.cppm) target_include_directories(BS_thread_pool PRIVATE include) add_executable(${PROJECT_NAME} tests/BS_thread_pool_test.cpp) target_link_libraries(${PROJECT_NAME} PRIVATE BS_thread_pool) target_compile_definitions(${PROJECT_NAME} PRIVATE BS_THREAD_POOL_TEST_IMPORT_MODULE) ``` Note that for MSVC we have to add the `/permissive-` flag to enforce strict C++ standard conformance, otherwise the test program will not compile, and `/Zc:__cplusplus`, otherwise the test program cannot detect the correct C++ version. This is handled automatically by the `if(MSVC)` block. To enable the [native extensions](#native-extensions), add the line `add_compile_definitions(BS_THREAD_POOL_NATIVE_EXTENSIONS)`. Replace `CMAKE_CXX_STANDARD 20` with `23` if you wish to use C++23 features. Place this file in the root folder of the repository, and then run the following commands: ```bash cmake -B build cmake --build build build/BS_thread_pool_test ``` For MSVC, replace the last command with `build/Debug/BS_thread_pool_test`. If the module was successfully imported, the test program will print the message: ```none Thread pool library imported using: import BS.thread_pool (C++20 modules). ``` Of course, you should add warning, debugging, optimization, and other compiler flags to the configuration above as needed. For more information about using C++20 modules with CMake, please see [the official documentation](https://cmake.org/cmake/help/latest/manual/cmake-cxxmodules.7.html). 
You can also instruct CMake to download the library automatically from the GitHub repository, as explained below, either using [CPM](#installing-using-cmake-with-cpm) or [`FetchContent`](#installing-using-cmake-with-fetchcontent). ## Importing the C++23 Standard Library as a module ### Enabling `import std` If C++23 features are available, the thread pool library can import the C++ Standard Library as a module using `import std`. This has the same benefits described [above](#importing-the-library-as-a-c20-module) for importing the library as a module, such as faster compilation times. To enable this feature, define the macro `BS_THREAD_POOL_IMPORT_STD` at compilation time. At the time of writing, importing the C++ Standard Library as a module is only officially supported by the following combinations of compilers and standard libraries: * Recent versions of LLVM Clang (**not** Apple Clang) with LLVM libc++. * Recent versions of GCC with libstdc++. * Recent versions of MSVC with Microsoft STL. If `BS_THREAD_POOL_IMPORT_STD` is defined, then you must also import the thread pool library itself as a module. If the library is included as a header file, this will force the program that included the header file to also import `std`, which is not desirable and can lead to compilation errors if the program `#include`s any Standard Library header files. Defining the macro before importing the module will not work, as modules cannot access macros defined in the program that imported them. Instead, you must define the macro as a compiler flag: `-D BS_THREAD_POOL_IMPORT_STD` for Clang and GCC or `/D BS_THREAD_POOL_IMPORT_STD` for MSVC. [The test program](#testing-the-library) will also import the `std` module if the macro `BS_THREAD_POOL_IMPORT_STD` is defined at compilation time. In that case, you should also enable the macro `BS_THREAD_POOL_TEST_IMPORT_MODULE` to import the thread pool library as a module. 
The `constexpr` flag `BS::thread_pool_import_std` indicates whether the thread pool library was compiled with `import std`. Note that the flag will be `false` if `BS_THREAD_POOL_IMPORT_STD` is defined but C++23 support is not enabled in the compiler. At the time of writing, importing the `std` module requires compiling it first. As explained in the [previous section](#importing-the-library-as-a-c20-module), using the bundled `compile_cpp.py` script is the easiest way to do this, as we show in the [next section](#compiling-with-compile_cpppy-using-import-std). However, for those who wish to compile manually, in the following sections we will explain how to do it with both Clang and MSVC, as well as with CMake. It is assumed that the reader has already read the section about importing the `BS.thread_pool` library as a module, so we omit some details here. ### Compiling with `compile_cpp.py` using `import std` The bundled Python script [`compile_cpp.py`](#the-compile_cpppy-script) can be used to easily compile any programs that import the C++ Standard Library as a module. The script will automatically figure out the appropriate flags for each compiler, so you do not have to worry about the details. For example, to compile the test program `BS_thread_pool_test.cpp` and have it import both the `BS.thread_pool` module and the `std` module, simply run the following command in the root folder of the repository: ```bash python scripts/compile_cpp.py tests/BS_thread_pool_test.cpp -s=c++23 -i=include -t=release -m="BS.thread_pool=modules/BS.thread_pool.cppm,include/BS_thread_pool.hpp" -o=build/BS_thread_pool_test -d=BS_THREAD_POOL_TEST_IMPORT_MODULE -d=BS_THREAD_POOL_IMPORT_STD -u=auto -v ``` Please see [below](#the-compile_cpppy-script) for an explanation of the command line arguments. 
The differences between this command and the one we used for [importing the thread pool library as a module](#compiling-with-compile_cpppy-using-import-bsthread_pool) are: * Changed `-s=c++20` to `-s=c++23` so we can use the C++23 standard. * Added `-d=BS_THREAD_POOL_IMPORT_STD` to define the required macro. * Added `-u=auto` to automatically detect the location of the `std` module. If this doesn't work, you will need to specify the path manually. To enable the [native extensions](#native-extensions), you should also add `-d=BS_THREAD_POOL_NATIVE_EXTENSIONS` to define the required macro. If you now type `build/BS_thread_pool_test`, the test program will run. If the `std` module was successfully imported, the test program will print the message: ```none C++ Standard Library imported using: * Thread pool library: import std (C++23 std module). * Test program: import std (C++23 std module). ``` For further customization, it is recommended to create a `compile_cpp.yaml` file as explained [below](#the-compile_cpppy-script). ### Compiling with Clang and LLVM libc++ using `import std` Note: The following instructions have only been tested using Clang v21.1.8 and LLVM libc++ v21.1.8, the latest versions at the time of writing, and may not work with older versions. Before compiling the `std` module, you must find the file `std.cppm`: * On Windows, libc++ is most likely installed via [MSYS2](https://www.msys2.org/), so the `std` module should be at `C:\msys64\clang64\share\libc++\v1\std.cppm`. If you did not install MSYS2 in `C:\msys64`, replace that with the correct path. If you installed libc++ without MSYS2, locate `std.cppm` manually in the installation folder. * On Linux, the `std` module should be at `/usr/lib/llvm-<version>/share/libc++/v1/std.cppm`. Replace `<version>` with the major version number of libc++. If you installed libc++ in a different folder, locate `std.cppm` manually in that folder.
* On macOS with the [Homebrew build](https://formulae.brew.sh/formula/llvm) of LLVM, the `std` module should be at `/usr/local/Cellar/llvm/<version>/share/libc++/v1/std.cppm`. Replace `<version>` with the full version number of libc++. If you installed libc++ in a different folder, locate `std.cppm` manually in that folder. To compile the module file `std.cppm` with Clang, first create the `build` folder using `mkdir build`, and then run the following command in the root folder of the repository: ```bash clang++ "path to std.cppm" --precompile -std=c++23 -o build/std.pcm -Wno-reserved-module-identifier ``` Of course, you should replace `"path to std.cppm"` with the actual path. The compiler arguments are explained [above](#compiling-with-clang-using-import-bsthread_pool). The additional argument `-Wno-reserved-module-identifier` is needed to silence a false-positive warning. Next, compile the `BS.thread_pool` module as [above](#compiling-with-clang-using-import-bsthread_pool), but with `-std=c++23` and the following additional flags: * `-fmodule-file="std=build/std.pcm"`: Specify that the module `std` is located in the file `build/std.pcm`. * `-D BS_THREAD_POOL_IMPORT_STD`: Instruct the library to import the `std` module. ```bash clang++ modules/BS.thread_pool.cppm --precompile -fmodule-file="std=build/std.pcm" -std=c++23 -I include -o build/BS.thread_pool.pcm -D BS_THREAD_POOL_IMPORT_STD ``` Add `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to enable the [native extensions](#native-extensions). Once the module is compiled, you can compile the test program as follows: ```bash clang++ tests/BS_thread_pool_test.cpp -fmodule-file="std=build/std.pcm" -fmodule-file="BS.thread_pool=build/BS.thread_pool.pcm" -std=c++23 -o build/BS_thread_pool_test -D BS_THREAD_POOL_TEST_IMPORT_MODULE -D BS_THREAD_POOL_IMPORT_STD ``` Again, you should add `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to test the native extensions. If you now type `build/BS_thread_pool_test`, the test program will run.
If the `std` module was successfully imported, the test program will print the message: ```none C++ Standard Library imported using: * Thread pool library: import std (C++23 std module). * Test program: import std (C++23 std module). ``` ### Compiling with GCC and GNU libstdc++ using `import std` Note: The following instructions have only been tested using GCC v15.2.0 and GNU libstdc++ v15 (20250917), the latest versions at the time of writing, and may not work with older versions. With GNU libstdc++, the `std` module file is always available as the system module `bits/std.cc`. To compile this module file with GCC, first create the `build` folder using `mkdir build`, and then run the following command in the root folder of the repository: ```bash g++ -fsearch-include-path bits/std.cc -c "-fmodule-mapper=|@g++-mapper-server -r build" -fmodule-only -fmodules -std=c++23 -I include ``` The compiler arguments are explained [above](#compiling-with-gcc-using-import-bsthread_pool). The additional argument `-fsearch-include-path` is needed to tell the compiler to look for `bits/std.cc` in the include path (otherwise it will assume it is in the current directory). Next, compile the `BS.thread_pool` module as [above](#compiling-with-gcc-using-import-bsthread_pool), but with `-std=c++23` and the following additional flags: * `-D BS_THREAD_POOL_IMPORT_STD`: Instruct the library to import the `std` module. ```bash g++ -x c++ modules/BS.thread_pool.cppm -c "-fmodule-mapper=|@g++-mapper-server -r build" -fmodule-only -fmodules -std=c++23 -I include -D BS_THREAD_POOL_IMPORT_STD ``` Add `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to enable the [native extensions](#native-extensions). 
Once the module is compiled, you can compile the test program as follows: ```bash g++ tests/BS_thread_pool_test.cpp "-fmodule-mapper=|@g++-mapper-server -r build" -fmodules -std=c++23 -o build/BS_thread_pool_test -D BS_THREAD_POOL_TEST_IMPORT_MODULE -D BS_THREAD_POOL_IMPORT_STD ``` Again, you should add `-D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to test the native extensions. If you now type `build/BS_thread_pool_test`, the test program will run. If the `std` module was successfully imported, the test program will print the message: ```none C++ Standard Library imported using: * Thread pool library: import std (C++23 std module). * Test program: import std (C++23 std module). ``` **NOTE:** At the time of writing, there is a bug when using GCC with libstdc++ on Windows via MSYS2 where the `BS.thread_pool` module doesn't compile if both native extensions and `import std` are enabled. As a workaround, until the bug is fixed, the thread pool library automatically falls back to header files if it detects that GCC and libstdc++ are being used together with the C++23 `std` module on Windows. This workaround can be disabled by defining `BS_THREAD_POOL_DISABLE_WORKAROUNDS` when compiling the module. ### Compiling with MSVC and Microsoft STL using `import std` Note: The following instructions have only been tested using MSVC v19.50.35721 and Microsoft STL v145 (202508), the latest versions at the time of writing, and may not work with older versions. Before compiling the `std` module, you must find the file `std.ixx`. If you have Visual Studio 2026, it should be located in the folder `C:\Program Files\Microsoft Visual Studio\18\Community\VC\Tools\MSVC\<version>\modules`. Replace `<version>` with the full version number of the MSVC runtime library; the latest is `14.50.35717` at the time of writing. If you installed Visual Studio in a different folder, locate `std.ixx` manually in that folder.
To compile the module file `std.ixx` with MSVC, first open the Visual Studio Developer PowerShell for the appropriate CPU architecture as explained [above](#compiling-with-msvc-using-import-bsthread_pool). Navigate to the repository folder, create the `build` folder using `mkdir build`, and then run the following command in the root folder of the repository: ```pwsh cl "path to std.ixx" /c /EHsc /nologo /permissive- /std:c++latest /Zc:__cplusplus /ifcOutput build/std.ifc /Fo:build/std.obj ``` Of course, you should replace `"path to std.ixx"` with the actual path. The compiler arguments are explained [above](#compiling-with-msvc-using-import-bsthread_pool). Next, compile the `BS.thread_pool` module as [above](#compiling-with-msvc-using-import-bsthread_pool), but with the following additional flags: * `/reference std=build/std.ifc`: Specify that the module `std` is located in the file `build/std.ifc`. * `/D BS_THREAD_POOL_IMPORT_STD`: Instruct the library to import the `std` module. ```pwsh cl modules/BS.thread_pool.cppm /reference std=build/std.ifc /c /EHsc /interface /nologo /permissive- /std:c++latest /TP /Zc:__cplusplus /I include /ifcOutput build/BS.thread_pool.ifc /Fo:build/BS.thread_pool.obj /D BS_THREAD_POOL_IMPORT_STD ``` Add `/D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to enable the [native extensions](#native-extensions). Once the module is compiled, you can compile the test program as follows (note that we added `build/std.obj` to link with the `std` module): ```pwsh cl tests/BS_thread_pool_test.cpp build/std.obj build/BS.thread_pool.obj /reference std=build/std.ifc /reference BS.thread_pool=build/BS.thread_pool.ifc /EHsc /nologo /permissive- /std:c++latest /Zc:__cplusplus /Fo:build/BS_thread_pool_test.obj /Fe:build/BS_thread_pool_test.exe /D BS_THREAD_POOL_TEST_IMPORT_MODULE /D BS_THREAD_POOL_IMPORT_STD ``` Again, you should add `/D BS_THREAD_POOL_NATIVE_EXTENSIONS` if you wish to test the native extensions. 
If you now type `build/BS_thread_pool_test`, the test program will run. If the `std` module was successfully imported, the test program will print the message: ```none C++ Standard Library imported using: * Thread pool library: import std (C++23 std module). * Test program: import std (C++23 std module). ``` ### Compiling with CMake using `import std` Note: The following instructions have only been tested using CMake v4.2.1, the latest version at the time of writing, and may not work with older versions. Also, modules are currently not supported by CMake with all generators; please see the CMake documentation for more information. If you are using [CMake](https://cmake.org/), you can enable `CMAKE_EXPERIMENTAL_CXX_IMPORT_STD` to automatically compile the `std` module, provided the compiler and standard library support it. Here is an example of a `CMakeLists.txt` file that can be used to build the test program, import the thread pool library as a module, and import the C++ Standard Library as a module: ```cmake cmake_minimum_required(VERSION 4.2.1) project(BS_thread_pool_test LANGUAGES CXX) set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_EXPERIMENTAL_CXX_IMPORT_STD ON) add_compile_definitions(BS_THREAD_POOL_IMPORT_STD) if(MSVC) add_compile_options(/permissive- /Zc:__cplusplus) endif() add_library(BS_thread_pool) target_sources(BS_thread_pool PRIVATE FILE_SET CXX_MODULES FILES modules/BS.thread_pool.cppm) target_include_directories(BS_thread_pool PRIVATE include) add_executable(${PROJECT_NAME} tests/BS_thread_pool_test.cpp) target_link_libraries(${PROJECT_NAME} PRIVATE BS_thread_pool) target_compile_definitions(${PROJECT_NAME} PRIVATE BS_THREAD_POOL_TEST_IMPORT_MODULE) ``` The `if(MSVC)` block is explained [above](#compiling-with-msvc-using-import-bsthread_pool). To enable the [native extensions](#native-extensions), add the macro `BS_THREAD_POOL_NATIVE_EXTENSIONS` to `add_compile_definitions()`. 
Place this file in the root folder of the repository, and then run the following commands: ```bash cmake -B build cmake --build build build/BS_thread_pool_test ``` For MSVC, replace the last command with `build/Debug/BS_thread_pool_test`. If the `std` module was successfully imported, the test program will print the message: ```none C++ Standard Library imported using: * Thread pool library: import std (C++23 std module). * Test program: import std (C++23 std module). ``` You can also instruct CMake to download the library automatically from the GitHub repository, as explained below, either using [CPM](#installing-using-cmake-with-cpm) or [`FetchContent`](#installing-using-cmake-with-fetchcontent). ## Installing the library using package managers ### Installing using vcpkg If you are using the [vcpkg](https://vcpkg.io/) C/C++ package manager, you can easily install `BS::thread_pool` with the following command: ```bash vcpkg install bshoshany-thread-pool ``` To update the package to the latest version, run: ```bash vcpkg upgrade ``` Please refer to [this package's page on vcpkg.io](https://vcpkg.io/en/package/bshoshany-thread-pool) for more information. ### Installing using Meson If you are using the [Meson](https://mesonbuild.com/) build system, you can install `BS::thread_pool` from [WrapDB](https://mesonbuild.com/Wrapdb-projects.html). To do so, create a `subprojects` folder in your project (if it does not already exist) and run the following command: ```bash meson wrap install bshoshany-thread-pool ``` Then, use `dependency('bshoshany-thread-pool')` in your `meson.build` file to include the package. 
To update the package to the latest version, run: ```bash meson wrap update bshoshany-thread-pool ``` ### Installing using Conan If you are using the [Conan](https://conan.io/) C/C++ package manager, you can easily integrate `BS::thread_pool` into your project by adding the following lines to your `conanfile.txt`: ```ini [requires] bshoshany-thread-pool/5.1.0 ``` To update the package to the latest version, simply change the version number. Please refer to [this package's page on ConanCenter](https://conan.io/center/recipes/bshoshany-thread-pool) for more information. ### Installing using CMake with CPM Note: The following instructions have only been tested using CMake v4.2.1 and CPM v0.42.0, the latest versions at the time of writing, and may not work with older versions. If you are using [CMake](https://cmake.org/), you can install `BS::thread_pool` most easily with [CPM](https://github.com/cpm-cmake/CPM.cmake). If CPM is already installed, simply add the following to your project's `CMakeLists.txt`: ```cmake CPMAddPackage( NAME BS_thread_pool GITHUB_REPOSITORY bshoshany/thread-pool VERSION 5.1.0 EXCLUDE_FROM_ALL SYSTEM ) add_library(BS_thread_pool INTERFACE) target_include_directories(BS_thread_pool INTERFACE ${BS_thread_pool_SOURCE_DIR}/include) ``` This will automatically download the indicated version of the package from [the GitHub repository](https://github.com/bshoshany/thread-pool) and include it in your project. A convenient shorthand for GitHub packages also exists, in which case `CPMAddPackage()` can be called with a single argument of the form `"gh:user/name@version"`. After that, `CPM_LAST_PACKAGE_NAME` will be set to the name of the package, so we need to use this variable to define the include folder. 
This results in a more compact configuration: ```cmake CPMAddPackage("gh:bshoshany/thread-pool@5.1.0") add_library(BS_thread_pool INTERFACE) target_include_directories(BS_thread_pool INTERFACE ${${CPM_LAST_PACKAGE_NAME}_SOURCE_DIR}/include) ``` It is also possible to use CPM without installing it first, by adding the following lines to `CMakeLists.txt` before `CPMAddPackage()`: ```cmake set(CPM_DOWNLOAD_LOCATION ${CMAKE_BINARY_DIR}/CPM.cmake) if(NOT(EXISTS ${CPM_DOWNLOAD_LOCATION})) file(DOWNLOAD https://github.com/cpm-cmake/CPM.cmake/releases/latest/download/CPM.cmake ${CPM_DOWNLOAD_LOCATION}) endif() include(${CPM_DOWNLOAD_LOCATION}) ``` Here is an example of a complete `CMakeLists.txt` which automatically downloads and compiles the test program [`BS_thread_pool_test.cpp`](#testing-the-library): ```cmake cmake_minimum_required(VERSION 4.2.1) project(BS_thread_pool_test LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(MSVC) add_compile_options(/permissive- /Zc:__cplusplus) endif() set(CPM_DOWNLOAD_LOCATION ${CMAKE_BINARY_DIR}/CPM.cmake) if(NOT(EXISTS ${CPM_DOWNLOAD_LOCATION})) file(DOWNLOAD https://github.com/cpm-cmake/CPM.cmake/releases/latest/download/CPM.cmake ${CPM_DOWNLOAD_LOCATION}) endif() include(${CPM_DOWNLOAD_LOCATION}) CPMAddPackage("gh:bshoshany/thread-pool@5.1.0") add_library(BS_thread_pool INTERFACE) target_include_directories(BS_thread_pool INTERFACE ${${CPM_LAST_PACKAGE_NAME}_SOURCE_DIR}/include) add_executable(${PROJECT_NAME} ${${CPM_LAST_PACKAGE_NAME}_SOURCE_DIR}/tests/BS_thread_pool_test.cpp) target_link_libraries(${PROJECT_NAME} PRIVATE BS_thread_pool) ``` The `if(MSVC)` block is explained [above](#compiling-with-msvc-using-import-bsthread_pool). To enable the [native extensions](#native-extensions), add the line `add_compile_definitions(BS_THREAD_POOL_NATIVE_EXTENSIONS)`. Replace `CMAKE_CXX_STANDARD 17` with `20` or `23` if you wish to use C++20 or C++23 features, respectively. 
Of course, you should add warning, debugging, optimization, and other compiler flags to the configuration above as needed. With this `CMakeLists.txt` in an empty folder, type the following commands to build and run the project: ```bash cmake -B build cmake --build build build/BS_thread_pool_test ``` For MSVC, replace the last command with `build/Debug/BS_thread_pool_test`. Please see [here](#compiling-with-cmake-using-import-bsthread_pool) for instructions on how to import the library as a C++20 module with CMake, and [here](#compiling-with-cmake-using-import-std) for instructions on how to import the C++ Standard Library as a module with CMake. ### Installing using CMake with `FetchContent` Note: The following instructions have only been tested using CMake v4.2.1, the latest version at the time of writing, and may not work with older versions. If you are using [CMake](https://cmake.org/) but do not wish to use 3rd-party tools, you can also install `BS::thread_pool` using the built-in [`FetchContent`](https://cmake.org/cmake/help/latest/module/FetchContent.html) module. 
Here is an example of a complete `CMakeLists.txt` which automatically downloads and compiles the test program, as in the previous section, but this time using `FetchContent` directly: ```cmake cmake_minimum_required(VERSION 4.2.1) project(BS_thread_pool_test LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(MSVC) add_compile_options(/permissive- /Zc:__cplusplus) endif() include(FetchContent) set(FETCHCONTENT_UPDATES_DISCONNECTED ON) FetchContent_Declare( bshoshany_thread_pool GIT_REPOSITORY https://github.com/bshoshany/thread-pool.git GIT_TAG v5.1.0 DOWNLOAD_EXTRACT_TIMESTAMP TRUE EXCLUDE_FROM_ALL SYSTEM ) FetchContent_MakeAvailable(bshoshany_thread_pool) add_library(BS_thread_pool INTERFACE) target_include_directories(BS_thread_pool INTERFACE ${bshoshany_thread_pool_SOURCE_DIR}/include) add_executable(${PROJECT_NAME} ${bshoshany_thread_pool_SOURCE_DIR}/tests/BS_thread_pool_test.cpp) target_link_libraries(${PROJECT_NAME} PRIVATE BS_thread_pool) ``` ## Complete library reference This section provides a complete reference for all classes and functions available in this library, along with other important information. Functions are given with simplified prototypes (e.g. removing `const`) for ease of reading. Explanations are kept brief, as the purpose of this section is only to provide a quick reference; for more detailed information and usage examples, please refer to the full documentation above. Descriptions of each item can also be found in the [Doxygen](https://www.doxygen.nl/) comments embedded in the source code. Any modern IDE, such as [Visual Studio Code](https://code.visualstudio.com/), can use these Doxygen comments to provide automatic documentation for any class and member function in this library when hovering over code with the mouse or using auto-complete. ### The `BS::thread_pool` class template `BS::thread_pool` is the main thread pool class. 
It is used to create a pool of threads that continuously execute tasks submitted to a queue. It can take template parameters, which enable optional features as described [below](#optional-features-and-the-template-parameter). The member functions that are available by default, when no template parameters are used, are: * Constructors: * `thread_pool()`: Construct a new thread pool with a number of threads equal to `std::thread::hardware_concurrency()`, or if the native extensions are enabled, the number of threads available to the process as obtained from `BS::get_os_process_affinity()`. * `thread_pool(std::size_t num_threads)`: Construct a new thread pool with the specified number of threads. * `thread_pool(F&& init)`: Construct a new thread pool with the default number of threads and the specified initialization function. `F` is a template parameter. * `thread_pool(std::size_t num_threads, F&& init)`: Construct a new thread pool with the specified number of threads and the specified initialization function. * Resetters: * `void reset()`: Reset the pool with the default number of threads (as if constructed with the default constructor). Waits for all tasks first; if pausing is enabled, waits only for running tasks, and queued tasks resume after the pool is reset. If the pool was paused before resetting it, the new pool will be paused as well. * `void reset(std::size_t num_threads)`: Reset the pool with a new number of threads. * `void reset(F&& init)`: Reset the pool with the default number of threads and a new initialization function. `F` is a template parameter. * `void reset(std::size_t num_threads, F&& init)`: Reset the pool with a new number of threads and a new initialization function. * Setters: * `void set_cleanup_func(F&& cleanup)`: Set the thread pool's cleanup function. `F` is a template parameter. * Getters: * `std::size_t get_tasks_queued()`: Get the number of tasks currently waiting in the queue to be executed by the threads. 
* `std::size_t get_tasks_running()`: Get the number of tasks currently being executed by the threads. * `std::size_t get_tasks_total()`: Get the total number of unfinished tasks: either still waiting in the queue, or running in a thread. Note that `get_tasks_total() == get_tasks_queued() + get_tasks_running()`. * `std::size_t get_thread_count()`: Get the number of threads in the pool. * `std::vector<std::thread::id> get_thread_ids()`: Get a vector containing the unique identifiers for each of the pool's threads, as obtained by `std::thread::get_id()` (or `std::jthread::get_id()` in C++20 and later). * Task submission without futures (`T1`, `T2`, `F`, `C`, and `I` are template parameters): * `void detach_task(F&& task)`: Submit a function with no arguments and no return value into the task queue. To submit a function with arguments, enclose it in a lambda expression. * `void detach_blocks(T1 first_index, T2 index_after_last, F&& block, std::size_t num_blocks = 0)`: Parallelize a loop by automatically splitting it into blocks. The block function takes two arguments, the start and end of the block, so that it is only called once per block, but it is up to the user to make sure the block function correctly deals with all the indices in each block. * `void detach_loop(T1 first_index, T2 index_after_last, F&& loop, std::size_t num_blocks = 0)`: Parallelize a loop by automatically splitting it into blocks. The loop function takes one argument, the loop index, so that it is called many times per block. * `void detach_sequence(T1 first_index, T2 index_after_last, F&& sequence)`: Submit a sequence of tasks enumerated by indices to the queue. The sequence function takes one argument, the task index, and will be called once per index. * `void detach_bulk(C& container)`: Submit a container of functions with no arguments and no return values to the queue. * `void detach_bulk(I first, I last)`: Submit an iterator range containing functions with no arguments and no return values to the queue.
* Task submission with futures (`T1`, `T2`, `F`, `R`, `C`, and `I` are template parameters): * `std::future<R> submit_task(F&& task)`: Submit a function with no arguments into the task queue. To submit a function with arguments, enclose it in a lambda expression. * `BS::multi_future<R> submit_blocks(T1 first_index, T2 index_after_last, F&& block, std::size_t num_blocks = 0)`: Parallelize a loop by automatically splitting it into blocks. The block function takes two arguments, the start and end of the block, so that it is only called once per block, but it is up to the user to make sure the block function correctly deals with all the indices in each block. Returns a `BS::multi_future` that contains the futures for all the blocks. * `BS::multi_future<void> submit_loop(T1 first_index, T2 index_after_last, F&& loop, std::size_t num_blocks = 0)`: Parallelize a loop by automatically splitting it into blocks. The loop function takes one argument, the loop index, so that it is called many times per block. It must have no return value. Returns a `BS::multi_future` that contains the futures for all the blocks. * `BS::multi_future<R> submit_sequence(T1 first_index, T2 index_after_last, F&& sequence)`: Submit a sequence of tasks enumerated by indices to the queue. The sequence function takes one argument, the task index, and will be called once per index. Returns a `BS::multi_future` that contains the futures for all the tasks. * `BS::multi_future<R> submit_bulk(C& container)`: Submit a container of functions with no arguments to the queue. Returns a `BS::multi_future` that contains the futures for all the tasks. * `BS::multi_future<R> submit_bulk(I first, I last)`: Submit an iterator range containing functions with no arguments to the queue. Returns a `BS::multi_future` that contains the futures for all the tasks. * Task management: * `void purge()`: Purge all the tasks waiting in the queue. Please note that there is no way to restore the purged tasks.
* Waiting for tasks (`R`, `P`, `C`, and `D` are template parameters): * `void wait()`: Wait for all tasks to be completed, both those that are currently running in the threads and those that are still waiting in the queue. * `bool wait_for(std::chrono::duration<R, P>& duration)`: Wait for tasks to be completed, but stop waiting after the specified duration has passed. Returns `true` if all tasks finished running, `false` if the duration expired but some tasks are still running. * `bool wait_until(std::chrono::time_point<C, D>& timeout_time)`: Wait for tasks to be completed, but stop waiting after the specified time point has been reached. Returns `true` if all tasks finished running, `false` if the time point was reached but some tasks are still running. * Destructor: * `~thread_pool()`: Wait for all tasks to complete, then destroy all threads. If a cleanup function was set, it will run in each thread right before it is destroyed. ### Optional features and the template parameter The thread pool has several optional features that must be explicitly enabled by passing a template parameter. The template parameter is a bitmask, so you can enable several features at once by combining them with the bitwise OR operator `|`. The bitmask flags are members of the `BS::tp` enumeration class. * **Task priority:** Enabled by turning on the `BS::tp::priority` flag in the template parameter. When enabled, the static member `priority_enabled` will be set to `true`. * When enabled, the priority of a task or group of tasks may be specified as an additional argument (at the end of the argument list) to all detach and submit functions. If the priority is not specified, the default value will be 0. * The priority is of type `BS::priority_t`, a signed 8-bit integer, with values between -128 and +127. The tasks will be executed in priority order from highest to lowest. Groups of parallelized tasks will all have the same priority.
* The enumeration `BS::pr` contains some pre-defined priorities: `BS::pr::highest`, `BS::pr::high`, `BS::pr::normal`, `BS::pr::low`, and `BS::pr::lowest`. * **Pausing:** Enabled by turning on the `BS::tp::pause` flag in the template parameter. When enabled, the static member `pause_enabled` will be set to `true`. Adds the following member functions: * `void pause()`: Pause the pool. The workers will temporarily stop retrieving new tasks out of the queue, although any tasks already executing will keep running until they are finished. * `void unpause()`: Unpause the pool. The workers will resume retrieving new tasks out of the queue. * `bool is_paused()`: Check whether the pool is currently paused. * **Wait deadlock checks:** Enabled by turning on the `BS::tp::wait_deadlock_checks` flag in the template parameter. When enabled, the static member `wait_deadlock_checks_enabled` will be set to `true`. * When enabled, `wait()`, `wait_for()`, and `wait_until()` will check whether the user tried to call them from within a thread of the same pool, which would result in a deadlock. If so, they will throw the exception `BS::wait_deadlock` instead of waiting. * If the feature-test macro `__cpp_exceptions` is undefined, wait deadlock checks will be automatically disabled, and trying to enable this feature will result in a compilation error. Convenience aliases are defined as follows: * `BS::light_thread_pool` disables all optional features (equivalent to `BS::thread_pool` with the default template parameter, that is, `BS::thread_pool<BS::tp::none>`). * `BS::priority_thread_pool` enables task priority (equivalent to `BS::thread_pool<BS::tp::priority>`). * `BS::pause_thread_pool` enables pausing the pool (equivalent to `BS::thread_pool<BS::tp::pause>`). * `BS::wdc_thread_pool` enables wait deadlock checks (equivalent to `BS::thread_pool<BS::tp::wait_deadlock_checks>`). ### The `BS::this_thread` class The class `BS::this_thread` provides functionality analogous to `std::this_thread`.
It contains the following static member functions: * `static std::optional<std::size_t> get_index()`: Get the index of the current thread. The optional object will not have a value if the thread is not in a pool. * `static std::optional<void*> get_pool()`: Get a pointer to the thread pool that owns the current thread. The optional object will not have a value if the thread is not in a pool. If the [native extensions](#the-native-extensions) are enabled, the class will contain additional static member functions. Please see the relevant section for more information. ### The native extensions The native extensions may be enabled by defining the macro `BS_THREAD_POOL_NATIVE_EXTENSIONS` at compilation time. If including the library as a header file, the macro must be defined before `#include "BS_thread_pool.hpp"`. If importing the library as a C++20 module, the macro must be defined as a compiler flag. The native extensions use the operating system's native API, and are thus not portable; however, they should work on Windows, Linux, and macOS. The native extensions add the following functions to the `BS` namespace: * `bool BS::set_os_process_affinity(std::vector<bool>& affinity)`: Set the processor affinity of the current process. The argument is an `std::vector<bool>` where each element corresponds to a logical processor. Returns `true` if the affinity was set successfully, `false` otherwise. Does not work on macOS. * `std::optional<std::vector<bool>> BS::get_os_process_affinity()`: Get the processor affinity of the current process. The optional object will not have a value if the affinity could not be determined. Does not work on macOS. * `bool BS::set_os_process_priority(BS::os_process_priority priority)`: Set the priority of the current process. The argument must be a member of the `BS::os_process_priority` enumeration, which contains the options `idle`, `below_normal`, `normal`, `above_normal`, `high`, and `realtime`. Returns `true` if the priority was set successfully, or `false` otherwise.
* `std::optional<BS::os_process_priority> BS::get_os_process_priority()`: Get the priority of the current process. The optional object will not have a value if the priority could not be determined, or it is not one of the pre-defined values in the `BS::os_process_priority` enumeration. The native extensions also add the following static member functions to `BS::this_thread`: * `bool BS::this_thread::set_os_thread_affinity(std::vector<bool>& affinity)`: Set the processor affinity of the current thread. The argument is an `std::vector<bool>` where each element corresponds to a logical processor. Note that the thread affinity must be a subset of the process affinity for the containing process of a thread. Does not work on macOS and Android. * `std::optional<std::vector<bool>> BS::this_thread::get_os_thread_affinity()`: Get the processor affinity of the current thread. The optional object will not have a value if the affinity could not be determined. Does not work on macOS and Android. * `bool BS::this_thread::set_os_thread_name(std::string& name)`: Set the name of the current thread. Note that on Linux thread names are limited to 16 characters, including the null terminator. Returns `true` if the name was set successfully, `false` otherwise. * `std::optional<std::string> BS::this_thread::get_os_thread_name()`: Get the name of the current thread. The optional object will not have a value if the name could not be determined. * `bool BS::this_thread::set_os_thread_priority(BS::os_thread_priority priority)`: Set the priority of the current thread. The argument must be a member of the `BS::os_thread_priority` enumeration, which contains the options `idle`, `lowest`, `below_normal`, `normal`, `above_normal`, `highest`, and `realtime`. Returns `true` if the priority was set successfully, or `false` otherwise. * `std::optional<BS::os_thread_priority> BS::this_thread::get_os_thread_priority()`: Get the priority of the current thread. 
The optional object will not have a value if the priority could not be determined, or it is not one of the pre-defined values in the `BS::os_thread_priority` enumeration. Finally, the native extensions add the following member function to `BS::thread_pool`: * `std::vector<thread_t::native_handle_type> get_native_handles()`: Get a vector containing the underlying implementation-defined thread handles for each of the pool's threads. ### The `BS::multi_future` class `BS::multi_future` is a helper class used to facilitate waiting for and/or getting the results of multiple futures at once. It is defined as a specialization of `std::vector<std::future<T>>`. This means that all of the member functions that can be used on an [`std::vector<std::future<T>>`](https://en.cppreference.com/w/cpp/container/vector) can also be used on a `BS::multi_future`. For example, you may use a range-based for loop with a `BS::multi_future`, since it has iterators. In addition to inherited member functions, `BS::multi_future` has the following specialized member functions (`R`, `P`, `C`, and `D` are template parameters): * `[void or std::vector<T>] get()`: Get the results from all the futures stored in this `BS::multi_future`, rethrowing any stored exceptions. If the futures return `void`, this function returns `void` as well. If the futures return a type `T`, this function returns a vector containing the results. * `std::size_t ready_count()`: Check how many of the futures stored in this `BS::multi_future` are ready. * `bool valid()`: Check if all the futures stored in this `BS::multi_future` are valid. * `void wait()`: Wait for all the futures stored in this `BS::multi_future`. * `bool wait_for(std::chrono::duration<R, P>& duration)`: Wait for all the futures stored in this `BS::multi_future`, but stop waiting after the specified duration has passed. Returns `true` if all futures have been waited for before the duration expired, `false` otherwise. 
* `bool wait_until(std::chrono::time_point<C, D>& timeout_time)`: Wait for all the futures stored in this `BS::multi_future` object, but stop waiting after the specified time point has been reached. Returns `true` if all futures have been waited for before the time point was reached, `false` otherwise. ### The `BS::synced_stream` class `BS::synced_stream` is a utility class which can be used to synchronize printing to one or more output streams by different threads. It has the following member functions (`T` is a template parameter pack): * `synced_stream()`: Construct a new synced stream which prints to `std::cout`. * `synced_stream(T&... streams)`: Construct a new synced stream which prints to the given output streams. * `void add_stream(std::ostream& stream)`: Add a stream to the list of output streams. * `std::vector<std::ostream*>& get_streams()`: Get a reference to a vector containing pointers to the output streams to print to. * `void print(T&... items)`: Print any number of items into the output streams. Ensures that no other threads print to the streams simultaneously, as long as they all exclusively use the same `BS::synced_stream` object to print. * `void println(T&&... items)`: Print any number of items into the output streams, followed by a newline character. * `void remove_stream(std::ostream& stream)`: Remove a stream from the list of output streams. In addition, the class comes with two stream manipulators, which are meant to help the compiler figure out which template specializations to use with the class: * `BS::synced_stream::endl`: An explicit cast of `std::endl`. Prints a newline character to the stream, and then flushes it. Should only be used if flushing is desired, otherwise a newline character should be used instead. * `BS::synced_stream::flush`: An explicit cast of `std::flush`. Used to flush the stream. ### The `BS::version` class `BS::version` is a utility class used to represent a version number. 
It has public members `major`, `minor`, and `patch`, as well as the following member functions: * `constexpr version(std::uint64_t major, std::uint64_t minor, std::uint64_t patch)`: Construct a new version object with the specified major, minor, and patch numbers. * `std::strong_ordering operator<=>(version&)`: 3-way comparison operator for two version numbers, in C++20 and later. In C++17, the operators `==`, `!=`, `<`, `<=`, `>`, `>=` are instead defined explicitly. * `std::string to_string()`: Convert the version number to a string in the format `"major.minor.patch"`. * `std::ostream& operator<<(std::ostream& stream, version& ver)`: Output the version string to a stream. In addition, the library defines a `constexpr` object `BS::thread_pool_version` of type `BS::version`, which can be used to check the version of the library at compilation time. Note that this feature is only available starting with v5.0.0 of the library; previous versions used the macros `BS_THREAD_POOL_VERSION_MAJOR`, `BS_THREAD_POOL_VERSION_MINOR`, and `BS_THREAD_POOL_VERSION_PATCH`, which are still defined for compatibility purposes, but are not accessible if the library is imported as a C++20 module. ### Diagnostic variables The library defines the following `constexpr` variables: * `bool thread_pool_import_std`: Indicates whether the library imported the C++23 Standard Library module using `import std`. * `bool thread_pool_module`: Indicates whether the library was compiled as a C++20 module. * `bool thread_pool_native_extensions`: Indicates whether the native extensions are enabled. 
### All names exported by the C++20 module When the library is imported as a C++20 module using `import BS.thread_pool`, it exports the following names, in alphabetical order: * `BS::common_index_type_t` * `BS::light_thread_pool` * `BS::multi_future` * `BS::pause_thread_pool` * `BS::pr` * `BS::priority_t` * `BS::priority_thread_pool` * `BS::synced_stream` * `BS::this_thread` * `BS::thread_pool` * `BS::thread_pool_import_std` * `BS::thread_pool_module` * `BS::thread_pool_native_extensions` * `BS::thread_pool_version` * `BS::tp` (plus related bitwise operators) * `BS::version` * `BS::wdc_thread_pool` If exceptions are enabled, the following names are also exported: * `BS::wait_deadlock` If the native extensions are enabled, the following names are also exported: * `BS::get_os_process_affinity` * `BS::get_os_process_priority` * `BS::os_process_priority` * `BS::os_thread_priority` * `BS::set_os_process_affinity` * `BS::set_os_process_priority` ## Development tools ### The `compile_cpp.py` script The Python script `compile_cpp.py`, in the `scripts` folder of [the GitHub repository](https://github.com/bshoshany/thread-pool), can be used to compile any C++ source file with different compilers on different platforms. It has only been tested using Python v3.14.2, the latest version at the time of writing, and may not work with older versions. The script was written by the author of the library to make it easier to test the library with different combinations of compilers, standards, and platforms using the built-in Visual Studio Code tasks. However, note that this script is not meant to replace CMake or any full-fledged build system, it's just a convenient script for developing single-header libraries like this one or other small projects. The `compile_cpp.py` script also transparently handles C++20 modules and importing the C++ Standard Library as a module in C++23. 
Therefore, users of this library who wish to import it as a C++20 module may find this script particularly useful. The compilation parameters can be configured using the command line arguments and/or via an optional YAML configuration file `compile_cpp.yaml`. The command line arguments are as follows: * Positional argument(s): the source file(s) to compile. * `-h` or `--help`: Show the help message and exit. * `-a` or `--arch`: The target architecture (MSVC only). Must be one of `[amd64, arm64]`, default is `amd64`. * `-b` or `--clear-output`: Clear the output folder before compiling. If no source files are specified, just clear and exit. The outcome is always an empty output folder. * `-c` or `--compiler`: Which compiler to use. Must be one of `[cl, clang++, g++]`. The default is to determine it automatically based on the platform. * `-d` or `--define`: Macros to define. Use this argument multiple times to define more than one macro. Additional macros can be defined in `compile_cpp.yaml`. * `-e` or `--force`: Force recompilation even if the executable is up to date. * `-f` or `--flag`: Extra compiler flags to add. Use this argument multiple times to add more than one flag. Additional flags can be specified in `compile_cpp.yaml`. * `-g` or `--ignore-config`: Ignore the `compile_cpp.yaml` configuration file, if it exists. * `-i` or `--include`: The include folder to use. Use this argument multiple times to use more than one include folder. Additional include folders can be specified in `compile_cpp.yaml`. * `-l` or `--as-module`: Enable this flag to compile the file as a C++20 module. * `-m` or `--module`: C++20 module files to use if desired, in the format `module_name=module_file,dependencies,...`. Use this argument multiple times to use more than one module. Additional modules can be specified in `compile_cpp.yaml`. The dependencies are only used to determine whether the module needs to be recompiled. 
* `-n` or `--deps`: Dependencies used to detect if recompilation is needed. If these files are modified, then the executable is recompiled even if the source files have not been modified. Use this argument multiple times to add more than one dependency. Additional dependencies can be specified in `compile_cpp.yaml`. Note that this is not used for C++20 modules, which have their own dependencies, listed when using `-m`. * `-o` or `--output`: The output folder and/or executable name. End with `/` to create the folder if it doesn't already exist. If not specified, the folder defined in `compile_cpp.yaml` will be used. If the executable name is not specified, it will be determined automatically in the format `{source}_[module_]{type}-{compiler}-{standard}` where: * `source` is the name of the first source file (without the extension). * `module_`, if present, indicates that the file is a C++20 module (if `-l`/`--as-module` is enabled) * `type` is one of `[debug, release]`. * `compiler` is one of `[clang, gcc, msvc]`. * `standard` is one of `[cpp17, cpp20, cpp23]`. * `-p` or `--pass`: Pass command line arguments to the compiled program when running it, if `-r`/`--run` is specified. Use this argument multiple times to pass more than one argument to the program. Additional arguments can be specified in `compile_cpp.yaml`. * `-r` or `--run`: Enable this flag to run the program after compiling it. * `-s` or `--std`: Which C++ standard to use. Must be one of `[c++17, c++20, c++23]`. The default is `c++23`. * `-t` or `--type`: Which mode to compile in. Must be one of `[debug, release]`. The default is `debug`. * `-u` or `--std-module`: Specify the path to the standard library module (C++23 only). Taken from `compile_cpp.yaml` if not specified. Use `auto` to auto-detect or `disable` to explicitly disable. * `-v` or `--verbose`: Enable this flag to print the script's diagnostic messages. 
* `-x` or `--disable-exceptions`: If set to `true`, disables exceptions in the compiler flags. If set to `false`, exceptions will be enabled. If not specified, the setting will be taken from `compile_cpp.yaml`. * `-y` or `--try-all`: Test compilation using all possible combinations of compilers and C++ standards available in the system. Also runs each compiled program if `-r`/`--run` is specified. All other arguments are passed to all compilation attempts. Cannot be used together with `-c`/`--compiler` or `-s`/`--std`. The `compile_cpp.yaml` file includes the following fields: * `defines`: A list of macros to define when compiling the source files. * `deps`: A list of dependencies, such as header files or libraries. All source files compiled using this script will be recompiled if any of these files change. * `disable_exceptions`: Whether to disable exceptions in the compiler flags. Defaults to `false` if not specified. * `flags`: A map of flags to pass to each compiler. The compiler should be one of `[cl, clang++, g++]`. The flags should be a list of strings. * `includes`: A list of include folders. * `modules`: A map of C++20 modules in the format `module_name: [module_path, dependencies, ...]`. Will only be used in C++20 or C++23 mode. The dependencies are only used to determine whether the module needs to be recompiled. * `output`: The output folder for the compiled files. * `pass_args`: A list of arguments to pass to the program if running it after compilation. * `std_module`: A map of paths to the standard library modules for each OS and compiler combination (C++23 only). The OS should be one of `[Windows, Linux, Darwin]`. Use `auto` to determine the path automatically if possible. Please see the `compile_cpp.yaml` file in the GitHub repository for an example of how to use it. By default, the script prints colored output using ANSI escape codes for better readability. This can be disabled by setting the `NO_COLOR` environment variable. 
### Visual Studio Code tasks For Visual Studio Code users, the GitHub repository includes three `.vscode` folders: * `.vscode-windows`, to be used in Windows with Clang, GCC, and MSVC. * `.vscode-linux`, to be used in Linux with Clang and GCC. * `.vscode-macos`, to be used in macOS with LLVM Clang (not Apple Clang). Each folder contains appropriate `c_cpp_properties.json`, `launch.json`, and `tasks.json` files that utilize the included Python script [`compile_cpp.py`](#the-compile_cpppy-script). Users are welcome to use these files in their own projects, but they may require some modifications to work on specific systems. ## About the project ### Bug reports and feature requests This library is under continuous and active development. If you encounter any bugs, or if you would like to request any additional features, please feel free to [open a new issue on GitHub](https://github.com/bshoshany/thread-pool/issues) and I will look into it as soon as I can. ### Contribution and pull request policy Contributions are always welcome. However, I release my projects in cumulative updates after editing and testing them locally on my system, so **my policy is to never accept any pull requests**. If you open a pull request, and I decide to incorporate your suggestion into the project, I will first modify your code to comply with the project's coding conventions (formatting, syntax, naming, comments, programming practices, etc.), and perform some tests to ensure that the change doesn't break anything. I will then merge it into the next release of the project, possibly together with some other changes. The new release will also include a note in `CHANGELOG.md` with a link to your pull request, and modifications to the documentation in `README.md` as needed. ### Starring the repository If you found this project useful, please consider [starring it on GitHub](https://github.com/bshoshany/thread-pool/stargazers)! 
This allows me to see how many people are using my code, and motivates me to keep working to improve it. ### Acknowledgements Many GitHub users have helped improve this project, directly or indirectly, via issues, pull requests, comments, and/or personal correspondence. Please see `CHANGELOG.md` for links to specific issues and pull requests that have been the most helpful. Thank you all for your contribution! 😊 ### Copyright and citing Copyright (c) 2021-2026 [Barak Shoshany](https://baraksh.com/). Licensed under the [MIT license](https://github.com/bshoshany/thread-pool/blob/master/LICENSE.txt). If you use this library in software of any kind, please provide a link to [the GitHub repository](https://github.com/bshoshany/thread-pool) in the source code and documentation. If you use this library in published research, please cite it as follows: * Barak Shoshany, *"A C++17 Thread Pool for High-Performance Scientific Computing"*, [doi:10.1016/j.softx.2024.101687](https://doi.org/10.1016/j.softx.2024.101687), [SoftwareX 26 (2024) 101687](https://www.sciencedirect.com/science/article/pii/S235271102400058X), [arXiv:2105.00613](https://arxiv.org/abs/2105.00613) You can use the following BibTeX entry: ```bibtex @article{Shoshany2024_ThreadPool, archiveprefix = {arXiv}, author = {Barak Shoshany}, doi = {10.1016/j.softx.2024.101687}, eprint = {2105.00613}, journal = {SoftwareX}, pages = {101687}, title = {{A C++17 Thread Pool for High-Performance Scientific Computing}}, url = {https://www.sciencedirect.com/science/article/pii/S235271102400058X}, volume = {26}, year = {2024} } ``` Please note that the papers on [SoftwareX](https://www.sciencedirect.com/science/article/pii/S235271102400058X) and [arXiv](https://arxiv.org/abs/2105.00613) are not up to date with the latest version of the library. These publications are only intended to facilitate discovery of this library by scientists, and to enable citing it in scientific research. 
Documentation for the latest version is provided only by the `README.md` file in [the GitHub repository](https://github.com/bshoshany/thread-pool). ### About the author My name is Barak Shoshany and I am a theoretical, mathematical, and computational physicist. I work as an Assistant Professor of Physics at Brock University in Ontario, Canada, and I am also a Sessional Lecturer at McMaster University. My research focuses on the nature of time and causality in general relativity and quantum mechanics, as well as symbolic and high-performance scientific computing. For more about me, please see [my personal website](https://baraksh.com/). ### Learning more about C++ Beginner C++ programmers may be interested in [my lecture notes](https://baraksh.com/CSE701/notes/) for a graduate-level course taught at McMaster University, which teach modern C and C++ from scratch, including some of the advanced techniques and programming practices used in developing this library. I have been teaching this course every year since 2020, and the notes are continuously updated and improved based on student feedback. ### Other projects to check out If you are a physicist or astronomer, you may be interested in my project [OGRe](https://github.com/bshoshany/OGRe): An Object-Oriented General Relativity Package for Mathematica, or its Python port [OGRePy](https://github.com/bshoshany/OGRePy): An Object-Oriented General Relativity Package for Python. thread-pool-5.1.0/compile_cpp.yaml000066400000000000000000000036101512633616700171230ustar00rootroot00000000000000# A list of macros to define when compiling the source files. defines: [BS_THREAD_POOL_TEST_IMPORT_MODULE, BS_THREAD_POOL_IMPORT_STD, BS_THREAD_POOL_NATIVE_EXTENSIONS] # A list of dependencies, such as header files or libraries. All source files compiled using this script will be recompiled if any of these files change. (Note that this is not used for C++20 modules, which have their own dependencies, listed in the modules map.) 
deps: [include/BS_thread_pool.hpp] # Whether to disable exceptions. disable_exceptions: false # A map of flags to pass to each compiler. The compiler should be one of [cl, clang++, g++]. The flags should be a list of strings. flags: cl: [/W4, /D_CRT_SECURE_NO_WARNINGS] clang++: [-Wall, -Wextra, -Wconversion, -Wsign-conversion, -Wpedantic, -Wshadow, -Weffc++, -Wdocumentation, -march=native, -fcolor-diagnostics, -fansi-escape-codes, -stdlib=libc++] g++: [-Wall, -Wextra, -Wconversion, -Wsign-conversion, -Wpedantic, -Wshadow, -Wuseless-cast, -march=native, -fdiagnostics-color=always, -Wnrvo] # A list of include folders. includes: [include] # A map of C++20 modules in the format "module_name: [module_path, dependencies, ...]". Will only be used in C++20 or C++23 mode. The dependencies are any files that the module depends on, and are only used to determine whether the module needs to be recompiled. modules: BS.thread_pool: [modules/BS.thread_pool.cppm, include/BS_thread_pool.hpp] # The output folder for the compiled files. output: build/ # A list of arguments to pass to the program if running it after compilation. pass_args: [] # A map of paths to the standard library modules for each OS and compiler combination (C++23 only). The OS should be one of [Darwin, Linux, Windows]. Use "auto" to determine the path automatically if possible. 
std_module: Darwin: clang++: auto Linux: clang++: auto g++: auto Windows: cl: auto clang++: auto g++: auto thread-pool-5.1.0/include/000077500000000000000000000000001512633616700153705ustar00rootroot00000000000000thread-pool-5.1.0/include/BS_thread_pool.hpp000066400000000000000000003607231512633616700210000ustar00rootroot00000000000000/** * ██████ ███████ ████████ ██ ██ ██████ ███████ █████ ██████ ██████ ██████ ██████ ██ * ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ * ██████ ███████ ██ ███████ ██████ █████ ███████ ██ ██ ██████ ██ ██ ██ ██ ██ * ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ * ██████ ███████ ██ ██ ██ ██ ██ ███████ ██ ██ ██████ ███████ ██ ██████ ██████ ███████ * * @file BS_thread_pool.hpp * @author Barak Shoshany (baraksh@gmail.com) (https://baraksh.com/) * @version 5.1.0 * @date 2026-01-03 * @copyright Copyright (c) 2021-2026 Barak Shoshany. Licensed under the MIT license. If you found this project useful, please consider starring it on GitHub! If you use this library in software of any kind, please provide a link to the GitHub repository https://github.com/bshoshany/thread-pool in the source code and documentation. If you use this library in published research, please cite it as follows: Barak Shoshany, "A C++17 Thread Pool for High-Performance Scientific Computing", doi:10.1016/j.softx.2024.101687, SoftwareX 26 (2024) 101687, arXiv:2105.00613 * * @brief `BS::thread_pool`: a fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool library. This header file contains the entire library, and is the only file needed to use the library. */ #ifndef BS_THREAD_POOL_HPP #define BS_THREAD_POOL_HPP // We need to include since if we're using `import std` it will not define any feature-test macros. 
// Pull in `<version>` when it is available: it is the header that defines the library feature-test macros (`__cpp_lib_*`) checked below and throughout this file.
#ifdef __has_include
    #if __has_include(<version>)
        #include <version> // NOLINT(misc-include-cleaner)
    #endif
#endif

// At the time of this release, there is a bug in Clang with libc++ where using `std::jthread` in a C++20 module causes a compilation error. As a workaround, until the bug is fixed, the thread pool library automatically falls back to `std::thread` if it detects that Clang and libc++ are being used together with C++20 modules. This workaround can be disabled by defining `BS_THREAD_POOL_DISABLE_WORKAROUNDS` when compiling the module. TODO: Remove this workaround when the bug is fixed.
#if defined(__clang__) && defined(_LIBCPP_VERSION) && defined(BS_THREAD_POOL_MODULE) && (__cplusplus >= 202002L) && !defined(BS_THREAD_POOL_DISABLE_WORKAROUNDS)
    #ifdef __cpp_lib_jthread
        #undef __cpp_lib_jthread
    #endif
#endif

// At the time of this release, there is a bug when using GCC with libstdc++ on Windows via MSYS2 where the `BS.thread_pool` module doesn't compile if both native extensions and `import std` are enabled. As a workaround, until the bug is fixed, the thread pool library automatically falls back to header files if it detects that GCC and libstdc++ are being used together with the C++23 `std` module on Windows. This workaround can be disabled by defining `BS_THREAD_POOL_DISABLE_WORKAROUNDS` when compiling the module. TODO: Remove this workaround when the bug is fixed.
#if (defined(__GNUC__) && defined(_GLIBCXX_RELEASE) && defined(_WIN32)) && !defined(BS_THREAD_POOL_DISABLE_WORKAROUNDS)
    #ifdef BS_THREAD_POOL_IMPORT_STD
        #undef BS_THREAD_POOL_IMPORT_STD
    #endif
#endif

// In GCC with libstdc++ on Linux, loading the system headers after `import std` causes compilation errors, so we load them first.
#ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS #if defined(_WIN32) #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef NOMINMAX #define NOMINMAX #endif #include #elif defined(__linux__) || defined(__APPLE__) #include #include #include #include #if defined(__linux__) #include #include #endif #else #undef BS_THREAD_POOL_NATIVE_EXTENSIONS #endif #endif // If the macro `BS_THREAD_POOL_IMPORT_STD` is defined, import the C++ Standard Library as a module. Otherwise, include the relevant Standard Library header files. #if defined(BS_THREAD_POOL_IMPORT_STD) && (__cplusplus >= 202004L) // Only allow importing the `std` module if the library itself is imported as a module. If the library is included as a header file, this will force the program that included the header file to also import `std`, which is not desirable and can lead to compilation errors if the program `#include`s any Standard Library header files. #ifdef BS_THREAD_POOL_MODULE import std; #else #error "The thread pool library cannot import the C++ Standard Library as a module using `import std` if the library itself is not imported as a module. Either use `import BS.thread_pool` to import the library, or remove the `BS_THREAD_POOL_IMPORT_STD` macro. Aborting compilation." #endif #else #undef BS_THREAD_POOL_IMPORT_STD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __cpp_concepts #include #endif #ifdef __cpp_exceptions #include #include #endif #ifdef __cpp_impl_three_way_comparison #include #endif #ifdef __cpp_lib_int_pow2 #include #endif #ifdef __cpp_lib_jthread #include #endif #endif // On Linux, defines macros called `major` and `minor`, which we undefine here to prevent conflicts. #ifdef major #undef major #endif #ifdef minor #undef minor #endif // On Windows, defines macros called `min` and `max`, which we undefine here to prevent conflicts. 
#ifdef min #undef min #endif #ifdef max #undef max #endif /** * @brief A namespace used by Barak Shoshany's projects. */ namespace BS { // Macros indicating the version of the thread pool library. #define BS_THREAD_POOL_VERSION_MAJOR 5 #define BS_THREAD_POOL_VERSION_MINOR 1 #define BS_THREAD_POOL_VERSION_PATCH 0 /** * @brief A struct used to store a version number, which can be checked and compared at compilation time. */ struct [[nodiscard]] version { constexpr version(const std::uint64_t major_, const std::uint64_t minor_, const std::uint64_t patch_) noexcept : major(major_), minor(minor_), patch(patch_) {} // In C++20 and later we can use the spaceship operator `<=>` to automatically generate comparison operators. In C++17 we have to define them manually. #ifdef __cpp_impl_three_way_comparison std::strong_ordering operator<=>(const version&) const = default; #else [[nodiscard]] constexpr friend bool operator==(const version& lhs, const version& rhs) noexcept { return std::tuple(lhs.major, lhs.minor, lhs.patch) == std::tuple(rhs.major, rhs.minor, rhs.patch); } [[nodiscard]] constexpr friend bool operator!=(const version& lhs, const version& rhs) noexcept { return !(lhs == rhs); } [[nodiscard]] constexpr friend bool operator<(const version& lhs, const version& rhs) noexcept { return std::tuple(lhs.major, lhs.minor, lhs.patch) < std::tuple(rhs.major, rhs.minor, rhs.patch); } [[nodiscard]] constexpr friend bool operator>=(const version& lhs, const version& rhs) noexcept { return !(lhs < rhs); } [[nodiscard]] constexpr friend bool operator>(const version& lhs, const version& rhs) noexcept { return std::tuple(lhs.major, lhs.minor, lhs.patch) > std::tuple(rhs.major, rhs.minor, rhs.patch); } [[nodiscard]] constexpr friend bool operator<=(const version& lhs, const version& rhs) noexcept { return !(lhs > rhs); } #endif [[nodiscard]] std::string to_string() const { return std::to_string(major) + '.' + std::to_string(minor) + '.' 
+ std::to_string(patch); } friend std::ostream& operator<<(std::ostream& stream, const version& ver) { stream << ver.to_string(); return stream; } std::uint64_t major; std::uint64_t minor; std::uint64_t patch; }; // struct version /** * @brief The version of the thread pool library. */ inline constexpr version thread_pool_version(BS_THREAD_POOL_VERSION_MAJOR, BS_THREAD_POOL_VERSION_MINOR, BS_THREAD_POOL_VERSION_PATCH); #ifdef BS_THREAD_POOL_MODULE // If the library is being compiled as a module, ensure that the version of the module file matches the version of the header file. static_assert(thread_pool_version == version(BS_THREAD_POOL_MODULE), "The versions of BS.thread_pool.cppm and BS_thread_pool.hpp do not match. Aborting compilation."); /** * @brief A flag indicating whether the thread pool library was compiled as a C++20 module. */ inline constexpr bool thread_pool_module = true; #else /** * @brief A flag indicating whether the thread pool library was compiled as a C++20 module. */ inline constexpr bool thread_pool_module = false; #endif #ifdef BS_THREAD_POOL_IMPORT_STD /** * @brief A flag indicating whether the thread pool library imported the C++23 Standard Library module using `import std`. */ inline constexpr bool thread_pool_import_std = true; #else /** * @brief A flag indicating whether the thread pool library imported the C++23 Standard Library module using `import std`. */ inline constexpr bool thread_pool_import_std = false; #endif #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS /** * @brief A flag indicating whether the thread pool library's native extensions are enabled. */ inline constexpr bool thread_pool_native_extensions = true; #else /** * @brief A flag indicating whether the thread pool library's native extensions are enabled. */ inline constexpr bool thread_pool_native_extensions = false; #endif /** * @brief The type used for the bitmask template parameter of the thread pool. 
*/ using opt_t = std::uint8_t; /** * @brief An enumeration class of flags to be used in the bitmask template parameter of `BS::thread_pool` to enable optional features. */ enum class tp : opt_t { /** * @brief No optional features enabled. */ none = 0, /** * @brief Enable task priority. */ priority = 1 << 0, /** * @brief Enable pausing. */ pause = 1 << 1, /** * @brief Enable wait deadlock checks. */ wait_deadlock_checks = 1 << 2 }; // NOLINTBEGIN(bugprone-macro-parentheses) #define BS_THREAD_POOL_DEFINE_BITWISE_OPERATOR(ENUM, OP) \ constexpr ENUM operator OP(const ENUM lhs, const ENUM rhs) noexcept \ { \ return static_cast(static_cast>(lhs) OP static_cast>(rhs)); \ } \ constexpr ENUM& operator OP##=(ENUM& lhs, const ENUM rhs) noexcept \ { \ return lhs = lhs OP rhs; \ } // NOLINTEND(bugprone-macro-parentheses) BS_THREAD_POOL_DEFINE_BITWISE_OPERATOR(tp, &) BS_THREAD_POOL_DEFINE_BITWISE_OPERATOR(tp, |) BS_THREAD_POOL_DEFINE_BITWISE_OPERATOR(tp, ^) constexpr tp operator~(const tp value) noexcept { return static_cast(~static_cast>(value)); } template class thread_pool; #ifdef __cpp_lib_move_only_function /** * @brief The template to use to store functions in the task queue and other places. In C++23 and later we use `std::move_only_function`. */ using std::move_only_function; #else template class move_only_function; /** * @brief A simple polyfill for `std::move_only_function`, to be used if C++23 features are not available. Note that it does not have all the features of `std::move_only_function`, only the minimum needed for the thread pool library. * * @tparam R The return type of the function. * @tparam Args The argument types of the function. 
*/ template class move_only_function { public: move_only_function() = default; move_only_function(move_only_function&&) noexcept = default; move_only_function& operator=(move_only_function&&) noexcept = default; move_only_function(const move_only_function&) = delete; move_only_function& operator=(const move_only_function&) = delete; ~move_only_function() = default; template , move_only_function> && std::is_invocable_r_v>> move_only_function(F&& func) : ptr(std::make_unique>>(std::forward(func))) {} // NOLINT(hicpp-explicit-conversions) R operator()(Args... args) { return ptr->call(std::forward(args)...); } private: struct func_concept { virtual ~func_concept() = default; virtual R call(Args... args) = 0; }; template struct func_model final : func_concept { template , func_model>>> explicit func_model(T&& func) : stored_func(std::forward(func)) {} R call(Args... args) override { if constexpr (std::is_void_v) { std::invoke(stored_func, std::forward(args)...); } else { return std::invoke(stored_func, std::forward(args)...); } } F stored_func; }; std::unique_ptr ptr = nullptr; }; #endif /** * @brief The type of tasks in the task queue. */ using task_t = move_only_function; #ifdef __cpp_lib_jthread /** * @brief The type of threads to use. In C++20 and later we use `std::jthread`. */ using thread_t = std::jthread; // The following macros are used to determine how to stop the workers. In C++20 and later we can use `std::stop_token`. #define BS_THREAD_POOL_WORKER_TOKEN const std::stop_token &stop_token, #define BS_THREAD_POOL_WAIT_TOKEN , stop_token #define BS_THREAD_POOL_STOP_CONDITION stop_token.stop_requested() #define BS_THREAD_POOL_OR_STOP_CONDITION #else /** * @brief The type of threads to use. In C++17 we use `std::thread`. */ using thread_t = std::thread; // The following macros are used to determine how to stop the workers. In C++17 we use a manual flag `workers_running`. 
#define BS_THREAD_POOL_WORKER_TOKEN #define BS_THREAD_POOL_WAIT_TOKEN #define BS_THREAD_POOL_STOP_CONDITION !workers_running #define BS_THREAD_POOL_OR_STOP_CONDITION || !workers_running #endif /** * @brief A type used to indicate the priority of a task. Defined to be a signed integer with a width of exactly 8 bits (-128 to +127). */ using priority_t = std::int8_t; /** * @brief An enum containing some pre-defined priorities for convenience. */ enum pr : priority_t // NOLINT(cppcoreguidelines-use-enum-class) This cannot be an `enum class` because we need the numerical values. { lowest = -128, low = -64, normal = 0, high = +64, highest = +127 }; /** * @brief A helper struct to store a task with an assigned priority. */ struct [[nodiscard]] pr_task { /** * @brief Construct a new task with an assigned priority. * * @param task_ The task. * @param priority_ The desired priority. */ explicit pr_task(task_t&& task_, const priority_t priority_ = 0) noexcept(std::is_nothrow_move_constructible_v) : task(std::move(task_)), priority(priority_) {} /** * @brief Compare the priority of two tasks. * * @param lhs The first task. * @param rhs The second task. * @return `true` if the first task has a lower priority than the second task, `false` otherwise. */ [[nodiscard]] friend bool operator<(const pr_task& lhs, const pr_task& rhs) noexcept { return lhs.priority < rhs.priority; } /** * @brief The task. It is `mutable` so it can be moved out of the `const` reference returned by `std::priority_queue::top()`. */ mutable task_t task; /** * @brief The priority of the task. */ priority_t priority = 0; }; // struct pr_task // In C++20 and later we can use concepts. In C++17 we instead use SFINAE ("Substitution Failure Is Not An Error") with `std::enable_if_t`. 
#ifdef __cpp_concepts #define BS_THREAD_POOL_IF_PAUSE_ENABLED template requires(P) template concept init_func_c = std::invocable || std::invocable; #define BS_THREAD_POOL_INIT_FUNC_CONCEPT(F) init_func_c F #else #define BS_THREAD_POOL_IF_PAUSE_ENABLED template > #define BS_THREAD_POOL_INIT_FUNC_CONCEPT(F) typename F, typename = std::enable_if_t || std::is_invocable_v> // NOLINT(bugprone-macro-parentheses) #endif /** * @brief A helper class to facilitate waiting for and/or getting the results of multiple futures at once. * * @tparam T The return type of the futures. */ template class [[nodiscard]] multi_future : public std::vector> { public: // Inherit all constructors from the base class `std::vector`. using std::vector>::vector; /** * @brief Get the results from all the futures stored in this `BS::multi_future`, rethrowing any stored exceptions. * * @return If the futures return `void`, this function returns `void` as well. Otherwise, it returns a vector containing the results. */ [[nodiscard]] std::conditional_t, void, std::vector> get() { if constexpr (std::is_void_v) { for (std::future& future : *this) future.get(); return; } else { std::vector results; results.reserve(this->size()); for (std::future& future : *this) results.push_back(future.get()); return results; } } /** * @brief Check how many of the futures stored in this `BS::multi_future` are ready. * * @return The number of ready futures. */ [[nodiscard]] std::size_t ready_count() const { std::size_t count = 0; for (const std::future& future : *this) { if (future.wait_for(std::chrono::duration::zero()) == std::future_status::ready) ++count; } return count; } /** * @brief Check if all the futures stored in this `BS::multi_future` are valid. * * @return `true` if all futures are valid, `false` if at least one of the futures is not valid. 
*/ [[nodiscard]] bool valid() const noexcept { bool is_valid = true; for (const std::future& future : *this) is_valid = is_valid && future.valid(); return is_valid; } /** * @brief Wait for all the futures stored in this `BS::multi_future`. */ void wait() const { for (const std::future& future : *this) future.wait(); } /** * @brief Wait for all the futures stored in this `BS::multi_future`, but stop waiting after the specified duration has passed. This function first waits for the first future for the desired duration. If that future is ready before the duration expires, this function waits for the second future for whatever remains of the duration. It continues similarly until the duration expires. * * @tparam R An arithmetic type representing the number of ticks to wait. * @tparam P An `std::ratio` representing the length of each tick in seconds. * @param duration The amount of time to wait. * @return `true` if all futures have been waited for before the duration expired, `false` otherwise. */ template bool wait_for(const std::chrono::duration& duration) const { const std::chrono::time_point start_time = std::chrono::steady_clock::now(); for (const std::future& future : *this) { future.wait_for(duration - (std::chrono::steady_clock::now() - start_time)); if (duration < std::chrono::steady_clock::now() - start_time) return false; } return true; } /** * @brief Wait for all the futures stored in this `BS::multi_future`, but stop waiting after the specified time point has been reached. This function first waits for the first future until the desired time point. If that future is ready before the time point is reached, this function waits for the second future until the desired time point. It continues similarly until the time point is reached. * * @tparam C The type of the clock used to measure time. * @tparam D An `std::chrono::duration` type used to indicate the time point. * @param timeout_time The time point at which to stop waiting. 
* @return `true` if all futures have been waited for before the time point was reached, `false` otherwise. */ template bool wait_until(const std::chrono::time_point& timeout_time) const { for (const std::future& future : *this) { future.wait_until(timeout_time); if (timeout_time < C::now()) return false; } return true; } }; // class multi_future /** * @brief A helper class to divide a range into blocks. Used by `detach_blocks()`, `submit_blocks()`, `detach_loop()`, and `submit_loop()`. * * @tparam T The type of the indices. Should be a signed or unsigned integer. */ template class [[nodiscard]] blocks { public: /** * @brief Construct a `blocks` object with the given specifications. * * @param first_index_ The first index in the range. * @param index_after_last_ The index after the last index in the range. * @param num_blocks_ The desired number of blocks to divide the range into. */ blocks(const T first_index_, const T index_after_last_, const std::size_t num_blocks_) noexcept : num_blocks(num_blocks_), first_index(first_index_), index_after_last(index_after_last_) { if (index_after_last > first_index) { const std::size_t total_size = static_cast(index_after_last - first_index); num_blocks = std::min(num_blocks, total_size); block_size = total_size / num_blocks; remainder = total_size % num_blocks; if (block_size == 0) { block_size = 1; num_blocks = (total_size > 1) ? total_size : 1; } } else { num_blocks = 0; } } /** * @brief Get the index after the last index of a block. * * @param block The block number. * @return The index after the last index. */ [[nodiscard]] T end(const std::size_t block) const noexcept { return (block == num_blocks - 1) ? index_after_last : start(block + 1); } /** * @brief Get the number of blocks. Note that this may be different than the desired number of blocks that was passed to the constructor. * * @return The number of blocks. 
*/ [[nodiscard]] std::size_t get_num_blocks() const noexcept { return num_blocks; } /** * @brief Get the first index of a block. * * @param block The block number. * @return The first index. */ [[nodiscard]] T start(const std::size_t block) const noexcept { return first_index + static_cast(block * block_size) + static_cast(block < remainder ? block : remainder); } private: /** * @brief The size of each block (except possibly the last block). */ std::size_t block_size = 0; /** * @brief The number of blocks. */ std::size_t num_blocks = 0; /** * @brief The remainder obtained after dividing the total size by the number of blocks. */ std::size_t remainder = 0; /** * @brief The first index in the range. */ T first_index = 0; /** * @brief The index after the last index in the range. */ T index_after_last = 0; }; // class blocks /** * @brief A function object class used by `detach_blocks()` and `submit_blocks()` to execute a block function over a specified range of indices. * * @tparam T The type of the indices. * @tparam F The type of the function. * @tparam R The return type of the function (can be `void`). */ template struct block_task { R operator()() { return (*block_ptr)(start, end); } std::shared_ptr> block_ptr; T start; T end; }; // struct block_task /** * @brief A function object class used by `detach_loop()` and `submit_loop()` to execute a loop function over a specified range of indices. * * @tparam T The type of the indices. * @tparam F The type of the function. */ template struct loop_task { void operator()() { for (T i = start; i < end; ++i) (*loop_ptr)(i); } std::shared_ptr> loop_ptr; T start; T end; }; // struct loop_task /** * @brief A function object class used by `detach_sequence()` and `submit_sequence()` to execute a sequence function over a specified index. * * @tparam T The type of the index. * @tparam F The type of the function. * @tparam R The return type of the function (can be `void`). 
*/ template struct sequence_task { R operator()() { return (*sequence_ptr)(i); } std::shared_ptr> sequence_ptr; T i; }; // struct sequence_task /** * @brief A class that takes a function with a return value (but no arguments), and constructs a task with no return value along with a future used to retrieve the function's return value once the task is executed. Used by `submit_task()` and `submit_bulk()`. * * @tparam R The return type of the function (can be `void`). */ template struct task_and_future { template , task_and_future>>> explicit task_and_future(F&& func) { std::promise promise; future = promise.get_future(); task = [task = std::forward(func), promise = std::move(promise)]() mutable { #ifdef __cpp_exceptions try { #endif if constexpr (std::is_void_v) { task(); promise.set_value(); } else { promise.set_value(task()); } #ifdef __cpp_exceptions } catch (...) { try { promise.set_exception(std::current_exception()); } catch (...) { } } #endif }; } std::future future; task_t task; }; // struct task_and_future #ifdef __cpp_exceptions /** * @brief An exception that will be thrown by `wait()`, `wait_for()`, and `wait_until()` if the user tries to call them from within a thread of the same pool, which would result in a deadlock. Only used if the flag `BS::tp::wait_deadlock_checks` is enabled in the template parameter of `BS::thread_pool`. */ struct [[nodiscard]] wait_deadlock : public std::runtime_error { wait_deadlock() : std::runtime_error("BS::wait_deadlock") {}; }; #endif #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS #if defined(_WIN32) /** * @brief An enum containing pre-defined OS-specific process priority values for portability. 
*/
enum class os_process_priority
{
    // On Windows each member is the corresponding priority class constant of the native API.
    idle = IDLE_PRIORITY_CLASS,
    below_normal = BELOW_NORMAL_PRIORITY_CLASS,
    normal = NORMAL_PRIORITY_CLASS,
    above_normal = ABOVE_NORMAL_PRIORITY_CLASS,
    high = HIGH_PRIORITY_CLASS,
    realtime = REALTIME_PRIORITY_CLASS
};

/**
 * @brief An enum containing pre-defined OS-specific thread priority values for portability.
 */
enum class os_thread_priority
{
    // On Windows each member is the corresponding thread priority constant of the native API.
    idle = THREAD_PRIORITY_IDLE,
    lowest = THREAD_PRIORITY_LOWEST,
    below_normal = THREAD_PRIORITY_BELOW_NORMAL,
    normal = THREAD_PRIORITY_NORMAL,
    above_normal = THREAD_PRIORITY_ABOVE_NORMAL,
    highest = THREAD_PRIORITY_HIGHEST,
    realtime = THREAD_PRIORITY_TIME_CRITICAL
};
    #elif defined(__linux__) || defined(__APPLE__)
/**
 * @brief An enum containing pre-defined OS-specific process priority values for portability.
 */
enum class os_process_priority
{
    // On Linux/macOS each member is a "nice" value derived from `PRIO_MIN` and `PRIO_MAX`; higher values mean lower priority.
    idle = PRIO_MAX - 2,
    below_normal = PRIO_MAX / 2,
    normal = 0,
    above_normal = PRIO_MIN / 3,
    high = PRIO_MIN * 2 / 3,
    realtime = PRIO_MIN
};

/**
 * @brief An enum containing pre-defined OS-specific thread priority values for portability.
 */
enum class os_thread_priority
{
    // On Linux/macOS these are abstract levels with no native numeric meaning.
    idle,
    lowest,
    below_normal,
    normal,
    above_normal,
    highest,
    realtime
};
    #endif

/**
 * @brief Get the processor affinity of the current process using the current platform's native API. This should work on Windows and Linux, but is not possible on macOS as the native API does not allow it.
 *
 * @return An `std::optional` object, optionally containing the processor affinity of the current process as an `std::vector<bool>` where each element corresponds to a logical processor. If the returned object does not contain a value, then the affinity could not be determined. On macOS, this function always returns `std::nullopt`.
*/ [[nodiscard]] inline std::optional> get_os_process_affinity() { #if defined(_WIN32) DWORD_PTR process_mask = 0; DWORD_PTR system_mask = 0; if (GetProcessAffinityMask(GetCurrentProcess(), &process_mask, &system_mask) == 0) return std::nullopt; #ifdef __cpp_lib_int_pow2 const std::size_t num_cpus = static_cast(std::bit_width(system_mask)); #else std::size_t num_cpus = 0; if (system_mask != 0) { num_cpus = 1; while ((system_mask >>= 1U) != 0U) ++num_cpus; } #endif std::vector affinity(num_cpus); for (std::size_t i = 0; i < num_cpus; ++i) affinity[i] = ((process_mask & (1ULL << i)) != 0ULL); return affinity; #elif defined(__linux__) cpu_set_t cpu_set; CPU_ZERO(&cpu_set); if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &cpu_set) != 0) return std::nullopt; const int num_cpus = get_nprocs(); if (num_cpus < 1) return std::nullopt; std::vector affinity(static_cast(num_cpus)); for (std::size_t i = 0; i < affinity.size(); ++i) affinity[i] = CPU_ISSET(i, &cpu_set); return affinity; #elif defined(__APPLE__) return std::nullopt; #endif } /** * @brief Set the processor affinity of the current process using the current platform's native API. This should work on Windows and Linux, but is not possible on macOS as the native API does not allow it. * * @param affinity The processor affinity to set, as an `std::vector` where each element corresponds to a logical processor. * @return `true` if the affinity was set successfully, `false` otherwise. On macOS, this function always returns `false`. */ inline bool set_os_process_affinity([[maybe_unused]] const std::vector& affinity) { #if defined(_WIN32) DWORD_PTR process_mask = 0; for (std::size_t i = 0; i < std::min(affinity.size(), sizeof(DWORD_PTR) * 8); ++i) process_mask |= (affinity[i] ? 
(1ULL << i) : 0ULL); return SetProcessAffinityMask(GetCurrentProcess(), process_mask) != 0; #elif defined(__linux__) cpu_set_t cpu_set; CPU_ZERO(&cpu_set); for (std::size_t i = 0; i < std::min(affinity.size(), CPU_SETSIZE); ++i) { if (affinity[i]) CPU_SET(i, &cpu_set); } return sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpu_set) == 0; #elif defined(__APPLE__) return false; #endif } /** * @brief Get the priority of the current process using the current platform's native API. This should work on Windows, Linux, and macOS. * * @return An `std::optional` object, optionally containing the priority of the current process, as a member of the enum `BS::os_process_priority`. If the returned object does not contain a value, then either the priority could not be determined, or it is not one of the pre-defined values and therefore cannot be represented in a portable way. */ [[nodiscard]] inline std::optional get_os_process_priority() { #if defined(_WIN32) // On Windows, this is straightforward. const DWORD priority = GetPriorityClass(GetCurrentProcess()); if (priority == 0) return std::nullopt; return static_cast(priority); #elif defined(__linux__) || defined(__APPLE__) // On Linux/macOS there is no direct analogue of `GetPriorityClass()` on Windows, so instead we get the "nice" value. The usual range is -20 to 19 or 20, with higher values corresponding to lower priorities. However, we are only using 6 pre-defined values for portability, so if the value was set via any means other than `BS::set_os_process_priority()`, it may not match one of our pre-defined values. Note that `getpriority()` returns -1 on error, but since this does not correspond to any of our pre-defined values, this function will return `std::nullopt` anyway. 
const int nice_val = getpriority(PRIO_PROCESS, static_cast(getpid())); switch (nice_val) { case static_cast(os_process_priority::idle): return os_process_priority::idle; case static_cast(os_process_priority::below_normal): return os_process_priority::below_normal; case static_cast(os_process_priority::normal): return os_process_priority::normal; case static_cast(os_process_priority::above_normal): return os_process_priority::above_normal; case static_cast(os_process_priority::high): return os_process_priority::high; case static_cast(os_process_priority::realtime): return os_process_priority::realtime; default: return std::nullopt; } #endif } /** * @brief Set the priority of the current process using the current platform's native API. This should work on Windows, Linux, and macOS. However, note that higher priorities might require elevated permissions. * * @param priority The priority to set. Must be a value from the enum `BS::os_process_priority`. * @return `true` if the priority was set successfully, `false` otherwise. Usually, `false` means that the user does not have the necessary permissions to set the desired priority. */ inline bool set_os_process_priority(const os_process_priority priority) { #if defined(_WIN32) // On Windows, this is straightforward. return SetPriorityClass(GetCurrentProcess(), static_cast(priority)) != 0; #elif defined(__linux__) || defined(__APPLE__) // On Linux/macOS there is no direct analogue of `SetPriorityClass()` on Windows, so instead we set the "nice" value. The usual range is -20 to 19 or 20, with higher values corresponding to lower priorities. However, we are only using 6 pre-defined values for portability. Note that the "nice" values are only relevant for the `SCHED_OTHER` policy, but we do not set that policy here, as it is per-thread rather than per-process. // Also, it's important to note that a non-root user cannot decrease the nice value (i.e. increase the process priority), only increase it. 
This can cause confusing behavior. For example, if the current priority is `BS::os_process_priority::normal` and the user sets it to `BS::os_process_priority::idle`, they cannot change it back `BS::os_process_priority::normal`. return setpriority(PRIO_PROCESS, static_cast(getpid()), static_cast(priority)) == 0; #endif } #endif /** * @brief A class used to obtain information about the current thread and, if native extensions are enabled, get/set its priority, affinity, or name. */ class [[nodiscard]] this_thread { template friend class thread_pool; public: /** * @brief Get the index of the current thread. If this thread belongs to a `BS::thread_pool` object, the return value will be an index in the range `[0, N)` where `N == BS::thread_pool::get_thread_count()`. Otherwise, for example if this thread is the main thread or an independent thread not in any pools, `std::nullopt` will be returned. * * @return An `std::optional` object, optionally containing a thread index. */ [[nodiscard]] static std::optional get_index() noexcept { return my_index; } /** * @brief Get a pointer to the thread pool that owns the current thread. If this thread belongs to a `BS::thread_pool` object, the return value will be a `void` pointer to that object. Otherwise, for example if this thread is the main thread or an independent thread not in any pools, `std::nullopt` will be returned. * * @return An `std::optional` object, optionally containing a pointer to a thread pool. Note that this will be a `void` pointer, so it must be cast to the desired instantiation of the `BS::thread_pool` template in order to use any member functions. */ [[nodiscard]] static std::optional get_pool() noexcept { return my_pool; } #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS /** * @brief Get the processor affinity of the current thread using the current platform's native API. This should work on Windows and Linux, but is not possible on macOS and Android as the native API does not allow it. 
* * @return An `std::optional` object, optionally containing the processor affinity of the current thread as an `std::vector` where each element corresponds to a logical processor. If the returned object does not contain a value, then the affinity could not be determined. On macOS and Android, this function always returns `std::nullopt`. */ [[nodiscard]] static std::optional> get_os_thread_affinity() { #if defined(_WIN32) // Windows does not have a `GetThreadAffinityMask()` function, but `SetThreadAffinityMask()` returns the previous affinity mask, so we can use that to get the current affinity and then restore it. It's a bit of a hack, but it works. Since the thread affinity must be a subset of the process affinity, we use the process affinity as the temporary value. DWORD_PTR process_mask = 0; DWORD_PTR system_mask = 0; if (GetProcessAffinityMask(GetCurrentProcess(), &process_mask, &system_mask) == 0) return std::nullopt; const DWORD_PTR previous_mask = SetThreadAffinityMask(GetCurrentThread(), process_mask); if (previous_mask == 0) return std::nullopt; SetThreadAffinityMask(GetCurrentThread(), previous_mask); #ifdef __cpp_lib_int_pow2 const std::size_t num_cpus = static_cast(std::bit_width(system_mask)); #else std::size_t num_cpus = 0; if (system_mask != 0) { num_cpus = 1; while ((system_mask >>= 1U) != 0U) ++num_cpus; } #endif std::vector affinity(num_cpus); for (std::size_t i = 0; i < num_cpus; ++i) affinity[i] = ((previous_mask & (1ULL << i)) != 0ULL); return affinity; #elif defined(__linux__) && !defined(__ANDROID__) cpu_set_t cpu_set; CPU_ZERO(&cpu_set); if (pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set) != 0) return std::nullopt; const int num_cpus = get_nprocs(); if (num_cpus < 1) return std::nullopt; std::vector affinity(static_cast(num_cpus)); for (std::size_t i = 0; i < affinity.size(); ++i) affinity[i] = CPU_ISSET(i, &cpu_set); return affinity; #else return std::nullopt; #endif } /** * @brief Set the processor affinity of the 
current thread using the current platform's native API. This should work on Windows and Linux, but is not possible on macOS and Android as the native API does not allow it. Note that the thread affinity must be a subset of the process affinity (as obtained using `BS::get_os_process_affinity()`) for the containing process of a thread. * * @param affinity The processor affinity to set, as an `std::vector` where each element corresponds to a logical processor. * @return `true` if the affinity was set successfully, `false` otherwise. On macOS and Android, this function always returns `false`. */ static bool set_os_thread_affinity([[maybe_unused]] const std::vector& affinity) { #if defined(_WIN32) DWORD_PTR thread_mask = 0; for (std::size_t i = 0; i < std::min(affinity.size(), sizeof(DWORD_PTR) * 8); ++i) thread_mask |= (affinity[i] ? (1ULL << i) : 0ULL); return SetThreadAffinityMask(GetCurrentThread(), thread_mask) != 0; #elif defined(__linux__) && !defined(__ANDROID__) cpu_set_t cpu_set; CPU_ZERO(&cpu_set); for (std::size_t i = 0; i < std::min(affinity.size(), CPU_SETSIZE); ++i) { if (affinity[i]) CPU_SET(i, &cpu_set); } return pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set) == 0; #else return false; #endif } /** * @brief Get the name of the current thread using the current platform's native API. This should work on Windows, Linux, and macOS. * * @return An `std::optional` object, optionally containing the name of the current thread. If the returned object does not contain a value, then the name could not be determined. */ [[nodiscard]] static std::optional get_os_thread_name() { #if defined(_WIN32) // On Windows thread names are wide strings, so we need to convert them to normal strings. 
PWSTR data = nullptr; const HRESULT hr = GetThreadDescription(GetCurrentThread(), &data); if (FAILED(hr)) return std::nullopt; if (data == nullptr) return std::nullopt; const int size = WideCharToMultiByte(CP_UTF8, 0, data, -1, nullptr, 0, nullptr, nullptr); if (size == 0) { LocalFree(data); return std::nullopt; } std::string name(static_cast(size) - 1, 0); const int result = WideCharToMultiByte(CP_UTF8, 0, data, -1, name.data(), size, nullptr, nullptr); LocalFree(data); if (result == 0) return std::nullopt; return name; #elif defined(__linux__) || defined(__APPLE__) #ifdef __linux__ // On Linux thread names are limited to 16 characters, including the null terminator. constexpr std::size_t buffer_size = 16; #else // On macOS thread names are limited to 64 characters, including the null terminator. constexpr std::size_t buffer_size = 64; #endif char name[buffer_size] = {}; if (pthread_getname_np(pthread_self(), name, buffer_size) != 0) return std::nullopt; return std::string(name); #endif } /** * @brief Set the name of the current thread using the current platform's native API. This should work on Windows, Linux, and macOS. Note that on Linux thread names are limited to 16 characters, including the null terminator. * * @param name The name to set. * @return `true` if the name was set successfully, `false` otherwise. */ static bool set_os_thread_name(const std::string& name) { #if defined(_WIN32) // On Windows thread names are wide strings, so we need to convert them from normal strings. const int size = MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, nullptr, 0); if (size == 0) return false; std::wstring wide(static_cast(size), 0); if (MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, wide.data(), size) == 0) return false; const HRESULT hr = SetThreadDescription(GetCurrentThread(), wide.data()); return SUCCEEDED(hr); #elif defined(__linux__) // On Linux this is straightforward. 
return pthread_setname_np(pthread_self(), name.data()) == 0; #elif defined(__APPLE__) // On macOS, unlike Linux, a thread can only set a name for itself, so the signature is different. return pthread_setname_np(name.data()) == 0; #endif } /** * @brief Get the priority of the current thread using the current platform's native API. This should work on Windows, Linux, and macOS. * * @return An `std::optional` object, optionally containing the priority of the current thread, as a member of the enum `BS::os_thread_priority`. If the returned object does not contain a value, then either the priority could not be determined, or it is not one of the pre-defined values. */ [[nodiscard]] static std::optional get_os_thread_priority() { #if defined(_WIN32) // On Windows, this is straightforward. const int priority = GetThreadPriority(GetCurrentThread()); if (priority == THREAD_PRIORITY_ERROR_RETURN) return std::nullopt; return static_cast(priority); #elif defined(__linux__) // On Linux, we distill the choices of scheduling policy, priority, and "nice" value into 7 pre-defined levels, for simplicity and portability. The total number of possible combinations of policies and priorities is much larger, so if the value was set via any means other than `BS::this_thread::set_os_thread_priority()`, it may not match one of our pre-defined values. int policy = 0; struct sched_param param = {}; if (pthread_getschedparam(pthread_self(), &policy, ¶m) != 0) return std::nullopt; if (policy == SCHED_FIFO && param.sched_priority == sched_get_priority_max(SCHED_FIFO)) { // The only pre-defined priority that uses SCHED_FIFO and the maximum available priority value is the "realtime" priority. 
return os_thread_priority::realtime; } if (policy == SCHED_RR && param.sched_priority == sched_get_priority_min(SCHED_RR) + ((sched_get_priority_max(SCHED_RR) - sched_get_priority_min(SCHED_RR)) / 2)) { // The only pre-defined priority that uses SCHED_RR and a priority in the middle of the available range is the "highest" priority. return os_thread_priority::highest; } #ifdef __linux__ if (policy == SCHED_IDLE) { // The only pre-defined priority that uses SCHED_IDLE is the "idle" priority. Note that this scheduling policy is not available on macOS. return os_thread_priority::idle; } #endif if (policy == SCHED_OTHER) { // For SCHED_OTHER, the result depends on the "nice" value. The usual range is -20 to 19 or 20, with higher values corresponding to lower priorities. Note that `getpriority()` returns -1 on error, but since this does not correspond to any of our pre-defined values, this function will return `std::nullopt` anyway. const int nice_val = getpriority(PRIO_PROCESS, static_cast(syscall(SYS_gettid))); switch (nice_val) { case PRIO_MIN + 2: return os_thread_priority::above_normal; case 0: return os_thread_priority::normal; case (PRIO_MAX / 2) + (PRIO_MAX % 2): return os_thread_priority::below_normal; case PRIO_MAX - 3: return os_thread_priority::lowest; #ifdef __APPLE__ // `SCHED_IDLE` doesn't exist on macOS, so we use the policy `SCHED_OTHER` with a "nice" value of `PRIO_MAX - 2`. case PRIO_MAX - 2: return os_thread_priority::idle; #endif default: return std::nullopt; } } return std::nullopt; #elif defined(__APPLE__) // On macOS, we distill the choices of scheduling policy and priority into 7 pre-defined levels, for simplicity and portability. The total number of possible combinations of policies and priorities is much larger, so if the value was set via any means other than `BS::this_thread::set_os_thread_priority()`, it may not match one of our pre-defined values. 
int policy = 0; struct sched_param param = {}; if (pthread_getschedparam(pthread_self(), &policy, ¶m) != 0) return std::nullopt; if (policy == SCHED_FIFO && param.sched_priority == sched_get_priority_max(SCHED_FIFO)) { // The only pre-defined priority that uses SCHED_FIFO and the maximum available priority value is the "realtime" priority. return os_thread_priority::realtime; } if (policy == SCHED_RR && param.sched_priority == sched_get_priority_min(SCHED_RR) + (sched_get_priority_max(SCHED_RR) - sched_get_priority_min(SCHED_RR)) / 2) { // The only pre-defined priority that uses SCHED_RR and a priority in the middle of the available range is the "highest" priority. return os_thread_priority::highest; } if (policy == SCHED_OTHER) { // For SCHED_OTHER, the result depends on the specific value of the priority. if (param.sched_priority == sched_get_priority_max(SCHED_OTHER)) return os_thread_priority::above_normal; if (param.sched_priority == sched_get_priority_min(SCHED_OTHER) + (sched_get_priority_max(SCHED_OTHER) - sched_get_priority_min(SCHED_OTHER)) / 2) return os_thread_priority::normal; if (param.sched_priority == sched_get_priority_min(SCHED_OTHER) + (sched_get_priority_max(SCHED_OTHER) - sched_get_priority_min(SCHED_OTHER)) * 2 / 3) return os_thread_priority::below_normal; if (param.sched_priority == sched_get_priority_min(SCHED_OTHER) + (sched_get_priority_max(SCHED_OTHER) - sched_get_priority_min(SCHED_OTHER)) / 3) return os_thread_priority::lowest; if (param.sched_priority == sched_get_priority_min(SCHED_OTHER)) return os_thread_priority::idle; return std::nullopt; } return std::nullopt; #endif } /** * @brief Set the priority of the current thread using the current platform's native API. This should work on Windows, Linux, and macOS. However, note that higher priorities might require elevated permissions. * * @param priority The priority to set. Must be a value from the enum `BS::os_thread_priority`. 
* @return `true` if the priority was set successfully, `false` otherwise. Usually, `false` means that the user does not have the necessary permissions to set the desired priority. */ static bool set_os_thread_priority(const os_thread_priority priority) { #if defined(_WIN32) // On Windows, this is straightforward. return SetThreadPriority(GetCurrentThread(), static_cast(priority)) != 0; #elif defined(__linux__) // On Linux, we distill the choices of scheduling policy, priority, and "nice" value into 7 pre-defined levels, for simplicity and portability. The total number of possible combinations of policies and priorities is much larger, but allowing more fine-grained control would not be portable. int policy = 0; struct sched_param param = {}; std::optional nice_val = std::nullopt; switch (priority) { case os_thread_priority::realtime: // "Realtime" pre-defined priority: We use the policy `SCHED_FIFO` with the highest possible priority. policy = SCHED_FIFO; param.sched_priority = sched_get_priority_max(SCHED_FIFO); break; case os_thread_priority::highest: // "Highest" pre-defined priority: We use the policy `SCHED_RR` ("round-robin") with a priority in the middle of the available range. policy = SCHED_RR; param.sched_priority = sched_get_priority_min(SCHED_RR) + ((sched_get_priority_max(SCHED_RR) - sched_get_priority_min(SCHED_RR)) / 2); break; case os_thread_priority::above_normal: // "Above normal" pre-defined priority: We use the policy `SCHED_OTHER` (the default). This policy does not accept a priority value, so priority must be 0. However, we set the "nice" value to the minimum value as given by `PRIO_MIN`, plus 2 (which should evaluate to -18). The usual range is -20 to 19 or 20, with higher values corresponding to lower priorities. 
policy = SCHED_OTHER; param.sched_priority = 0; nice_val = PRIO_MIN + 2; break; case os_thread_priority::normal: // "Normal" pre-defined priority: We use the policy `SCHED_OTHER`, priority must be 0, and we set the "nice" value to 0 (the default). policy = SCHED_OTHER; param.sched_priority = 0; nice_val = 0; break; case os_thread_priority::below_normal: // "Below normal" pre-defined priority: We use the policy `SCHED_OTHER`, priority must be 0, and we set the "nice" value to half the maximum value as given by `PRIO_MAX`, rounded up (which should evaluate to 10). policy = SCHED_OTHER; param.sched_priority = 0; nice_val = (PRIO_MAX / 2) + (PRIO_MAX % 2); break; case os_thread_priority::lowest: // "Lowest" pre-defined priority: We use the policy `SCHED_OTHER`, priority must be 0, and we set the "nice" value to the maximum value as given by `PRIO_MAX`, minus 3 (which should evaluate to 17). policy = SCHED_OTHER; param.sched_priority = 0; nice_val = PRIO_MAX - 3; break; case os_thread_priority::idle: // "Idle" pre-defined priority on Linux: We use the policy `SCHED_IDLE`, priority must be 0, and we don't touch the "nice" value. policy = SCHED_IDLE; param.sched_priority = 0; break; default: return false; } bool success = (pthread_setschedparam(pthread_self(), policy, ¶m) == 0); if (nice_val.has_value()) success = success && (setpriority(PRIO_PROCESS, static_cast(syscall(SYS_gettid)), nice_val.value()) == 0); return success; #elif defined(__APPLE__) // On macOS, unlike Linux, the "nice" value is per-process, not per-thread (in compliance with the POSIX standard). However, unlike Linux, `SCHED_OTHER` on macOS does have a range of priorities. So for `realtime` and `highest` priorities we use `SCHED_FIFO` and `SCHED_RR` respectively as for Linux, but for the other priorities we use `SCHED_OTHER` with a priority in the range given by `sched_get_priority_min(SCHED_OTHER)` to `sched_get_priority_max(SCHED_OTHER)`. 
int policy = 0; struct sched_param param = {}; switch (priority) { case os_thread_priority::realtime: // "Realtime" pre-defined priority: We use the policy `SCHED_FIFO` with the highest possible priority. policy = SCHED_FIFO; param.sched_priority = sched_get_priority_max(SCHED_FIFO); break; case os_thread_priority::highest: // "Highest" pre-defined priority: We use the policy `SCHED_RR` ("round-robin") with a priority in the middle of the available range. policy = SCHED_RR; param.sched_priority = sched_get_priority_min(SCHED_RR) + (sched_get_priority_max(SCHED_RR) - sched_get_priority_min(SCHED_RR)) / 2; break; case os_thread_priority::above_normal: // "Above normal" pre-defined priority: We use the policy `SCHED_OTHER` (the default) with the highest possible priority. policy = SCHED_OTHER; param.sched_priority = sched_get_priority_max(SCHED_OTHER); break; case os_thread_priority::normal: // "Normal" pre-defined priority: We use the policy `SCHED_OTHER` (the default) with a priority in the middle of the available range (which appears to be the default?). policy = SCHED_OTHER; param.sched_priority = sched_get_priority_min(SCHED_OTHER) + (sched_get_priority_max(SCHED_OTHER) - sched_get_priority_min(SCHED_OTHER)) / 2; break; case os_thread_priority::below_normal: // "Below normal" pre-defined priority: We use the policy `SCHED_OTHER` (the default) with a priority equal to 2/3rds of the normal value. policy = SCHED_OTHER; param.sched_priority = sched_get_priority_min(SCHED_OTHER) + (sched_get_priority_max(SCHED_OTHER) - sched_get_priority_min(SCHED_OTHER)) * 2 / 3; break; case os_thread_priority::lowest: // "Lowest" pre-defined priority: We use the policy `SCHED_OTHER` (the default) with a priority equal to 1/3rd of the normal value. 
policy = SCHED_OTHER; param.sched_priority = sched_get_priority_min(SCHED_OTHER) + (sched_get_priority_max(SCHED_OTHER) - sched_get_priority_min(SCHED_OTHER)) / 3; break; case os_thread_priority::idle: // "Idle" pre-defined priority on macOS: We use the policy `SCHED_OTHER` (the default) with the lowest possible priority. policy = SCHED_OTHER; param.sched_priority = sched_get_priority_min(SCHED_OTHER); break; default: return false; } return pthread_setschedparam(pthread_self(), policy, ¶m) == 0; #endif } #endif private: inline static thread_local std::optional my_index = std::nullopt; inline static thread_local std::optional my_pool = std::nullopt; }; // class this_thread /** * @brief A meta-programming template to determine the common type of two integer types. Unlike `std::common_type`, this template maintains correct signedness. * * @tparam T1 The first type. * @tparam T2 The second type. * @tparam Enable A dummy parameter to enable SFINAE in specializations. */ template struct common_index_type { // Fallback to `std::common_type_t` if no specialization matches. using type = std::common_type_t; }; // The common type of two signed integers is the larger of the integers, with the same signedness. template struct common_index_type && std::is_signed_v>> { using type = std::conditional_t<(sizeof(T1) >= sizeof(T2)), T1, T2>; }; // The common type of two unsigned integers is the larger of the integers, with the same signedness. template struct common_index_type && std::is_unsigned_v>> { using type = std::conditional_t<(sizeof(T1) >= sizeof(T2)), T1, T2>; }; // The common type of a signed and an unsigned integer is a signed integer that can hold the full ranges of both integers. template struct common_index_type && std::is_unsigned_v) || (std::is_unsigned_v && std::is_signed_v)>> { using S = std::conditional_t, T1, T2>; using U = std::conditional_t, T1, T2>; static constexpr std::size_t larger_size = (sizeof(S) > sizeof(U)) ? 
sizeof(S) : sizeof(U); using type = std::conditional_t>, // If the unsigned integer is 64 bits, the common type should also be an unsigned 64-bit integer, that is, `std::uint64_t`. The reason is that the most common scenario where this might happen is where the indices go from 0 to `x` where `x` has been previously defined as `std::size_t`, e.g. the size of a vector. Note that this will fail if the first index is negative; in that case, the user must cast the indices explicitly to the desired common type. If the unsigned integer is not 64 bits, then the signed integer must be 64 bits, hence the common type is `std::int64_t`. std::conditional_t>; }; /** * @brief A helper type alias to obtain the common type from the template `BS::common_index_type`. * * @tparam T1 The first type. * @tparam T2 The second type. */ template using common_index_type_t = typename common_index_type::type; /** * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool class. This alias defines a thread pool with all optional features disabled. */ using light_thread_pool = thread_pool; /** * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool class. This alias defines a thread pool with task priority enabled. */ using priority_thread_pool = thread_pool; /** * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool class. This alias defines a thread pool with pausing enabled. */ using pause_thread_pool = thread_pool; /** * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool class. This alias defines a thread pool with wait deadlock checks enabled. */ using wdc_thread_pool = thread_pool; /** * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool class. * * @tparam OptFlags A bitmask of flags which can be used to enable optional features. 
The flags are members of the `BS::tp` enumeration: `BS::tp::priority`, `BS::tp::pause`, and `BS::tp::wait_deadlock_checks`. The default is `BS::tp::none`, which disables all optional features. To enable multiple features, use the bitwise OR operator `|`, e.g. `BS::tp::priority | BS::tp::pause`.
 */
// NOTE(review): the template parameter list after `template` (the declaration of `OptFlags` and its default) has been stripped from this copy of the file; restore it from the upstream source before compiling.
template
class [[nodiscard]] thread_pool
{
public:
    /**
     * @brief A flag indicating whether task priority is enabled.
     */
    static constexpr bool priority_enabled = (OptFlags & tp::priority) != tp::none;

    /**
     * @brief A flag indicating whether pausing is enabled.
     */
    static constexpr bool pause_enabled = (OptFlags & tp::pause) != tp::none;

    /**
     * @brief A flag indicating whether wait deadlock checks are enabled.
     */
    static constexpr bool wait_deadlock_checks_enabled = (OptFlags & tp::wait_deadlock_checks) != tp::none;

    // Reject, at compile time, a pool that requests wait deadlock checks in a build without exception support.
#ifndef __cpp_exceptions
    static_assert(!wait_deadlock_checks_enabled, "Wait deadlock checks cannot be enabled if exception handling is disabled.");
#endif

    // ============================
    // Constructors and destructors
    // ============================

    /**
     * @brief Construct a new thread pool. The number of threads will be the total number of hardware threads available, as reported by the implementation. This is usually determined by the number of cores in the CPU. If a core is hyperthreaded, it will count as two threads. If the native extensions are enabled, the pool will instead use the number of threads available to the process, as obtained from `BS::get_os_process_affinity()`, which can be less than the number of hardware threads.
     *
     * Delegates to the two-argument constructor with a thread count of 0 and an empty initialization function.
     */
    thread_pool() : thread_pool(0, [] {}) {}

    /**
     * @brief Construct a new thread pool with the specified number of threads.
     *
     * Delegates to the two-argument constructor with an empty initialization function.
     *
     * @param num_threads The number of threads to use.
     */
    explicit thread_pool(const std::size_t num_threads) : thread_pool(num_threads, [] {}) {}

    /**
     * @brief Construct a new thread pool with the specified initialization function and the default number of threads.
*
     * @param init An initialization function to run in each thread before it starts executing any submitted tasks. The function must have no return value, and can either take one argument, the thread index of type `std::size_t`, or zero arguments. It will be executed exactly once per thread, when the thread is first constructed. The initialization function must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function.
     */
    // NOTE(review): the template parameter list after `template` and the explicit template argument of `std::forward` have been stripped from this copy of the file; restore them from the upstream source before compiling.
    template
    explicit thread_pool(F&& init) : thread_pool(0, std::forward(init)) {}

    /**
     * @brief Construct a new thread pool with the specified number of threads and initialization function.
     *
     * This is the constructor all the other constructors delegate to; it forwards both arguments to `create_threads()`.
     *
     * @param num_threads The number of threads to use.
     * @param init An initialization function to run in each thread before it starts executing any submitted tasks. The function must have no return value, and can either take one argument, the thread index of type `std::size_t`, or zero arguments. It will be executed exactly once per thread, when the thread is first constructed. The initialization function must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function.
     */
    // NOTE(review): same loss as above: the template parameter list and the `std::forward` argument are missing in this copy.
    template
    thread_pool(const std::size_t num_threads, F&& init)
    {
        create_threads(num_threads, std::forward(init));
    }

    // The copy and move constructors and assignment operators are deleted. The thread pool cannot be copied or moved.
    thread_pool(const thread_pool&) = delete;
    thread_pool(thread_pool&&) = delete;
    thread_pool& operator=(const thread_pool&) = delete;
    thread_pool& operator=(thread_pool&&) = delete;

    /**
     * @brief Destruct the thread pool. Waits for all tasks to complete, then destroys all threads. If a cleanup function was set, it will run in each thread right before it is destroyed. Note that if the pool is paused, then any tasks still in the queue will never be executed.
*/
    ~thread_pool() noexcept
    {
        // The destructor is `noexcept`: when exception support is available, anything thrown by `wait()` or `destroy_threads()` is caught and deliberately discarded below.
#ifdef __cpp_exceptions
        try
        {
#endif
            wait();
            // `destroy_threads()` is only called here when `__cpp_lib_jthread` is not defined (presumably because `std::jthread` joins on its own destruction; confirm against the thread type used by the pool).
#ifndef __cpp_lib_jthread
            destroy_threads();
#endif
#ifdef __cpp_exceptions
        }
        catch (...)
        {
        }
#endif
    }

    // =======================
    // Public member functions
    // =======================

    /**
     * @brief Parallelize a loop by automatically splitting it into blocks and submitting each block separately to the queue, with the specified priority. The block function takes two arguments, the start and end of the block, so that it is only called once per block, but it is up to the user to make sure the block function correctly deals with all the indices in each block. Does not return a `BS::multi_future`, so the user must use `wait()` or some other method to ensure that the loop finishes executing, otherwise bad things will happen.
     *
     * @tparam T1 The type of the first index. Should be a signed or unsigned integer.
     * @tparam T2 The type of the index after the last index. Should be a signed or unsigned integer.
     * @tparam T The common type of the indices, as determined by `BS::common_index_type_t`.
     * @tparam F The type of the block function.
     * @param first_index The first index in the loop.
     * @param index_after_last The index after the last index in the loop. The loop will iterate from `first_index` to `(index_after_last - 1)` inclusive. In other words, it will be equivalent to `for (T i = first_index; i < index_after_last; ++i)`. Note that if `index_after_last <= first_index`, no tasks will be submitted.
     * @param block A function that will be called once per block. Should take exactly two arguments: the first index in the block and the index after the last index in the block. `block(start, end)` should typically involve a loop of the form `for (T i = start; i < end; ++i)`. Must not return a value.
     * @param num_blocks The maximum number of blocks to split the loop into. The default is 0, which means the number of blocks will be equal to the number of threads in the pool.
* @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     */
    // NOTE(review): angle-bracket template lists have been stripped from this copy of the file (the start of the parameter list after `template`, any explicit arguments of `enqueue_blocks`, and the arguments of `static_cast`/`std::forward`); restore them from the upstream source before compiling.
    template , typename F>
    void detach_blocks(const T1 first_index, const T2 index_after_last, F&& block, const std::size_t num_blocks = 0, const priority_t priority = 0)
    {
        // Both indices are cast before being handed to the shared `enqueue_blocks` helper together with the forwarded block function.
        enqueue_blocks(static_cast(first_index), static_cast(index_after_last), std::forward(block), num_blocks, priority);
    }

    /**
     * @brief Submit an iterator range containing functions with no arguments and no return values into the task queue, with the specified priority. To submit functions with arguments, enclose them in lambda expressions. Does not return a `BS::multi_future`, so the user must use `wait()` or some other method to ensure that the loop finishes executing, otherwise bad things will happen.
     *
     * Each function is moved out of the range (`std::move(*it)`), so the elements of the source range are left in a moved-from state. Nothing is done if the range is empty.
     *
     * @tparam I The type of the iterators.
     * @param first An iterator to the first function.
     * @param last An iterator to one past the last function.
     * @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     */
    // NOTE(review): the template parameter list after `template` is missing in this copy of the file.
    template
    void detach_bulk(const I first, const I last, const priority_t priority = 0)
    {
        if (first != last)
        {
            bool notify = false;
            {
                // All the tasks are added under a single acquisition of the queue mutex.
                const std::scoped_lock tasks_lock(tasks_mutex);
                // Workers are only notified if the queue was empty before this batch was added and, when pausing is enabled, the pool is not paused.
                // NOTE(review): when the queue was already non-empty no notification is sent; this presumably relies on the workers having been notified for the earlier tasks. Confirm against the worker loop.
                if constexpr (pause_enabled)
                    notify = tasks.empty() && !paused;
                else
                    notify = tasks.empty();
                for (I it = first; it != last; ++it)
                {
                    if constexpr (priority_enabled)
                        tasks.emplace(std::move(*it), priority);
                    else
                        tasks.emplace(std::move(*it));
                }
            }
            // The notification is issued after the mutex has been released.
            if (notify)
                task_available_cv.notify_all();
        }
    }

    /**
     * @brief Submit a container of functions with no arguments and no return values into the task queue, with the specified priority. To submit functions with arguments, enclose them in lambda expressions.
Does not return a `BS::multi_future`, so the user must use `wait()` or some other method to ensure that the loop finishes executing, otherwise bad things will happen.
     *
     * This overload forwards `std::begin(container)` and `std::end(container)` to the iterator-range overload of `detach_bulk()`.
     *
     * @tparam C The type of the container. Must either be an array or have `begin()` and `end()` member functions.
     * @param container The container.
     * @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     */
    // NOTE(review): the template parameter list after `template` is missing in this copy of the file; restore it from the upstream source before compiling.
    template
    void detach_bulk(C& container, const priority_t priority = 0)
    {
        detach_bulk(std::begin(container), std::end(container), priority);
    }

    /**
     * @brief Parallelize a loop by automatically splitting it into blocks and submitting each block separately to the queue, with the specified priority. The loop function takes one argument, the loop index, and it is called exactly once per index, but many times per block. Does not return a `BS::multi_future`, so the user must use `wait()` or some other method to ensure that the loop finishes executing, otherwise bad things will happen.
     *
     * @tparam T1 The type of the first index. Should be a signed or unsigned integer.
     * @tparam T2 The type of the index after the last index. Should be a signed or unsigned integer.
     * @tparam T The common type of the indices, as determined by `BS::common_index_type_t`.
     * @tparam F The type of the loop function.
     * @param first_index The first index in the loop.
     * @param index_after_last The index after the last index in the loop. The loop will iterate from `first_index` to `(index_after_last - 1)` inclusive. In other words, it will be equivalent to `for (T i = first_index; i < index_after_last; ++i)`. Note that if `index_after_last <= first_index`, no tasks will be submitted.
     * @param loop A function that will be called once per index, many times per block. Should take exactly one argument: the loop index. Must not return a value.
* @param num_blocks The maximum number of blocks to split the loop into. The default is 0, which means the number of blocks will be equal to the number of threads in the pool.
     * @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     */
    // NOTE(review): angle-bracket template lists have been stripped from this copy of the file (the start of the parameter list after `template`, any explicit arguments of `enqueue_loop`, and the arguments of `static_cast`/`std::forward`); restore them from the upstream source before compiling.
    template , typename F>
    void detach_loop(const T1 first_index, const T2 index_after_last, F&& loop, const std::size_t num_blocks = 0, const priority_t priority = 0)
    {
        // Both indices are cast before being handed to the shared `enqueue_loop` helper together with the forwarded loop function.
        enqueue_loop(static_cast(first_index), static_cast(index_after_last), std::forward(loop), num_blocks, priority);
    }

    /**
     * @brief Submit a sequence of tasks enumerated by indices to the queue, with the specified priority. The sequence function takes one argument, the task index, and will be called once per index. Does not return a `BS::multi_future`, so the user must use `wait()` or some other method to ensure that the sequence finishes executing, otherwise bad things will happen.
     *
     * @tparam T1 The type of the first index. Should be a signed or unsigned integer.
     * @tparam T2 The type of the index after the last index. Should be a signed or unsigned integer.
     * @tparam T The common type of the indices, as determined by `BS::common_index_type_t`.
     * @tparam F The type of the sequence function.
     * @param first_index The first index in the sequence.
     * @param index_after_last The index after the last index in the sequence. The sequence will iterate from `first_index` to `(index_after_last - 1)` inclusive. In other words, it will be equivalent to `for (T i = first_index; i < index_after_last; ++i)`. Note that if `index_after_last <= first_index`, no tasks will be submitted.
     * @param sequence A function that will be called once per index. Should take exactly one argument, the index. Must not return a value.
     * @param priority The priority of the tasks.
Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     */
    // NOTE(review): angle-bracket template lists have been stripped from this copy of the file (the start of the parameter list after `template`, any explicit arguments of `enqueue_sequence`, and the arguments of `static_cast`/`std::forward`); restore them from the upstream source before compiling.
    template , typename F>
    void detach_sequence(const T1 first_index, const T2 index_after_last, F&& sequence, const priority_t priority = 0)
    {
        // The function is declared `void`; the `return` simply passes through the result of the `enqueue_sequence` call.
        return enqueue_sequence(static_cast(first_index), static_cast(index_after_last), std::forward(sequence), priority);
    }

    /**
     * @brief Submit a function with no arguments and no return value into the task queue, with the specified priority. To submit a function with arguments, enclose it in a lambda expression. Does not return a future, so the user must use `wait()` or some other method to ensure that the task finishes executing, otherwise bad things will happen.
     *
     * @tparam F The type of the function.
     * @param task The function to submit.
     * @param priority The priority of the task. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     */
    // NOTE(review): the template parameter list after `template` and the explicit template argument of `std::forward` are missing in this copy of the file.
    template
    void detach_task(F&& task, const priority_t priority = 0)
    {
        {
            // The queue mutex is held only while the task is added; it is released before the notification below.
            const std::scoped_lock tasks_lock(tasks_mutex);
            if constexpr (priority_enabled)
                tasks.emplace(std::forward(task), priority);
            else
                tasks.emplace(std::forward(task));
        }
        task_available_cv.notify_one();
    }

#ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS
    /**
     * @brief Get a vector containing the underlying implementation-defined thread handles for each of the pool's threads, as obtained by `std::thread::native_handle()` (or `std::jthread::native_handle()` in C++20 and later).
     *
     * @return The native thread handles.
*/
    // NOTE(review): the element type of both `std::vector`s below has been stripped from this copy of the file; restore it from the upstream source before compiling.
    [[nodiscard]] std::vector get_native_handles() const
    {
        // One entry per pool thread, in thread-index order.
        std::vector native_handles(thread_count);
        for (std::size_t i = 0; i < thread_count; ++i)
            native_handles[i] = threads[i].native_handle();
        return native_handles;
    }
#endif

    /**
     * @brief Get the number of tasks currently waiting in the queue to be executed by the threads.
     *
     * The queue size is read while holding the queue mutex.
     *
     * @return The number of queued tasks.
     */
    [[nodiscard]] std::size_t get_tasks_queued() const
    {
        const std::scoped_lock tasks_lock(tasks_mutex);
        return tasks.size();
    }

    /**
     * @brief Get the number of tasks currently being executed by the threads.
     *
     * The counter is read while holding the queue mutex.
     *
     * @return The number of running tasks.
     */
    [[nodiscard]] std::size_t get_tasks_running() const
    {
        const std::scoped_lock tasks_lock(tasks_mutex);
        return tasks_running;
    }

    /**
     * @brief Get the total number of unfinished tasks: either still waiting in the queue, or running in a thread. Note that `get_tasks_total() == get_tasks_queued() + get_tasks_running()`.
     *
     * Both values are read under a single acquisition of the queue mutex.
     *
     * @return The total number of tasks.
     */
    [[nodiscard]] std::size_t get_tasks_total() const
    {
        const std::scoped_lock tasks_lock(tasks_mutex);
        return tasks_running + tasks.size();
    }

    /**
     * @brief Get the number of threads in the pool.
     *
     * @return The number of threads.
     */
    [[nodiscard]] std::size_t get_thread_count() const noexcept
    {
        return thread_count;
    }

    /**
     * @brief Get a vector containing the unique identifiers for each of the pool's threads, as obtained by `std::thread::get_id()` (or `std::jthread::get_id()` in C++20 and later).
     *
     * @return The unique thread identifiers.
     */
    // NOTE(review): the element type of both `std::vector`s below is missing in this copy of the file.
    [[nodiscard]] std::vector get_thread_ids() const
    {
        // One entry per pool thread, in thread-index order.
        std::vector thread_ids(thread_count);
        for (std::size_t i = 0; i < thread_count; ++i)
            thread_ids[i] = threads[i].get_id();
        return thread_ids;
    }

    /**
     * @brief Check whether the pool is currently paused. Only enabled if the flag `BS::tp::pause` is enabled in the template parameter.
     *
     * @return `true` if the pool is paused, `false` if it is not paused.
*/ BS_THREAD_POOL_IF_PAUSE_ENABLED [[nodiscard]] bool is_paused() const { const std::scoped_lock tasks_lock(tasks_mutex); return paused; } /** * @brief Pause the pool. The workers will temporarily stop retrieving new tasks out of the queue, although any tasks already executing will keep running until they are finished. Only enabled if the flag `BS::tp::pause` is enabled in the template parameter. */ BS_THREAD_POOL_IF_PAUSE_ENABLED void pause() { const std::scoped_lock tasks_lock(tasks_mutex); paused = true; } /** * @brief Purge all the tasks waiting in the queue. Tasks that are currently running will not be affected, but any tasks still waiting in the queue will be discarded, and will never be executed by the threads. Please note that there is no way to restore the purged tasks. */ void purge() { const std::scoped_lock tasks_lock(tasks_mutex); tasks = {}; } /** * @brief Reset the pool with the default number of threads (as if constructed with the default constructor). Waits for all tasks to be completed, both running and queued, then destroys the thread pool and creates a new one with an empty task queue. If pausing is enabled, only waits for tasks that are currently running before destroying the pool; once the pool is reset, it will then resume executing the tasks that remained in the queue and any newly submitted tasks. If the pool was paused before resetting it, the new pool will be paused as well. */ void reset() { reset(0, [](std::size_t) {}); } /** * @brief Reset the pool with a new number of threads. Waits for all tasks to be completed, both running and queued, then destroys the thread pool and creates a new one with an empty task queue. If pausing is enabled, only waits for tasks that are currently running before destroying the pool; once the pool is reset, it will then resume executing the tasks that remained in the queue and any newly submitted tasks. If the pool was paused before resetting it, the new pool will be paused as well. 
*
     * Forwards to the two-argument `reset()` with an empty initialization function.
     *
     * @param num_threads The number of threads to use.
     */
    void reset(const std::size_t num_threads)
    {
        reset(num_threads, [](std::size_t) {});
    }

    /**
     * @brief Reset the pool with the default number of threads and a new initialization function. Waits for all tasks to be completed, both running and queued, then destroys the thread pool and creates a new one with an empty task queue. If pausing is enabled, only waits for tasks that are currently running before destroying the pool; once the pool is reset, it will then resume executing the tasks that remained in the queue and any newly submitted tasks. If the pool was paused before resetting it, the new pool will be paused as well.
     *
     * Forwards to the two-argument `reset()` with a thread count of 0.
     *
     * @param init An initialization function to run in each thread before it starts executing any submitted tasks. The function must have no return value, and can either take one argument, the thread index of type `std::size_t`, or zero arguments. It will be executed exactly once per thread, when the thread is first constructed. The initialization function must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function.
     */
    // NOTE(review): the template parameter list after `template` and the explicit template argument of `std::forward` have been stripped from this copy of the file; restore them from the upstream source before compiling.
    template
    void reset(F&& init)
    {
        reset(0, std::forward(init));
    }

    /**
     * @brief Reset the pool with a new number of threads and a new initialization function. Waits for all tasks to be completed, both running and queued, then destroys the thread pool and creates a new one with an empty task queue. If pausing is enabled, only waits for tasks that are currently running before destroying the pool; once the pool is reset, it will then resume executing the tasks that remained in the queue and any newly submitted tasks. If the pool was paused before resetting it, the new pool will be paused as well.
     *
     * @param num_threads The number of threads to use.
     * @param init An initialization function to run in each thread before it starts executing any submitted tasks.
The function must have no return value, and can either take one argument, the thread index of type `std::size_t`, or zero arguments. It will be executed exactly once per thread, when the thread is first constructed. The initialization function must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function.
     */
    // NOTE(review): the template parameter list after `template` and the explicit template argument of `std::forward` have been stripped from this copy of the file; restore them from the upstream source before compiling.
    template
    void reset(const std::size_t num_threads, F&& init)
    {
        if constexpr (pause_enabled)
        {
            // Remember the current pause state and force the pool into the paused state for the duration of the reset.
            std::unique_lock tasks_lock(tasks_mutex);
            const bool was_paused = paused;
            paused = true;
            // The mutex is released while `reset_pool()` runs.
            tasks_lock.unlock();
            // NOTE(review): if `reset_pool()` throws, `paused` is left set to `true`, since nothing here restores it on that path; confirm whether `reset_pool()` can throw.
            reset_pool(num_threads, std::forward(init));
            // Restore the previous pause state under the mutex.
            tasks_lock.lock();
            paused = was_paused;
            tasks_lock.unlock();
            // If the pool was not paused before the reset, wake the workers.
            if (!was_paused)
                task_available_cv.notify_all();
        }
        else
        {
            reset_pool(num_threads, std::forward(init));
        }
    }

    /**
     * @brief Set the thread pool's cleanup function.
     *
     * @param cleanup A cleanup function to run in each thread right before it is destroyed, which will happen when the pool is destructed or reset. The function must have no return value, and can either take one argument, the thread index of type `std::size_t`, or zero arguments. The cleanup function must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function.
     */
    // NOTE(review): the template parameter list after `template`, the arguments of `std::is_invocable_v`, and the explicit template argument of `std::forward` are missing in this copy of the file.
    template
    void set_cleanup_func(F&& cleanup)
    {
        if constexpr (std::is_invocable_v)
        {
            // The function is stored as-is.
            cleanup_func = std::forward(cleanup);
        }
        else
        {
            // The function is wrapped in a lambda that accepts a `std::size_t` argument, ignores it, and calls the function with no arguments.
            cleanup_func = [cleanup = std::forward(cleanup)](std::size_t)
            {
                cleanup();
            };
        }
    }

    /**
     * @brief Parallelize a loop by automatically splitting it into blocks and submitting each block separately to the queue, with the specified priority. The block function takes two arguments, the start and end of the block, so that it is only called once per block, but it is up to the user to make sure the block function correctly deals with all the indices in each block. If the block function has a return value, get a `BS::multi_future` for the eventual returned values.
If the block function has no return value, get a `BS::multi_future` which can be used to wait until all the tasks finish.
     *
     * @tparam T1 The type of the first index. Should be a signed or unsigned integer.
     * @tparam T2 The type of the index after the last index. Should be a signed or unsigned integer.
     * @tparam T The common type of the indices, as determined by `BS::common_index_type_t`.
     * @tparam F The type of the block function.
     * @tparam R The return type of the block function (can be `void`).
     * @param first_index The first index in the loop.
     * @param index_after_last The index after the last index in the loop. The loop will iterate from `first_index` to `(index_after_last - 1)` inclusive. In other words, it will be equivalent to `for (T i = first_index; i < index_after_last; ++i)`. Note that if `index_after_last <= first_index`, no tasks will be submitted, and an empty `BS::multi_future` will be returned.
     * @param block A function that will be called once per block. Should take exactly two arguments: the first index in the block and the index after the last index in the block. `block(start, end)` should typically involve a loop of the form `for (T i = start; i < end; ++i)`. Can return a value.
     * @param num_blocks The maximum number of blocks to split the loop into. The default is 0, which means the number of blocks will be equal to the number of threads in the pool.
     * @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     * @return A `BS::multi_future` that can be used to wait for all the tasks to finish. If the block function returns a value, the `BS::multi_future` can also be used to obtain the values returned by each block.
     */
    // NOTE(review): angle-bracket template lists have been stripped from this copy of the file (most of the parameter list after `template`, the argument of `multi_future`, any explicit arguments of `enqueue_blocks`, and the arguments of `static_cast`/`std::forward`); restore them from the upstream source before compiling.
    template , typename F, typename R = std::invoke_result_t, T, T>>
    [[nodiscard]] multi_future submit_blocks(const T1 first_index, const T2 index_after_last, F&& block, const std::size_t num_blocks = 0, const priority_t priority = 0)
    {
        // Same `enqueue_blocks` helper as `detach_blocks()`, but here its result is returned to the caller.
        return enqueue_blocks(static_cast(first_index), static_cast(index_after_last), std::forward(block), num_blocks, priority);
    }

    /**
     * @brief Submit an iterator range containing functions with no arguments into the task queue, with the specified priority. To submit functions with arguments, enclose them in lambda expressions. If the functions have return values, get a `BS::multi_future` for the eventual returned values. If the functions have no return values, get a `BS::multi_future` which can be used to wait until all the tasks finish.
     *
     * @tparam I The type of the iterators.
     * @tparam F The type of the functions.
     * @tparam R The return type of the functions (can be `void`, but must be the same for all the functions).
     * @param first An iterator to the first function.
     * @param last An iterator to one past the last function.
     * @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     * @return A `BS::multi_future` that can be used to wait for all the tasks to finish. If the functions return values, the `BS::multi_future` can also be used to obtain the values returned by each task.
*/
    // NOTE(review): angle-bracket template lists have been stripped from this copy of the file (most of the parameter list after `template`, and the arguments of `multi_future`, `static_cast`, `std::vector` and `task_and_future`); restore them from the upstream source before compiling.
    template ()), typename R = std::invoke_result_t>>
    [[nodiscard]] multi_future submit_bulk(const I first, const I last, const priority_t priority = 0)
    {
        if (first != last)
        {
            // Reserve room for one future and one wrapped task per function in the range.
            const std::size_t num_tasks = static_cast(std::distance(first, last));
            multi_future all_futures;
            all_futures.reserve(num_tasks);
            std::vector all_tasks;
            all_tasks.reserve(num_tasks);
            for (I it = first; it != last; ++it)
            {
                // Each function is moved out of the source range into a `task_and_future`, whose `future` and `task` members are then moved into the two vectors.
                task_and_future ft(std::move(*it));
                all_futures.emplace_back(std::move(ft.future));
                all_tasks.emplace_back(std::move(ft.task));
            }
            // The wrapped tasks are submitted together through the container overload of `detach_bulk()`.
            detach_bulk(all_tasks, priority);
            return all_futures;
        }
        // Empty range: nothing is submitted and a default-constructed result is returned.
        return {};
    }

    /**
     * @brief Submit a container of functions with no arguments into the task queue, with the specified priority. To submit functions with arguments, enclose them in lambda expressions. If the functions have return values, get a `BS::multi_future` for the eventual returned values. If the functions have no return values, get a `BS::multi_future` which can be used to wait until all the tasks finish.
     *
     * @tparam C The type of the container. Must either be an array or have `begin()` and `end()` member functions.
     * @tparam F The type of the functions.
     * @tparam R The return type of the functions (can be `void`, but must be the same for all the functions).
     * @param container The container.
     * @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     * @return A `BS::multi_future` that can be used to wait for all the tasks to finish. If the functions return values, the `BS::multi_future` can also be used to obtain the values returned by each task.
*/
    // NOTE(review): angle-bracket template lists have been stripped from this copy of the file (most of the parameter list after `template` and the argument of `multi_future`); restore them from the upstream source before compiling.
    template ().begin()), typename R = std::invoke_result_t>>
    [[nodiscard]] multi_future submit_bulk(C& container, const priority_t priority = 0)
    {
        // Forwards to the iterator-range overload of `submit_bulk()`.
        return submit_bulk(std::begin(container), std::end(container), priority);
    }

    /**
     * @brief Parallelize a loop by automatically splitting it into blocks and submitting each block separately to the queue, with the specified priority. The loop function takes one argument, the loop index, and it is called exactly once per index, but many times per block. Returns a `BS::multi_future` which can be used to wait until all the tasks finish.
     *
     * @tparam T1 The type of the first index. Should be a signed or unsigned integer.
     * @tparam T2 The type of the index after the last index. Should be a signed or unsigned integer.
     * @tparam T The common type of the indices, as determined by `BS::common_index_type_t`.
     * @tparam F The type of the loop function.
     * @param first_index The first index in the loop.
     * @param index_after_last The index after the last index in the loop. The loop will iterate from `first_index` to `(index_after_last - 1)` inclusive. In other words, it will be equivalent to `for (T i = first_index; i < index_after_last; ++i)`. Note that if `index_after_last <= first_index`, no tasks will be submitted, and an empty `BS::multi_future` will be returned.
     * @param loop A function that will be called once per index, many times per block. Should take exactly one argument: the loop index. Must not return a value.
     * @param num_blocks The maximum number of blocks to split the loop into. The default is 0, which means the number of blocks will be equal to the number of threads in the pool.
     * @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect.
     * @return A `BS::multi_future` that can be used to wait for all the tasks to finish.
*/ template , typename F> [[nodiscard]] multi_future submit_loop(const T1 first_index, const T2 index_after_last, F&& loop, const std::size_t num_blocks = 0, const priority_t priority = 0) { return enqueue_loop(static_cast(first_index), static_cast(index_after_last), std::forward(loop), num_blocks, priority); } /** * @brief Submit a sequence of tasks enumerated by indices to the queue, with the specified priority. The sequence function takes one argument, the task index, and will be called once per index. If the sequence function has a return value, get a `BS::multi_future` for the eventual returned values. If the sequence function has no return value, get a `BS::multi_future` which can be used to wait until all the tasks finish. * * @tparam T1 The type of the first index. Should be a signed or unsigned integer. * @tparam T2 The type of the index after the last index. Should be a signed or unsigned integer. * @tparam T The common type of the indices, as determined by `BS::common_index_type_t`. * @tparam F The type of the sequence function. * @tparam R The return type of the sequence function (can be `void`). * @param first_index The first index in the sequence. * @param index_after_last The index after the last index in the sequence. The sequence will iterate from `first_index` to `(index_after_last - 1)` inclusive. In other words, it will be equivalent to `for (T i = first_index; i < index_after_last; ++i)`. Note that if `index_after_last <= first_index`, no tasks will be submitted, and an empty `BS::multi_future` will be returned. * @param sequence A function that will be called once per index. Should take exactly one argument, the index. Can return a value. * @param priority The priority of the tasks. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect. 
* @return A `BS::multi_future` that can be used to wait for all the tasks to finish. If the sequence function returns a value, the `BS::multi_future` can also be used to obtain the values returned by each task. */ template , typename F, typename R = std::invoke_result_t, T>> [[nodiscard]] multi_future submit_sequence(const T1 first_index, const T2 index_after_last, F&& sequence, const priority_t priority = 0) { return enqueue_sequence(static_cast(first_index), static_cast(index_after_last), std::forward(sequence), priority); } /** * @brief Submit a function with no arguments into the task queue, with the specified priority. To submit a function with arguments, enclose it in a lambda expression. If the function has a return value, get a future for the eventual returned value. If the function has no return value, get an `std::future` which can be used to wait until the task finishes. * * @tparam F The type of the function. * @tparam R The return type of the function (can be `void`). * @param task The function to submit. * @param priority The priority of the task. Should be between -128 and +127 (a signed 8-bit integer). The default is 0. Only taken into account if the flag `BS::tp::priority` is enabled in the template parameter, otherwise has no effect. * @return A future to be used later to wait for the function to finish executing and/or obtain its returned value if it has one. */ template >> [[nodiscard]] std::future submit_task(F&& task, const priority_t priority = 0) { task_and_future ft(std::forward(task)); detach_task(std::move(ft.task), priority); return std::move(ft.future); } /** * @brief Unpause the pool. The workers will resume retrieving new tasks out of the queue. Only enabled if the flag `BS::tp::pause` is enabled in the template parameter. */ BS_THREAD_POOL_IF_PAUSE_ENABLED void unpause() { { const std::scoped_lock tasks_lock(tasks_mutex); paused = false; } task_available_cv.notify_all(); } /** * @brief Wait for tasks to be completed. 
Normally, this function waits for all tasks, both those that are currently running in the threads and those that are still waiting in the queue. However, if the pool is paused, this function only waits for the currently running tasks (otherwise it would wait forever). Note: To wait for just one specific task, use `submit_task()` instead, and call the `wait()` member function of the generated future. * * @throws `wait_deadlock` if called from within a thread of the same pool, which would result in a deadlock. Only enabled if the flag `BS::tp::wait_deadlock_checks` is enabled in the template parameter. */ void wait() { #ifdef __cpp_exceptions if constexpr (wait_deadlock_checks_enabled) { if (this_thread::get_pool() == this) throw wait_deadlock(); } #endif std::unique_lock tasks_lock(tasks_mutex); waiting = true; tasks_done_cv.wait(tasks_lock, [this] { if constexpr (pause_enabled) return (tasks_running == 0) && (paused || tasks.empty()); else return (tasks_running == 0) && tasks.empty(); }); waiting = false; } /** * @brief Wait for tasks to be completed, but stop waiting after the specified duration has passed. * * @tparam R An arithmetic type representing the number of ticks to wait. * @tparam P An `std::ratio` representing the length of each tick in seconds. * @param duration The amount of time to wait. * @return `true` if all tasks finished running, `false` if the duration expired but some tasks are still running. * @throws `wait_deadlock` if called from within a thread of the same pool, which would result in a deadlock. Only enabled if the flag `BS::tp::wait_deadlock_checks` is enabled in the template parameter. 
*/ template bool wait_for(const std::chrono::duration& duration) { #ifdef __cpp_exceptions if constexpr (wait_deadlock_checks_enabled) { if (this_thread::get_pool() == this) throw wait_deadlock(); } #endif std::unique_lock tasks_lock(tasks_mutex); waiting = true; const bool status = tasks_done_cv.wait_for(tasks_lock, duration, [this] { if constexpr (pause_enabled) return (tasks_running == 0) && (paused || tasks.empty()); else return (tasks_running == 0) && tasks.empty(); }); waiting = false; return status; } /** * @brief Wait for tasks to be completed, but stop waiting after the specified time point has been reached. * * @tparam C The type of the clock used to measure time. * @tparam D An `std::chrono::duration` type used to indicate the time point. * @param timeout_time The time point at which to stop waiting. * @return `true` if all tasks finished running, `false` if the time point was reached but some tasks are still running. * @throws `wait_deadlock` if called from within a thread of the same pool, which would result in a deadlock. Only enabled if the flag `BS::tp::wait_deadlock_checks` is enabled in the template parameter. */ template bool wait_until(const std::chrono::time_point& timeout_time) { #ifdef __cpp_exceptions if constexpr (wait_deadlock_checks_enabled) { if (this_thread::get_pool() == this) throw wait_deadlock(); } #endif std::unique_lock tasks_lock(tasks_mutex); waiting = true; const bool status = tasks_done_cv.wait_until(tasks_lock, timeout_time, [this] { if constexpr (pause_enabled) return (tasks_running == 0) && (paused || tasks.empty()); else return (tasks_running == 0) && tasks.empty(); }); waiting = false; return status; } private: // ======================== // Private member functions // ======================== /** * @brief Create the threads in the pool and assign a worker to each thread. * * @param num_threads The number of threads to use. 
* @param init An initialization function to run in each thread before it starts executing any submitted tasks. */ template void create_threads(const std::size_t num_threads, F&& init) { if constexpr (std::is_invocable_v) { init_func = std::forward(init); } else { init_func = [init = std::forward(init)](std::size_t) { init(); }; } thread_count = determine_thread_count(num_threads); threads = std::make_unique(thread_count); { const std::scoped_lock tasks_lock(tasks_mutex); tasks_running = thread_count; #ifndef __cpp_lib_jthread workers_running = true; #endif } for (std::size_t i = 0; i < thread_count; ++i) { threads[i] = thread_t( [this, i] #ifdef __cpp_lib_jthread (const std::stop_token& stop_token) { worker(stop_token, i); } #else { worker(i); } #endif ); } } #ifndef __cpp_lib_jthread /** * @brief Destroy the threads in the pool. */ void destroy_threads() { { const std::scoped_lock tasks_lock(tasks_mutex); workers_running = false; } task_available_cv.notify_all(); for (std::size_t i = 0; i < thread_count; ++i) threads[i].join(); } #endif /** * @brief Determine how many threads the pool should have, based on the parameter passed to the constructor or reset(). * * @param num_threads The parameter passed to the constructor or `reset()`. If the parameter is a positive number, then the pool will be created with this number of threads. If the parameter is zero, or a parameter was not supplied (in which case it will have the default value of 0), then the pool will be created with the total number of hardware threads available, as obtained from `thread_t::hardware_concurrency()`. If the latter returns zero for some reason, then the pool will be created with just one thread. If the native extensions are enabled, the pool will instead use the number of threads available to the process, as obtained from `BS::get_os_process_affinity()`, which can be less than the number of hardware threads. 
*/ [[nodiscard]] static std::size_t determine_thread_count(const std::size_t num_threads) noexcept(!thread_pool_native_extensions) { if (num_threads > 0) return num_threads; #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS const std::optional> affinity = BS::get_os_process_affinity(); if (affinity.has_value()) { const std::size_t affinity_thread_count = static_cast(std::count(affinity->begin(), affinity->end(), true)); return (affinity_thread_count > 0) ? affinity_thread_count : 1; } #endif if (thread_t::hardware_concurrency() > 0) return thread_t::hardware_concurrency(); return 1; } /** * @brief A helper function for `detach_blocks()` and `submit_blocks()`. * * @tparam T The type of the indices. * @tparam F The type of the block function. * @tparam R The return type of the block function (can be `void`). * @tparam submit `true` if called from `submit_blocks()`, `false` if called from `detach_blocks()`. * @tparam N The return type of this helper function. * @param first_index The first index in the loop. * @param index_after_last The index after the last index in the loop. * @param block A function that will be called once per block. * @param num_blocks The maximum number of blocks to split the loop into. * @param priority The priority of the tasks. * @return A `BS::multi_future` if `submit` is `true`, or `void` if `submit` is `false`. */ template , void>> [[nodiscard]] N enqueue_blocks(const T first_index, const T index_after_last, F&& block, std::size_t num_blocks, const priority_t priority = 0) { if (index_after_last > first_index) { using block_task_t = block_task; const std::shared_ptr> block_ptr = std::make_shared>(std::forward(block)); const blocks blks(first_index, index_after_last, num_blocks ? 
num_blocks : thread_count); num_blocks = blks.get_num_blocks(); std::vector> all_tasks; all_tasks.reserve(num_blocks); for (std::size_t i = 0; i < num_blocks; ++i) all_tasks.emplace_back(block_task_t{block_ptr, blks.start(i), blks.end(i)}); if constexpr (submit) return submit_bulk(all_tasks, priority); else detach_bulk(all_tasks, priority); } return N(); } /** * @brief A helper function for `detach_loop()` and `submit_loop()`. * * @tparam T The type of the indices. * @tparam F The type of the loop function. * @tparam submit `true` if called from `submit_loop()`, `false` if called from `detach_loop()`. * @tparam N The return type of this helper function. * @param first_index The first index in the loop. * @param index_after_last The index after the last index in the loop. * @param loop A function that will be called once per index, many times per block. * @param num_blocks The maximum number of blocks to split the loop into. * @param priority The priority of the tasks. * @return A `BS::multi_future` if `submit` is `true`, or `void` if `submit` is `false`. */ template , void>> [[nodiscard]] N enqueue_loop(const T first_index, const T index_after_last, F&& loop, std::size_t num_blocks, const priority_t priority = 0) { if (index_after_last > first_index) { using loop_task_t = loop_task; const std::shared_ptr> loop_ptr = std::make_shared>(std::forward(loop)); const blocks blks(first_index, index_after_last, num_blocks ? num_blocks : thread_count); num_blocks = blks.get_num_blocks(); std::vector> all_tasks; all_tasks.reserve(num_blocks); for (std::size_t i = 0; i < num_blocks; ++i) all_tasks.emplace_back(loop_task_t{loop_ptr, blks.start(i), blks.end(i)}); if constexpr (submit) return submit_bulk(all_tasks, priority); else detach_bulk(all_tasks, priority); } return N(); } /** * @brief A helper function for `detach_sequence()` and `submit_sequence()`. * * @tparam T The type of the indices. * @tparam F The type of the sequence function. 
* @tparam R The return type of the sequence function (can be `void`). * @tparam submit `true` if called from `submit_sequence()`, `false` if called from `detach_sequence()`. * @tparam N The return type of this helper function. * @param first_index The first index in the sequence. * @param index_after_last The index after the last index in the sequence. * @param sequence A function that will be called once per index. * @param priority The priority of the tasks. * @return A `BS::multi_future` if `submit` is `true`, or `void` if `submit` is `false`. */ template , void>> [[nodiscard]] N enqueue_sequence(const T first_index, const T index_after_last, F&& sequence, const priority_t priority = 0) { if (index_after_last > first_index) { using sequence_task_t = sequence_task; const std::shared_ptr> sequence_ptr = std::make_shared>(std::forward(sequence)); std::vector> all_tasks; all_tasks.reserve(static_cast(index_after_last - first_index)); for (T i = first_index; i < index_after_last; ++i) all_tasks.emplace_back(sequence_task_t{sequence_ptr, i}); if constexpr (submit) return submit_bulk(all_tasks, priority); else detach_bulk(all_tasks, priority); } return N(); } /** * @brief Pop a task from the queue. * * @return The task. */ [[nodiscard]] task_t pop_task() { task_t task; if constexpr (priority_enabled) task = std::move(tasks.top().task); else task = std::move(tasks.front()); tasks.pop(); return task; } /** * @brief Reset the pool with a new number of threads and a new initialization function. This member function implements the actual reset, while the public member function `reset()` also handles the case where the pool is paused. * * @param num_threads The number of threads to use. * @param init An initialization function to run in each thread before it starts executing any submitted tasks. 
*/ template void reset_pool(const std::size_t num_threads, F&& init) { wait(); #ifndef __cpp_lib_jthread destroy_threads(); #endif create_threads(num_threads, std::forward(init)); } /** * @brief A worker function to be assigned to each thread in the pool. Waits until it is notified by `detach_task()` that a task is available, and then retrieves the task from the queue and executes it. Once the task finishes, the worker notifies `wait()` in case it is waiting. * * @param idx The index of this thread. */ void worker(BS_THREAD_POOL_WORKER_TOKEN const std::size_t idx) { this_thread::my_pool = this; this_thread::my_index = idx; init_func(idx); while (true) { std::unique_lock tasks_lock(tasks_mutex); --tasks_running; if constexpr (pause_enabled) { if (waiting && (tasks_running == 0) && (paused || tasks.empty())) tasks_done_cv.notify_all(); } else { if (waiting && (tasks_running == 0) && tasks.empty()) tasks_done_cv.notify_all(); } task_available_cv.wait(tasks_lock BS_THREAD_POOL_WAIT_TOKEN, [this] { if constexpr (pause_enabled) return !(paused || tasks.empty()) BS_THREAD_POOL_OR_STOP_CONDITION; else return !tasks.empty() BS_THREAD_POOL_OR_STOP_CONDITION; }); if (BS_THREAD_POOL_STOP_CONDITION) break; { task_t task = pop_task(); // NOLINT(misc-const-correctness) In C++23 this cannot be const since `std::move_only_function::operator()` is not a const member function. ++tasks_running; tasks_lock.unlock(); #ifdef __cpp_exceptions try { #endif task(); #ifdef __cpp_exceptions } catch (...) { } #endif } } cleanup_func(idx); this_thread::my_index = std::nullopt; this_thread::my_pool = std::nullopt; } // ============ // Private data // ============ /** * @brief A mutex to synchronize access to the task queue by different threads. */ mutable std::mutex tasks_mutex; /** * @brief A condition variable to notify `worker()` that a new task has become available. 
*/ #ifdef __cpp_lib_jthread std::condition_variable_any #else std::condition_variable #endif task_available_cv; /** * @brief A condition variable to notify `wait()` that the tasks are done. */ std::condition_variable tasks_done_cv; /** * @brief A cleanup function to run in each thread right before it is destroyed, which will happen when the pool is destructed or reset. The function must have no return value, and can either take one argument, the thread index of type `std::size_t`, or zero arguments. The cleanup function must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function. The default is an empty function, i.e., no cleanup will be performed. */ move_only_function cleanup_func = [](std::size_t) {}; /** * @brief An initialization function to run in each thread before it starts executing any submitted tasks. The function must have no return value, and can either take one argument, the thread index of type `std::size_t`, or zero arguments. It will be executed exactly once per thread, when the thread is first constructed. The initialization function must not throw any exceptions, as that will result in program termination. Any exceptions must be handled explicitly within the function. The default is an empty function, i.e., no initialization will be performed. */ move_only_function init_func = [](std::size_t) {}; /** * @brief A queue of tasks to be executed by the threads. */ std::conditional_t, std::queue> tasks; /** * @brief A counter for the total number of currently running tasks. */ std::size_t tasks_running = 0; /** * @brief The number of threads in the pool. */ std::size_t thread_count = 0; /** * @brief A smart pointer to manage the memory allocated for the threads. */ std::unique_ptr threads = nullptr; /** * @brief A flag indicating whether the workers should pause. 
When set to `true`, the workers temporarily stop retrieving new tasks out of the queue, although any tasks already executing will keep running until they are finished. When set to `false` again, the workers resume retrieving tasks. Only enabled if the flag `BS::tp::pause` is enabled in the template parameter. */ std::conditional_t paused = {}; /** * @brief A flag indicating that `wait()` is active and expects to be notified whenever a task is done. */ bool waiting = false; #ifndef __cpp_lib_jthread /** * @brief A flag indicating to the workers to keep running. When set to `false`, the workers terminate permanently. */ bool workers_running = false; #endif }; // class thread_pool /** * @brief A utility class to synchronize printing to one or more output streams by different threads. */ class [[nodiscard]] synced_stream { public: /** * @brief Construct a new synced stream which prints to `std::cout`. */ explicit synced_stream() { add_stream(std::cout); } /** * @brief Construct a new synced stream which prints to the given output stream(s). * * @tparam T The types of the output streams to print to. * @param streams The output streams to print to. */ template explicit synced_stream(T&... streams) { (add_stream(streams), ...); } /** * @brief Add a stream to the list of output streams to print to. * * @param stream The stream. */ void add_stream(std::ostream& stream) { out_streams.push_back(&stream); } /** * @brief Get a reference to a vector containing pointers to the output streams to print to. * * @return The output streams. */ std::vector& get_streams() noexcept { return out_streams; } /** * @brief Print any number of items into each output stream. Ensures that no other threads print to the streams simultaneously, as long as they all exclusively use the same `BS::synced_stream` object to print. * * @tparam T The types of the items. * @param items The items to print. */ template void print(const T&... 
items) { const std::scoped_lock stream_lock(stream_mutex); for (std::ostream* const stream : out_streams) (*stream << ... << items); } /** * @brief Print any number of items into each output stream, followed by a newline character. Ensures that no other threads print to the streams simultaneously, as long as they all exclusively use the same `BS::synced_stream` object to print. * * @tparam T The types of the items. * @param items The items to print. */ template void println(T&&... items) { print(std::forward(items)..., '\n'); } /** * @brief Remove a stream from the list of output streams to print to. * * @param stream The stream. */ void remove_stream(std::ostream& stream) { out_streams.erase(std::remove(out_streams.begin(), out_streams.end(), &stream), out_streams.end()); } /** * @brief A stream manipulator to pass to a `BS::synced_stream` (an explicit cast of `std::endl`). Prints a newline character to the stream, and then flushes it. Should only be used if flushing is desired, otherwise a newline character should be used instead. */ inline static std::ostream& (&endl)(std::ostream&) = static_cast(std::endl); /** * @brief A stream manipulator to pass to a `BS::synced_stream` (an explicit cast of `std::flush`). Used to flush the stream. */ inline static std::ostream& (&flush)(std::ostream&) = static_cast(std::flush); private: /** * @brief A mutex to synchronize printing. */ mutable std::mutex stream_mutex; /** * @brief The output streams to print to. 
*/ std::vector out_streams; }; // class synced_stream } // namespace BS #endif // BS_THREAD_POOL_HPP thread-pool-5.1.0/modules/000077500000000000000000000000001512633616700154155ustar00rootroot00000000000000thread-pool-5.1.0/modules/BS.thread_pool.cppm000066400000000000000000000062561512633616700211120ustar00rootroot00000000000000/** * ██████ ███████ ████████ ██ ██ ██████ ███████ █████ ██████ ██████ ██████ ██████ ██ * ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ * ██████ ███████ ██ ███████ ██████ █████ ███████ ██ ██ ██████ ██ ██ ██ ██ ██ * ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ * ██████ ███████ ██ ██ ██ ██ ██ ███████ ██ ██ ██████ ███████ ██ ██████ ██████ ███████ * * @file BS.thread_pool.cppm * @author Barak Shoshany (baraksh@gmail.com) (https://baraksh.com/) * @version 5.1.0 * @date 2026-01-03 * @copyright Copyright (c) 2021-2026 Barak Shoshany. Licensed under the MIT license. If you found this project useful, please consider starring it on GitHub! If you use this library in software of any kind, please provide a link to the GitHub repository https://github.com/bshoshany/thread-pool in the source code and documentation. If you use this library in published research, please cite it as follows: Barak Shoshany, "A C++17 Thread Pool for High-Performance Scientific Computing", doi:10.1016/j.softx.2024.101687, SoftwareX 26 (2024) 101687, arXiv:2105.00613 * * @brief `BS::thread_pool`: a fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool library. This module file wraps the header file BS_thread_pool.hpp inside a C++20 module so it can be imported using `import BS.thread_pool`. */ module; // A macro indicating to the library that it is being imported as a module, as well as the version of the module file, which must match the version of the header file. 
#define BS_THREAD_POOL_MODULE 5, 1, 0 #include "BS_thread_pool.hpp" export module BS.thread_pool; export namespace BS { using BS::common_index_type_t; using BS::light_thread_pool; using BS::multi_future; using BS::operator&; using BS::operator^; using BS::operator|; using BS::operator~; using BS::pause_thread_pool; using BS::pr; using BS::priority_t; using BS::priority_thread_pool; using BS::synced_stream; using BS::this_thread; using BS::thread_pool; using BS::thread_pool_import_std; using BS::thread_pool_module; using BS::thread_pool_native_extensions; using BS::thread_pool_version; using BS::tp; using BS::version; using BS::wdc_thread_pool; #ifdef __cpp_exceptions using BS::wait_deadlock; #endif #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS using BS::get_os_process_affinity; using BS::get_os_process_priority; using BS::os_process_priority; using BS::os_thread_priority; using BS::set_os_process_affinity; using BS::set_os_process_priority; #endif } // namespace BS thread-pool-5.1.0/pyproject.toml000066400000000000000000000225631512633616700166710ustar00rootroot00000000000000[tool.pyright] analyzeUnannotatedFunctions = true deprecateTypingAliases = true disableBytesTypePromotions = true enableExperimentalFeatures = false enableReachabilityAnalysis = true enableTypeIgnoreComments = true extraPaths = ["."] pythonPlatform = "All" pythonVersion = "3.13" reportAbstractUsage = "error" reportArgumentType = "error" reportAssertAlwaysTrue = "error" reportAssertTypeFailure = "error" reportAssignmentType = "error" reportAttributeAccessIssue = "error" reportCallInDefaultInitializer = "error" reportCallIssue = "error" reportConstantRedefinition = "error" reportDeprecated = "error" reportDuplicateImport = "error" reportFunctionMemberAccess = "error" reportGeneralTypeIssues = "error" reportImplicitOverride = "error" reportImplicitStringConcatenation = "error" reportImportCycles = "error" reportIncompatibleMethodOverride = "error" reportIncompatibleVariableOverride = "error" 
reportIncompleteStub = "error" reportInconsistentConstructor = "error" reportInconsistentOverload = "error" reportIndexIssue = "error" reportInvalidStringEscapeSequence = "error" reportInvalidStubStatement = "error" reportInvalidTypeArguments = "error" reportInvalidTypeForm = "error" reportInvalidTypeVarUse = "error" reportMatchNotExhaustive = "error" reportMissingImports = "error" reportMissingModuleSource = "error" reportMissingParameterType = "error" reportMissingTypeArgument = "error" reportMissingTypeStubs = "error" reportNoOverloadImplementation = "error" reportOperatorIssue = "none" reportOptionalCall = "error" reportOptionalContextManager = "error" reportOptionalIterable = "error" reportOptionalMemberAccess = "error" reportOptionalOperand = "error" reportOptionalSubscript = "error" reportOverlappingOverload = "error" reportPossiblyUnboundVariable = "error" reportPrivateImportUsage = "error" reportPrivateUsage = "error" reportPropertyTypeMismatch = "none" reportRedeclaration = "error" reportReturnType = "error" reportSelfClsParameterName = "error" reportShadowedImports = "error" reportTypeCommentUsage = "error" reportTypedDictNotRequiredAccess = "error" reportUnboundVariable = "error" reportUndefinedVariable = "error" reportUnhashable = "error" reportUninitializedInstanceVariable = "error" reportUnknownArgumentType = "error" reportUnknownLambdaType = "error" reportUnknownMemberType = "none" reportUnknownParameterType = "error" reportUnknownVariableType = "error" reportUnnecessaryCast = "error" reportUnnecessaryComparison = "error" reportUnnecessaryContains = "error" reportUnnecessaryIsInstance = "error" reportUnnecessaryTypeIgnoreComment = "error" reportUnsupportedDunderAll = "error" reportUntypedBaseClass = "error" reportUntypedClassDecorator = "error" reportUntypedFunctionDecorator = "error" reportUntypedNamedTuple = "error" reportUnusedCallResult = "error" reportUnusedClass = "warning" reportUnusedCoroutine = "error" reportUnusedExcept = "error" 
reportUnusedExpression = "error" reportUnusedFunction = "warning" reportUnusedImport = "warning" reportUnusedVariable = "warning" reportWildcardImportFromLibrary = "error" strictDictionaryInference = true strictListInference = true strictParameterNoneValue = true strictSetInference = true typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.ruff] indent-width = 4 line-length = 320 target-version = "py313" [tool.ruff.format] docstring-code-format = false docstring-code-line-length = "dynamic" indent-style = "space" line-ending = "lf" quote-style = "double" skip-magic-trailing-comma = false [tool.ruff.lint] dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" fixable = ["ALL"] ignore = [ "BLE001", "C901", "D200", "D203", "D205", "D212", "D400", "D402", "D415", "E501", "INP001", "N814", "N999", "PLR0912", "PLR0913", "PLR2004", "RUF009", "S310", "S602", "S603", "S607", "SIM108", "SIM112", "SLF001", "T201", "TCH003", "TD002", "TD003", "UP015", ] select = ["ALL"] unfixable = [] [tool.ruff.lint.per-file-ignores] "Interactive*" = ["ALL"] [tool.pylint.main] analyse-fallback-blocks = false clear-cache-post-run = false exit-zero = false extension-pkg-allow-list = [] extension-pkg-whitelist = [] fail-on = "" fail-under = 10 from-stdin = false ignore-paths = [] ignore-patterns = [] ignored-modules = [] init-hook = "" jobs = 0 limit-inference-results = 100 load-plugins = [] persistent = true prefer-stubs = true py-version = "3.13" recursive = false source-roots = [] unsafe-load-any-extension = false [tool.pylint.basic] argument-naming-style = "snake_case" argument-rgx = "" attr-naming-style = "snake_case" attr-rgx = "" bad-names = [] bad-names-rgxs = "" class-attribute-naming-style = "any" class-attribute-rgx = "" class-const-naming-style = "UPPER_CASE" class-const-rgx = "" class-naming-style = "PascalCase" class-rgx = "" const-naming-style = "UPPER_CASE" const-rgx = "" docstring-min-length = -1 function-naming-style = "snake_case" function-rgx = "" good-names 
= ["_"] good-names-rgxs = "" include-naming-hint = true inlinevar-naming-style = "any" inlinevar-rgx = "" method-naming-style = "snake_case" method-rgx = "" module-naming-style = "snake_case" module-rgx = "" name-group = [] no-docstring-rgx = "" property-classes = ["abc.abstractproperty"] typealias-rgx = "" typevar-rgx = "" variable-naming-style = "snake_case" variable-rgx = "" [tool.pylint.classes] check-protected-access-in-special-methods = true defining-attr-methods = [ "__init__", "__new__", "__post_init__", "asyncSetUp", "setUp", ] exclude-protected = [ "_asdict", "_fields", "_make", "_replace", "_source", "os._exit", ] valid-classmethod-first-arg = ["cls"] valid-metaclass-classmethod-first-arg = ["mcs"] [tool.pylint.design] exclude-too-few-public-methods = [] ignored-parents = [] max-args = 5 max-attributes = 7 max-bool-expr = 5 max-branches = 12 max-locals = 15 max-parents = 7 max-public-methods = 20 max-returns = 6 max-statements = 50 min-public-methods = 2 [tool.pylint.exceptions] overgeneral-exceptions = ["builtins.BaseException", "builtins.Exception"] [tool.pylint.format] expected-line-ending-format = "LF" ignore-long-lines = "^\\s*(# )??$" indent-after-paren = 4 indent-string = " " max-line-length = 1024 max-module-lines = 8192 single-line-class-stmt = false single-line-if-stmt = false [tool.pylint.imports] allow-any-import-level = [] allow-reexport-from-package = false allow-wildcard-with-all = false deprecated-modules = [] ext-import-graph = "" import-graph = "" int-import-graph = "" known-standard-library = [] known-third-party = [] preferred-modules = [] [tool.pylint.logging] logging-format-style = "new" logging-modules = ["logging"] [tool.pylint."messages control"] confidence = [] disable = [ "broad-exception-caught", "consider-using-enumerate", "expression-not-assigned", "import-error", "invalid-unary-operand-type", "missing-module-docstring", "named-expr-without-context", "not-callable", "pointless-statement", "protected-access", 
"too-few-public-methods", "too-many-arguments", "too-many-boolean-expressions", "too-many-branches", "too-many-instance-attributes", "too-many-locals", "too-many-nested-blocks", "too-many-public-methods", "ungrouped-imports", "use-implicit-booleaness-not-comparison-to-string", "use-implicit-booleaness-not-comparison-to-zero", "wrong-import-order", "wrong-import-position", ] enable = ["all"] [tool.pylint.method_args] timeout-methods = [ "requests.api.delete", "requests.api.get", "requests.api.head", "requests.api.options", "requests.api.patch", "requests.api.post", "requests.api.put", "requests.api.request", ] [tool.pylint.miscellaneous] notes = ["TODO"] notes-rgx = "" [tool.pylint.refactoring] max-nested-blocks = 5 never-returning-functions = ["argparse.parse_error", "sys.exit"] suggest-join-with-non-empty-separator = true [tool.pylint.reports] evaluation = "max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))" msg-template = "" output-format = "text" reports = true score = true [tool.pylint.similarities] ignore-comments = true ignore-docstrings = true ignore-imports = true ignore-signatures = true min-similarity-lines = 4 [tool.pylint.spelling] max-spelling-suggestions = 4 spelling-dict = "" spelling-ignore-comment-directives = "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:" spelling-ignore-words = "" spelling-private-dict-file = "" spelling-store-unknown-words = false [tool.pylint.typecheck] contextmanager-decorators = ["contextlib.contextmanager"] generated-members = [] ignore-mixin-members = true ignore-none = true ignore-on-opaque-inference = false ignored-checks-for-mixins = [ "attribute-defined-outside-init", "no-member", "not-async-context-manager", "not-context-manager", ] ignored-classes = [ "_thread._local", "argparse.Namespace", "optparse.Values", "thread._local", ] missing-member-hint = true missing-member-hint-distance = 1 missing-member-max-choices = 1 mixin-class-rgx = ".*[Mm]ixin" 
signature-mutators = [] [tool.pylint.variables] additional-builtins = [] allow-global-unused-variables = true allowed-redefined-builtins = [] callbacks = ["_cb", "cb_"] dummy-variables-rgx = "^_.*" ignored-argument-names = "^_.*" init-import = true redefining-builtins-modules = [ "builtins", "future.builtins", "io", "past.builtins", "six.moves", ] thread-pool-5.1.0/scripts/000077500000000000000000000000001512633616700154345ustar00rootroot00000000000000thread-pool-5.1.0/scripts/compile_cpp.py000066400000000000000000001127211512633616700203040ustar00rootroot00000000000000""" ██████ ███████ ████████ ██ ██ ██████ ███████ █████ ██████ ██████ ██████ ██████ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██████ ███████ ██ ███████ ██████ █████ ███████ ██ ██ ██████ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██████ ███████ ██ ██ ██ ██ ██ ███████ ██ ██ ██████ ███████ ██ ██████ ██████ ███████ `BS::thread_pool`: a fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool library v5.1.0 (2026-01-03) By Barak Shoshany Copyright (c) 2021-2026 Barak Shoshany. Licensed under the MIT license. If you found this project useful, please consider starring it on GitHub! If you use this library in software of any kind, please provide a link to the GitHub repository https://github.com/bshoshany/thread-pool in the source code and documentation. If you use this library in published research, please cite it as follows: Barak Shoshany, "A C++17 Thread Pool for High-Performance Scientific Computing", doi:10.1016/j.softx.2024.101687, SoftwareX 26 (2024) 101687, arXiv:2105.00613 This Python script can be used to compile simple C++ programs (with only a few source and/or header files) using a variety of compilers, C++ standards, and other options. It also includes support for C++20 modules and C++23 Standard Library modules. 
It is used in the thread pool library's development environment to compile and run the test program using different compilers and C++ standards. It is not part of the library itself, but users of the library may find it useful, especially if they wish to use the library as a C++20 module. """ import argparse import os import pathlib import platform import re import shutil import subprocess import sys import time from enum import Enum from typing import Any, Never, cast import yaml # Install with `pip install pyyaml`. class Args: """A class to collect the command line arguments with proper type checking.""" def __init__(self, parsed_ns: argparse.Namespace) -> None: """Store the parsed arguments.""" self.files: list[str] = parsed_ns.files self.arch: str = parsed_ns.arch self.as_module: bool = parsed_ns.as_module self.clear_output: bool = parsed_ns.clear_output self.compiler: str | None = parsed_ns.compiler self.define: list[str] = parsed_ns.define if parsed_ns.define is not None else [] self.deps: list[str] = parsed_ns.deps if parsed_ns.deps is not None else [] self.disable_exceptions: str | None = parsed_ns.disable_exceptions self.flag: list[str] = parsed_ns.flag if parsed_ns.flag is not None else [] self.force: bool = parsed_ns.force self.ignore_config: bool = parsed_ns.ignore_config self.include: list[str] = parsed_ns.include if parsed_ns.include is not None else [] self.module: list[str] = parsed_ns.module if parsed_ns.module is not None else [] self.output: str | None = parsed_ns.output self.pass_args: list[str] = parsed_ns.pass_args if parsed_ns.pass_args is not None else [] self.run: bool = parsed_ns.run self.std_module: str | None = parsed_ns.std_module self.std: str = parsed_ns.std self.try_all: bool = parsed_ns.try_all self.type: str = parsed_ns.type self.verbose: bool = parsed_ns.verbose # Parse the command-line arguments. 
# Build the command-line interface. `ArgumentDefaultsHelpFormatter` makes the help text show each option's default value.
# The result of every `add_argument` call is assigned to `_` so that it is explicitly discarded.
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# Positional arguments: zero or more source files (`nargs="*"`), so the script can be invoked with options only.
_ = parser.add_argument("files", action="store", nargs="*", help="the source file(s) to compile")
# Options, in alphabetical order of their short flag. Options with `action="append"` yield `None` when they are not given.
_ = parser.add_argument("-a", "--arch", action="store", choices=["amd64", "arm64"], default="amd64", help="the target architecture (MSVC only)")
_ = parser.add_argument("-b", "--clear-output", action="store_true", help="clear the output folder before compiling (if no source files are specified, just clear and exit)")
_ = parser.add_argument("-c", "--compiler", action="store", choices=["cl", "clang++", "g++"], help="which compiler to use (auto determined if not specified)")
_ = parser.add_argument("-d", "--define", action="append", help="macros to define (use multiple times if more than one) [in addition to those in compile_cpp.yaml]")
_ = parser.add_argument("-e", "--force", action="store_true", help="force recompilation even if the compiled file is up to date")
_ = parser.add_argument("-f", "--flag", action="append", help="extra compiler flags to add (use multiple times if more than one) [in addition to those in compile_cpp.yaml]")
_ = parser.add_argument("-g", "--ignore-config", action="store_true", help="ignore the compile_cpp.yaml configuration file")
_ = parser.add_argument("-i", "--include", action="append", help="the include folder to use (use multiple times if more than one) [in addition to those in compile_cpp.yaml]")
_ = parser.add_argument("-l", "--as-module", action="store_true", help="compile file as module")
_ = parser.add_argument("-m", "--module", action="append", help='C++20 module files to use if desired, in the format "module_name=module_file,dependencies,..." (use multiple times if more than one) [in addition to those in compile_cpp.yaml]')
_ = parser.add_argument("-n", "--deps", action="append", help="dependencies used to detect if recompilation is needed (use multiple times if more than one) [in addition to modules and those in compile_cpp.yaml]")
_ = parser.add_argument("-o", "--output", action="store", help="the output folder (end with / to create, taken from compile_cpp.yaml if not specified) and/or binary name (auto determined if not specified)")
# `--pass` is stored as `pass_args` because `pass` is a Python keyword and cannot be used as an attribute name.
_ = parser.add_argument("-p", "--pass", action="append", dest="pass_args", help="pass command line arguments to the compiled program when running it, if -r/--run is specified (use multiple times if more than one) [in addition to those in compile_cpp.yaml]")
_ = parser.add_argument("-r", "--run", action="store_true", help="run the program after compiling it")
_ = parser.add_argument("-s", "--std", action="store", choices=["c++17", "c++20", "c++23"], default="c++23", help="which C++ standard to use")
_ = parser.add_argument("-t", "--type", action="store", choices=["debug", "release"], default="debug", help="whether to compile in debug or release mode")
_ = parser.add_argument("-u", "--std-module", action="store", help="path to the standard library module (C++23 only, taken from compile_cpp.yaml if not specified, use 'auto' to auto-detect, 'disable' to explicitly disable)")
_ = parser.add_argument("-v", "--verbose", action="store_true", help="whether to print this script's diagnostic messages")
# `--disable-exceptions` takes the strings "true"/"false" rather than being a switch, so that "not given" (`None`) can be told apart from an explicit "false".
_ = parser.add_argument("-x", "--disable-exceptions", action="store", choices=["true", "false"], help="whether to disable exceptions [overrides compile_cpp.yaml]")
_ = parser.add_argument("-y", "--try-all", action="store_true", help="test compilation using all possible combinations of available compilers and C++ standards (also runs each compiled program if -r/--run is specified)")
# When the script is started without any arguments, parse `--help` instead, which prints the usage text and exits.
args = Args(parser.parse_args(args=None if len(sys.argv) > 1 else ["--help"]))
class ANSI(Enum):
    """Numeric codes placed inside ANSI escape sequences to style terminal output."""

    # Text attributes.
    reset = 0
    bold = 1
    dim = 2
    italic = 3
    underline = 4
    invert = 7
    strike = 9
    double_underline = 21

    # Standard foreground colors.
    fg_black = 30
    fg_red = 31
    fg_green = 32
    fg_yellow = 33
    fg_blue = 34
    fg_magenta = 35
    fg_cyan = 36
    fg_white = 37

    # Standard background colors.
    bg_black = 40
    bg_red = 41
    bg_green = 42
    bg_yellow = 43
    bg_blue = 44
    bg_magenta = 45
    bg_cyan = 46
    bg_white = 47

    # Bright foreground colors.
    fg_bright_black = 90
    fg_bright_red = 91
    fg_bright_green = 92
    fg_bright_yellow = 93
    fg_bright_blue = 94
    fg_bright_magenta = 95
    fg_bright_cyan = 96
    fg_bright_white = 97

    # Bright background colors.
    bg_bright_black = 100
    bg_bright_red = 101
    bg_bright_green = 102
    bg_bright_yellow = 103
    bg_bright_blue = 104
    bg_bright_magenta = 105
    bg_bright_cyan = 106
    bg_bright_white = 107


# The color used for each category of message printed by this script.
ANSI_INFO = ANSI.fg_bright_blue
ANSI_ERROR = ANSI.fg_bright_red
ANSI_SUCCESS = ANSI.fg_bright_green
ANSI_SEPARATOR = ANSI.fg_bright_yellow
ANSI_TRY_ALL = ANSI.fg_bright_magenta

# Colors are suppressed when the `NO_COLOR` environment variable is set to a non-empty value.
no_color: bool = os.environ.get("NO_COLOR", "") != ""


def print_ansi(message: str, *codes: ANSI) -> None:
    """Write `message` to standard output without a trailing newline. If any ANSI codes are given and colors are not suppressed via `NO_COLOR`, the message is wrapped in the matching escape sequence and followed by a reset."""
    if no_color or not codes:
        print(message, end="")
        return
    parameters = ";".join(str(code.value) for code in codes)
    print(f"\033[{parameters}m{message}\033[{ANSI.reset.value}m", end="")


def print_if_verbose(message: str, *codes: ANSI) -> None:
    """Write `message` plus a newline, styled with the given ANSI codes, but only when the verbose flag is set; otherwise do nothing."""
    if not args.verbose:
        return
    print_ansi(message + "\n", *codes)
Uses the given ANSI codes if specified.""" if args.verbose: print_ansi(message + "\n", *codes) def print_separator() -> None: """Print a separator line, but only if the verbose flag is set.""" print_if_verbose("=" * 60, ANSI_SEPARATOR) def print_key_values(key: str, values: list[str], length: int) -> None: """Print a key-value pair with the key left-aligned to the given length, but only if the verbose flag is set.""" if args.verbose: print_ansi((key + ":").ljust(length), ANSI.fg_bright_white, ANSI.bold) if len(values) == 0: print("") else: print(values[0]) for value in values[1:]: print(" " * length + f"{value}") def print_error_and_exit(message: str) -> Never: """Print an error message followed by a newline in red and exit.""" print_ansi(message + "\n", ANSI_ERROR) sys.exit(1) def relative_or_full_path(path: pathlib.Path) -> pathlib.Path: """If the path is relative to the current working directory, return it as a relative path; otherwise, return it as a full path.""" return path.relative_to(pathlib.Path.cwd()) if path.is_relative_to(pathlib.Path.cwd()) else path def parse_llvm_version(path_str: str) -> tuple[int, ...] | None: """Extract the LLVM version from a path and returns it as a tuple of integers.""" match: re.Match[str] | None = re.search(r"/llvm[-/](\d+(?:\.\d+)*)", path_str) if match is None: return None return tuple(int(part) for part in match.group(1).split(".")) def get_llvm_std_module(search: str) -> str | None: """Get the path to the LLVM standard library module on Linux or macOS. 
If multiple paths are found, return the one with the latest LLVM version.""" try: llvm_path: str = subprocess.check_output([f"find {search} -name std.cppm"], text=True, shell=True).strip() except subprocess.CalledProcessError: llvm_path: str = "" all_paths: list[str] = [line for line in llvm_path.splitlines() if line.strip()] if len(all_paths) == 0: return None return max(all_paths, key=lambda p: parse_llvm_version(p) or (0,)) def find_vs_path() -> pathlib.Path | None: """Find the Visual Studio installation path, if it exists.""" if platform.system() != "Windows": return None pf86: str = os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)") pf: str = os.environ.get("ProgramFiles", r"C:\Program Files") try_paths: list[pathlib.Path] = [pathlib.Path(p) / "Microsoft Visual Studio" / "Installer" / "vswhere.exe" for p in [pf86, pf]] vswhere: pathlib.Path | None = next((p for p in try_paths if p.exists()), None) if vswhere is None: return None try: install_root: str = subprocess.check_output( [ str(vswhere), "-latest", "-property", "installationPath", ], text=True, ).strip() except subprocess.CalledProcessError: return None if install_root == "": return None return pathlib.Path(install_root) def get_module_flags(name: str, module_output_path: pathlib.Path) -> list[str]: """Get the appropriate flags to import a module with the given name from the given output path.""" if compiler == "cl": return ["/reference", f"{name}={module_output_path.resolve()}", *([str(build_folder / f"{module_output_path.stem}.obj")] if (name != "std" or not args.as_module) else [])] if compiler == "clang++": return [f"-fmodule-file={name}={module_output_path.resolve()}"] # Otherwise, compiler == "g++". return ["-fmodules", f"-fmodule-mapper=|@g++-mapper-server -r{build_folder}"] def compile_module(name: str, paths: list[pathlib.Path], module_output_path: pathlib.Path) -> bool: """Compile a module with the given name. 
The first path in `paths` is the module file, and the rest are its dependencies. Returns True if the module was recompiled, False if it was up to date.""" module_file: pathlib.Path = paths[0] if module_output_path.exists(): module_output_mod: float = module_output_path.stat().st_mtime if not any((path.exists() and path.stat().st_mtime > module_output_mod) for path in paths): print_if_verbose(f'Module "{name}" is up to date, skipping compilation.', ANSI_INFO) return False try: module_command: list[str] = [ sys.executable, str(pathlib.Path(__file__).resolve()), # In the special case of GCC and the `std` module, `bits/std.cc` must be entered without the full path. str(module_file) if not (compiler == "g++" and name == "std") else gcc_std_path, f"--output={module_output_path.resolve()}", f"--arch={args.arch}", f"--compiler={compiler}", f"--std={args.std}", f"--type={args.type}", # Note: Not adding the extra options from the configuration file, since they will be added by the script anyway. *[f"--define={define}" for define in args.define], *[f"--include={include}" for include in args.include], *[f"--flag={flag}" for flag in args.flag], # For the `std` module in GCC, we also need to add `-fsearch-include-path`. *(["--flag=-fsearch-include-path"] if (compiler == "g++" and name == "std") else []), # If compiling the `std` module itself, we need to pass `-u=disable` to avoid infinite recursion. Otherwise, we pass along the specified module path if it exists. 
*(["--std-module=disable"] if name == "std" else [f"--std-module={std_module}"] if std_module is not None else []), "--as-module", *(["--verbose"] if args.verbose else []), *(["--disable-exceptions=true"] if disable_exceptions else ["--disable-exceptions=false"]), ] print_separator() print_if_verbose(f'Compiling module "{name}" with command: {subprocess.list2cmdline(module_command)}', ANSI_INFO) _ = sys.stdout.flush() module_result = subprocess.run( args=module_command, check=False, text=True, ) if module_result.returncode != 0: print_error_and_exit(f"Module compilation failed with return code: {module_result.returncode}.") except Exception as exc: print_error_and_exit(f"Could not compile module due to exception: {exc}.") else: return True # Collect the full path(s) to the source file(s). source_paths: list[pathlib.Path] = [pathlib.Path(file) for file in args.files] # If the `try_all` flag is set, run this script recursively using all possible combinations of compilers and C++ standards available in the system. if args.try_all: if "-c" in sys.argv or "--compiler" in sys.argv: print_error_and_exit("Error: The -y/--try-all flag cannot be used together with the -c/--compiler flag.") if "-s" in sys.argv or "--std" in sys.argv: print_error_and_exit("Error: The -y/--try-all flag cannot be used together with the -s/--std flag.") # We pass all the arguments to the child processes except for the one that enables this mode, to avoid infinite recursion. child_args: list[str] = [arg for arg in sys.argv[1:] if arg not in ("-y", "--try-all")] # Determine which compilers are available. compilers: list[str] = [] if find_vs_path() is not None: compilers.append("cl") if shutil.which("clang++") is not None: compilers.append("clang++") # On macOS, g++ is by default just an alias for clang++, so we skip it. if shutil.which("g++") is not None and platform.system() != "Darwin": compilers.append("g++") # Compile using all available compilers using all relevant C++ standards. 
# Collect the path(s) to the source file(s), exactly as given on the command line.
source_paths: list[pathlib.Path] = [pathlib.Path(file) for file in args.files]

# If the `try_all` flag is set, run this script recursively using all possible combinations of compilers and C++ standards available in the system. This branch always ends by exiting the script.
if args.try_all:
    # NOTE(review): these checks only match the exact tokens `-c`/`--compiler`/`-s`/`--std` in `sys.argv`; other spellings that `argparse` accepts (e.g. `--compiler=g++`) are not detected here.
    if "-c" in sys.argv or "--compiler" in sys.argv:
        print_error_and_exit("Error: The -y/--try-all flag cannot be used together with the -c/--compiler flag.")
    if "-s" in sys.argv or "--std" in sys.argv:
        print_error_and_exit("Error: The -y/--try-all flag cannot be used together with the -s/--std flag.")
    # We pass all the arguments to the child processes except for the one that enables this mode, to avoid infinite recursion.
    child_args: list[str] = [arg for arg in sys.argv[1:] if arg not in ("-y", "--try-all")]
    # Determine which compilers are available.
    compilers: list[str] = []
    if find_vs_path() is not None:
        compilers.append("cl")
    if shutil.which("clang++") is not None:
        compilers.append("clang++")
    # On macOS, g++ is by default just an alias for clang++, so we skip it.
    if shutil.which("g++") is not None and platform.system() != "Darwin":
        compilers.append("g++")
    # Compile using all available compilers using all relevant C++ standards.
    standards: list[str] = ["c++17", "c++20", "c++23"]
    # NOTE(review): `workspace_path` is assigned here but does not appear to be read anywhere else in this script.
    workspace_path: pathlib.Path = pathlib.Path(__file__).parent.parent.resolve()
    try:
        compile_start: float = time.perf_counter()
        for compiler in compilers:
            for std in standards:
                # Each child run treats warnings as errors, using the flag spelling of the relevant compiler.
                warnings_as_errors: list[str] = ["-f/WX"] if compiler == "cl" else ["-f-Werror"]
                command: list[str] = [
                    sys.executable,
                    str(pathlib.Path(__file__).resolve()),
                    "-c",
                    compiler,
                    "-s",
                    std,
                    *warnings_as_errors,
                    *child_args,
                ]
                print_if_verbose(f"Compiling with {compiler} using {std.upper()} standard with command: {subprocess.list2cmdline(command)}", ANSI_TRY_ALL)
                compile_result = subprocess.run(
                    args=command,
                    check=False,
                    text=True,
                )
                # Stop at the first failing combination. The message is only shown in verbose mode, but the exit status is 1 either way.
                if compile_result.returncode != 0:
                    print_if_verbose("Compilation failed, aborting!", ANSI_TRY_ALL)
                    sys.exit(1)
    except Exception as exc:
        print_if_verbose(f"Could not compile due to exception: {exc}, aborting!", ANSI_TRY_ALL)
        sys.exit(1)
    print_if_verbose(f"All compilations completed successfully in {time.perf_counter() - compile_start:.2f} seconds.", ANSI_TRY_ALL)
    sys.exit(0)

# Determine the compiler if it is not given. An explicit -c/--compiler choice always wins; otherwise the default depends on the operating system.
compiler: str = ""
vs_path: pathlib.Path | None = find_vs_path()
if args.compiler is not None:
    compiler = args.compiler
elif platform.system() == "Windows":
    # On Windows, we default to MSVC if the Visual Studio installation path exists, otherwise we fall back to Clang, and then GCC.
    if vs_path is not None and vs_path.exists():
        compiler = "cl"
    elif shutil.which("clang++") is not None:
        compiler = "clang++"
    elif shutil.which("g++") is not None:
        compiler = "g++"
elif platform.system() == "Linux":
    # On Linux, we default to GCC if it is available, otherwise we fall back to Clang.
    if shutil.which("g++") is not None:
        compiler = "g++"
    elif shutil.which("clang++") is not None:
        compiler = "clang++"
elif platform.system() == "Darwin" and shutil.which("clang++") is not None:
    # On macOS, we just check if Clang is available.
    compiler = "clang++"
# An empty string at this point means that no branch above found a usable compiler.
if compiler == "":
    print_error_and_exit("Error: No compiler found!")
# If a file named `compile_cpp.yaml` exists in the current working directory, read the configuration from it. All options are added to those from the command line, except the output file/folder and path to the standard library module, which are only used if not provided in the command line. Note that all folders should be specified relative to the current working directory.
# The lists are copied (`[:]`) so that extending them below does not modify the parsed command-line arguments.
defines: list[str] = args.define[:]
deps: list[str] = args.deps[:]
disable_exceptions: bool = args.disable_exceptions == "true"
flags: list[str] = args.flag[:]
includes: list[str] = args.include[:]
try:
    # Each specification is split at "=" into exactly a name and a comma-separated list of files; any other number of "=" characters fails to unpack and raises `ValueError`.
    modules: dict[str, list[str]] = {name: files.split(",") for module in args.module for name, files in (module.split("="),)}
except ValueError:
    print_error_and_exit('Error: Module specification must be in the format "module_name=module_file,dependencies,...".')
output: str | None = args.output
pass_args: list[str] = args.pass_args[:]
std_module: str | None = args.std_module
compile_yaml: pathlib.Path = pathlib.Path.cwd() / "compile_cpp.yaml"
if not args.ignore_config and compile_yaml.exists():
    with compile_yaml.open("r") as file:
        # An empty file makes `safe_load` return `None`, which is treated as an empty configuration.
        raw_config: dict[str, Any] | Any = yaml.safe_load(file) or {}
    if not isinstance(raw_config, dict):
        print_error_and_exit("Error: compile_cpp.yaml must be a dictionary of options.")
    compile_config = cast("dict[str, Any]", raw_config)
    if "defines" in compile_config:
        defines.extend(compile_config["defines"])
    if "deps" in compile_config:
        deps.extend(compile_config["deps"])
    # The command line overrides the file: the file's value is only used if -x/--disable-exceptions was not given.
    if args.disable_exceptions is None and "disable_exceptions" in compile_config:
        disable_exceptions = compile_config["disable_exceptions"] is True
    # Flags are stored per compiler; only those listed under the selected compiler are added.
    if "flags" in compile_config and compiler in compile_config["flags"]:
        flags.extend(compile_config["flags"][compiler])
    if "includes" in compile_config:
        includes.extend(compile_config["includes"])
    # NOTE(review): `update` lets a module from the file replace a module of the same name given on the command line — confirm that this precedence is intended.
    if "modules" in compile_config:
        modules.update(compile_config["modules"])
    if output is None and "output" in compile_config:
        output = compile_config["output"]
    if "pass_args" in compile_config:
        pass_args.extend(compile_config["pass_args"])
    # The standard library module path is looked up by operating system name and then by compiler, and is only used if it is non-empty and -u/--std-module was not given.
    if std_module is None and "std_module" in compile_config and platform.system() in compile_config["std_module"] and compiler in compile_config["std_module"][platform.system()] and len(compile_config["std_module"][platform.system()][compiler]) > 0:
        std_module = compile_config["std_module"][platform.system()][compiler]

# Determine the name of the binary file and the build folder. `auto_binary` records that the binary name still has to be derived from the first source file later on.
binary_path: pathlib.Path | None = None
build_folder: pathlib.Path
auto_binary: bool = False
if output is not None:
    # Calculate the output path relative to the current working directory. Note that if the path is absolute, `pathlib` will automatically use the absolute path instead of a relative path.
    output_path: pathlib.Path = (pathlib.Path.cwd() / output).resolve()
    if output.endswith(("/", "\\")) or output_path.is_dir():
        # If the output path is a directory, we use it as the build folder, and automatically determine the name of the binary file.
        build_folder = output_path.resolve()
        auto_binary = True
    elif output_path.is_absolute():
        # If the output path is an absolute path to a file, the build folder is the file's folder.
        # NOTE(review): `output_path` is the result of `.resolve()`, which returns an absolute path, so this branch appears to always be taken when the previous one is not — confirm whether the `else` below is reachable.
        build_folder = output_path.parent.resolve()
        binary_path = output_path
    else:
        # If the output path is just a file name, the build folder is the current working directory by default.
        build_folder = pathlib.Path.cwd()
        binary_path = build_folder / output_path
else:
    # If there is no output path at all, the build folder is the current working directory by default, and we automatically determine the name of the binary file.
    build_folder = pathlib.Path.cwd()
    auto_binary = True
# Clear and recreate the build folder if requested.
if args.clear_output:
    # Refuse to delete the current working directory, which is the default build folder when no output folder is specified.
    if build_folder == pathlib.Path.cwd():
        print_error_and_exit("Error: Cannot clear the output path if it is the current working directory.")
    if build_folder.exists():
        print_if_verbose(f"Clearing output folder: {relative_or_full_path(build_folder)}", ANSI_INFO)
        # The folder is deleted with everything in it, and then created again empty.
        shutil.rmtree(build_folder)
        pathlib.Path(build_folder).mkdir(exist_ok=True, parents=True)
        print_if_verbose("Cleared successfully!", ANSI_SUCCESS)
    else:
        print_if_verbose(f"Creating empty output folder: {relative_or_full_path(build_folder)}", ANSI_INFO)
        pathlib.Path(build_folder).mkdir(exist_ok=True, parents=True)
        print_if_verbose("Created successfully!", ANSI_SUCCESS)
    if len(source_paths) == 0:
        # If no source files are specified, just clear and exit.
        sys.exit(0)
else:
    # Otherwise, just create the build folder if it does not exist.
    pathlib.Path(build_folder).mkdir(exist_ok=True, parents=True)

# Add the appropriate flags to disable exceptions if requested. For MSVC, when exceptions are not disabled, `/EHsc` is added explicitly instead.
if disable_exceptions:
    if compiler == "cl":
        flags.extend(["/EHs-c-", "/D_HAS_EXCEPTIONS=0"])
    else:
        flags.append("-fno-exceptions")
elif compiler == "cl":
    flags.append("/EHsc")

# Importing the C++ Standard Library is only available in C++23 mode. If "disable" is specified for the standard library module, we skip it; this is used to avoid infinite recursion.
use_std_module: bool = not (std_module is None or std_module == "disable" or args.std != "c++23")
# The `std` module is placed first in the dictionary; a `std` entry already present in `modules` replaces the one added here, since it is unpacked last.
if use_std_module and std_module is not None:
    modules = {"std": [std_module], **modules}
# Figure out the path to the std module, if relevant. This only applies if the path was given as "auto"; `success` records whether a path was found.
gcc_std_path: str = "bits/std.cc"
if use_std_module and "std" in modules and modules["std"][0].strip() == "auto":
    success: bool = False
    if platform.system() == "Windows" and compiler == "cl" and vs_path is not None:
        # For MSVC, the default tools version is read from a text file in the Visual Studio installation, and the module is expected in the `modules` folder of that tools version.
        vc_version_path: pathlib.Path = vs_path / "VC" / "Auxiliary" / "Build" / "Microsoft.VCToolsVersion.default.txt"
        if vc_version_path.exists():
            with vc_version_path.open("r", encoding="utf-8") as vc_version_file:
                vc_version: str = vc_version_file.read().strip()
            vc_tools_path: pathlib.Path = vs_path / "VC" / "Tools" / "MSVC" / vc_version
            # NOTE(review): unlike the Clang branches below, the existence of `std.ixx` itself is not checked here.
            modules["std"][0] = rf"{vc_tools_path}\modules\std.ixx"
            success = True
    elif compiler == "clang++":
        # Note: The Clang `std` module is only available with libc++.
        if platform.system() == "Windows":
            # On Windows, libc++ is most likely installed via MSYS2, so the `std` module should be at `C:\msys64\clang64\share\libc++\v1\std.cppm`. We calculate it relative to the path where `clang++.exe` is located, in case the MSYS2 installation folder is different.
            clang_path: str | None = shutil.which("clang++")
            if clang_path:
                std_path: pathlib.Path = pathlib.Path(clang_path).parent.parent / "share" / "libc++" / "v1" / "std.cppm"
                if std_path.exists():
                    modules["std"][0] = str(std_path.absolute())
                    success = True
        elif platform.system() == "Darwin":
            # On macOS, the `std` module should be at `/usr/local/Cellar/llvm/<version>/share/libc++/v1/std.cppm`.
            llvm_std_path: str | None = get_llvm_std_module("/usr/local/Cellar/llvm")
            if llvm_std_path:
                modules["std"][0] = llvm_std_path
                success = True
        elif platform.system() == "Linux":
            # On Linux, the `std` module should be at `/usr/lib/llvm-<version>/share/libc++/v1/std.cppm`.
            llvm_std_path: str | None = get_llvm_std_module("/usr/lib/llvm-*")
            if llvm_std_path:
                modules["std"][0] = llvm_std_path
                success = True
    else:  # compiler == "g++"
        # In GCC the module file is always at `bits/std.cc`.
        # NOTE(review): this branch is also reached if the compiler is `cl` but the first condition above is false (e.g. `vs_path` is None) — confirm whether that case can occur in practice.
        modules["std"][0] = gcc_std_path
        success = True
    if not success:
        print_error_and_exit('Error: "auto" specified for the standard library module path, but the script could not locate it. Please specify the path manually.')

# Determine the appropriate extension for modules. This is the extension of the file whose modification time is used to check if a module is up to date.
module_extension: str = ""
if compiler == "cl":
    module_extension = ".ifc"
elif compiler == "clang++":
    module_extension = ".pcm"
else:  # compiler == "g++"
    # For GCC, the extension is `.gcm`, but we do not have control over that; however, we also create an object file with the `.o` extension so we can check if the module is up to date (see below).
    module_extension = ".o"

# If we are compiling as a module, add the appropriate flags.
if args.as_module:
    if compiler == "cl":
        flags.extend(["/interface", "/TP", "/c"])
    elif compiler == "clang++":
        flags.extend(["--precompile", "-Wno-include-angled-in-module-purview", "-Wno-reserved-module-identifier", "-xc++-module"])
    else:  # compiler == "g++"
        # Note: Creating an object file can be disabled with `-fmodule-only`, and it doesn't seem like the object file is actually needed, only the `.gcm` file. However, we create the object file anyway so we can check if the module is up to date, since there appears to be no way to control the name of the `.gcm` file.
        flags.extend(["-fmodules", "-c", f"-fmodule-mapper=|@g++-mapper-server -r{build_folder}", "-xc++"])
# If the user did not provide an output file name, we use the name of the first source file, appending the compiler, mode, and C++ standard, as well as the appropriate extension.
short_compiler: str = "clang" if compiler == "clang++" else "gcc" if compiler == "g++" else "msvc"
# For example, "debug-gcc-cpp23": the last two characters of the standard are its number.
suffix: str = f"{args.type}-{short_compiler}-cpp{args.std[-2:]}"
if auto_binary:
    # Modules get the module extension; programs get `.exe` on Windows and no extension elsewhere.
    extension: str = module_extension if args.as_module else ".exe" if platform.system() == "Windows" else ""
    module_indicator: str = "module_" if args.as_module else ""
    # NOTE(review): `source_paths[0]` assumes that at least one source file was given — confirm that an empty list cannot reach this line.
    binary_name: str = f"{source_paths[0].stem}_{module_indicator}{suffix}{extension}"
    binary_path = build_folder / binary_name
if binary_path is None:
    print_error_and_exit("Error: Could not determine binary file name!")

# If modules are specified, pre-compile them by calling this script recursively. If using the `std` module, pre-compile it first. If this is a module itself, don't compile any other modules to avoid infinite recursion. (Note: This assumes that modules do not depend on each other.)
recompiled_modules: bool = False
# For each module, the full paths to the module file (first) and its dependencies.
module_paths: dict[str, list[pathlib.Path]] = {name: [(pathlib.Path.cwd() / file).resolve() for file in files] for name, files in modules.items()}
# The compiled module file is named after the module file, with the same suffix as the binary and the module extension.
module_output_paths: dict[str, pathlib.Path] = {name: build_folder / f"{path[0].stem}_module_{suffix}{module_extension}" for name, path in module_paths.items()}
# The flags for importing the `std` module are added even when compiling a module, since the module may import `std` itself.
if use_std_module and "std" in modules:
    flags.extend(get_module_flags("std", module_output_paths["std"]))
if not args.as_module:
    if use_std_module and "std" in modules:
        recompiled_modules |= compile_module("std", [(pathlib.Path.cwd() / modules["std"][0]).resolve()], module_output_paths["std"])
    # Modules other than `std` are only used with C++20 or later.
    if len(modules) > 0 and (args.std in ["c++20", "c++23"]):
        for n, p in module_paths.items():
            if n == "std":
                continue
            flags.extend(get_module_flags(n, module_output_paths[n]))
            recompiled_modules |= compile_module(n, p, module_output_paths[n])
    print_if_verbose("Compiling program...", ANSI_INFO)
# If the output binary already exists, check if we need to recompile based on whether any of the source files or their dependencies have changed. If we recompiled any modules, we need to recompile anyway, so we don't need to check the dependencies. The `force` flag overrides this check.
need_recompile: bool = True
deps_paths: list[pathlib.Path] = [(pathlib.Path.cwd() / file).resolve() for file in deps]
if not args.force and not recompiled_modules and binary_path.exists():
    binary_mod: float = binary_path.stat().st_mtime
    # Files that do not exist are ignored; the binary is up to date if no existing file is newer than it.
    if not any((path.exists() and path.stat().st_mtime > binary_mod) for path in (source_paths + deps_paths)):
        print_if_verbose("Binary is up to date, skipping compilation.", ANSI_INFO)
        need_recompile = False

if need_recompile:
    include_paths: list[pathlib.Path] = [(pathlib.Path.cwd() / folder).resolve() for folder in includes]
    # On macOS, make sure we are using Homebrew Clang, if available, instead of Apple Clang, which does not support C++20 modules.
    compiler_path: str | None
    if compiler == "clang++" and platform.system() == "Darwin":
        # NOTE(review): only the `/usr/local` location is checked; if it does not exist, whichever `clang++` is on the PATH is used.
        compiler_path = "/usr/local/opt/llvm/bin/clang++"
        if not pathlib.Path(compiler_path).exists():
            compiler_path = shutil.which(compiler)
    else:
        compiler_path = shutil.which(compiler)
    # Determine the command to execute based on the chosen compiler and parameters. If the compiler's full path could not be determined, its bare name is used instead.
    command: list[str]
    if compiler == "clang++":
        command = [
            compiler_path if compiler_path is not None else compiler,
            *flags,
            *[str(path.resolve()) for path in source_paths],
            f"-std={args.std}",
            "-g3" if args.type == "debug" else "-O3",
            "-o",
            str(binary_path),
            # Each include folder becomes two separate arguments: "-I" followed by the folder.
            *[item for include in [["-I", str(path.resolve())] for path in include_paths] for item in include],
            *([f"-D{define}" for define in defines]),
        ]
    elif compiler == "g++":
        command = [
            compiler_path if compiler_path is not None else compiler,
            *flags,
            # In the special case of GCC and the `std` module, `bits/std.cc` must be entered without the full path.
            *[(str(path.resolve()) if path != pathlib.Path(gcc_std_path) else gcc_std_path) for path in source_paths],
            f"-std={args.std}",
            "-ggdb3" if args.type == "debug" else "-O3",
            "-o",
            str(binary_path),
            *[item for include in [["-I", str(path.resolve())] for path in include_paths] for item in include],
            *([f"-D{define}" for define in defines]),
        ]
    else:  # compiler == "cl"
        command = [
            compiler_path if compiler_path is not None else compiler,
            *flags,
            *[str(path.resolve()) for path in source_paths],
            # C++23 is requested from MSVC as `c++latest`.
            f"/std:{'c++latest' if args.std == 'c++23' else args.std}",
            *(["/Zi", f"/Fd:{binary_path.with_suffix('.pdb')}"] if args.type == "debug" else ["/O2"]),
            # The output is an executable for programs, or a module interface file for modules.
            *([f"/Fe:{binary_path}"] if not args.as_module else ["/ifcOutput", str(binary_path)]),
            # If compiling multiple source files, we cannot specify the names of each object file, only the output folder.
            f"/Fo:{binary_path.with_suffix('.obj')}" if len(source_paths) == 1 else f"/Fo:{build_folder}\\",
            *[item for include in [["/I", str(path.resolve())] for path in include_paths] for item in include],
            "/permissive-",
            "/nologo",
            "/Zc:__cplusplus",
            *([f"/D{define}" for define in defines]),
        ]
    # For MSVC we also need to invoke the Visual Studio Developer PowerShell script. The compiler command is flattened into a single string and run by PowerShell after that script.
    if compiler == "cl" and vs_path is not None:
        vs_pwsh_path: pathlib.Path = vs_path / "Common7" / "Tools" / "Launch-VsDevShell.ps1"
        command = ["pwsh.exe", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", f"& '{vs_pwsh_path}' -Arch {args.arch} -HostArch {args.arch} | Out-Null; {subprocess.list2cmdline(command)}"]

    # Print a summary of the compilation parameters (only shown in verbose mode). `max_length` is the width of the key column.
    print_separator()
    max_length = 16
    print_key_values("Compiler", [compiler], max_length)
    print_key_values("C++ Standard", [args.std.upper()], max_length)
    print_key_values("Type", [args.type.title()], max_length)
    print_key_values("Source file(s)", [str(relative_or_full_path(path)) for path in source_paths], max_length)
    print_key_values("Defines", defines, max_length)
    print_key_values("Dependencies", [str(relative_or_full_path(path)) for path in deps_paths], max_length)
    print_key_values("Flags", flags, max_length)
    print_key_values("Includes", ([str(relative_or_full_path(path)) for path in include_paths]), max_length)
    # Modules are only listed for C++20 or later, and not when compiling a module.
    if args.std in ["c++20", "c++23"]:
        print_key_values("Modules", [f"{name}={relative_or_full_path(path[0])}" for name, path in module_paths.items()] if not args.as_module else [""], max_length)
    else:
        print_key_values("Modules", [""], max_length)
    print_key_values("Build folder", [str(relative_or_full_path(build_folder))], max_length)
    print_key_values("Binary file", [str(relative_or_full_path(binary_path))], max_length)
    print_key_values("Command", [subprocess.list2cmdline(command)], max_length)
    print_separator()

    # Perform the actual compilation.
    print_if_verbose("Compiling...", ANSI_INFO)
    try:
        # Flush our own output first so that it is not interleaved with the compiler's output.
        _ = sys.stdout.flush()
        compile_start: float = time.perf_counter()
        compile_result = subprocess.run(
            args=command,
            check=False,
            text=True,
        )
        if compile_result.returncode == 0:
            print_if_verbose(f"Compilation completed successfully in {time.perf_counter() - compile_start:.2f} seconds.", ANSI_SUCCESS)
        else:
            print_error_and_exit(f"Compilation failed with return code: {compile_result.returncode}.")
    except Exception as exc:
        print_error_and_exit(f"Could not compile due to exception: {exc}.")

# Run the compiled program if requested. This happens even if the compilation was skipped because the binary is up to date.
if args.run:
    # We run the program in the build folder.
    os.chdir(build_folder)
    print_if_verbose(f"Running program{f' with arguments {" ".join(pass_args)}' if len(pass_args) > 0 else ''}...", ANSI_INFO)
    print_separator()
    try:
        _ = sys.stdout.flush()
        run_start: float = time.perf_counter()
        run_result: subprocess.CompletedProcess[str] = subprocess.run(
            args=[binary_path, *pass_args],
            check=False,
            text=True,
        )
        print_separator()
        # A non-zero return code of the program makes this script exit with an error as well.
        if run_result.returncode == 0:
            print_if_verbose(f"Program executed successfully in {time.perf_counter() - run_start:.2f} seconds.", ANSI_SUCCESS)
        else:
            print_error_and_exit(f"Program failed with return code: {run_result.returncode}.")
    except Exception as exc:
        print_separator()
        print_error_and_exit(f"Could not run program due to exception: {exc}.")
(baraksh@gmail.com) (https://baraksh.com/) * @version 5.1.0 * @date 2026-01-03 * @copyright Copyright (c) 2021-2026 Barak Shoshany. Licensed under the MIT license. If you found this project useful, please consider starring it on GitHub! If you use this library in software of any kind, please provide a link to the GitHub repository https://github.com/bshoshany/thread-pool in the source code and documentation. If you use this library in published research, please cite it as follows: Barak Shoshany, "A C++17 Thread Pool for High-Performance Scientific Computing", doi:10.1016/j.softx.2024.101687, SoftwareX 26 (2024) 101687, arXiv:2105.00613 * * @brief `BS::thread_pool`: a fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool library. This program tests all aspects of the library, but is not needed in order to use the library. */ // We need to include since if we're using `import std` it will not define any feature-test macros. #ifdef __has_include #if __has_include() #include // NOLINT(misc-include-cleaner) #endif #endif // If the macro `BS_THREAD_POOL_IMPORT_STD` is defined, import the C++ Standard Library as a module. Otherwise, include the relevant Standard Library header files. #if defined(BS_THREAD_POOL_IMPORT_STD) && (__cplusplus >= 202004L) import std; constexpr bool using_import_std = true; #else #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __cpp_exceptions #include #include #endif #ifdef __cpp_lib_format #include #endif #ifdef __cpp_lib_semaphore #include #endif constexpr bool using_import_std = false; #endif // If the macro `BS_THREAD_POOL_TEST_IMPORT_MODULE` is defined, import the thread pool library as a module. Otherwise, include the header file. 
We also check that we are in C++20 or later. We can't use `__cpp_modules` to check if modules are supported, because Clang does not define it even in C++20 mode; its support for C++20 modules is only partial, but it does seem to work for this particular library. #define BS_THREAD_POOL_TEST_VERSION 5, 1, 0 #if defined(BS_THREAD_POOL_TEST_IMPORT_MODULE) && (__cplusplus >= 202002L) import BS.thread_pool; static_assert(BS::thread_pool_module, "The flag BS::thread_pool_module is set to false, but the library was imported as a module. Aborting compilation."); static_assert(BS::thread_pool_version == BS::version(BS_THREAD_POOL_TEST_VERSION), "The versions of BS_thread_pool_test.cpp and the BS.thread_pool module do not match. Aborting compilation."); #else #include "BS_thread_pool.hpp" static_assert(!BS::thread_pool_module, "The flag BS::thread_pool_module is set to true, but the library was not imported as a module. Aborting compilation."); static_assert(BS::thread_pool_version == BS::version(BS_THREAD_POOL_TEST_VERSION), "The versions of BS_thread_pool_test.cpp and BS_thread_pool.hpp do not match. Aborting compilation."); #endif #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS static_assert(BS::thread_pool_native_extensions, "Cannot test the native extensions, as the thread pool module was compiled without enabling them using the macro BS_THREAD_POOL_NATIVE_EXTENSIONS. Aborting compilation."); #endif namespace { // A global synced stream which prints to the standard output. BS::synced_stream sync_cout; // A global synced stream which prints to a log file. BS::synced_stream sync_log; // A flag to enable or disable printing to the standard output. bool use_stdout = true; // A flag to enable or disable printing to the log file. bool use_log = false; // A flag to enable or disable colored output. 
bool no_color = false; // ================================ // std::counting_semaphore polyfill // ================================ #ifdef __cpp_lib_semaphore using std::binary_semaphore; using std::counting_semaphore; #else /** * @brief A polyfill for `std::counting_semaphore`, to be used if C++20 features are not available. A `counting_semaphore` is a synchronization primitive that allows more than one concurrent access to the same resource. The number of concurrent accessors is limited by the semaphore's counter, which is decremented when a thread acquires the semaphore and incremented when a thread releases the semaphore. If the counter is zero, a thread trying to acquire the semaphore will be blocked until another thread releases the semaphore. * * @tparam LeastMaxValue The least maximum value of the counter. (In this implementation, it is also the actual maximum value.) */ template ::max()> class [[nodiscard]] counting_semaphore { static_assert(LeastMaxValue >= 0, "The least maximum value for a counting semaphore must not be negative."); public: /** * @brief Construct a new counting semaphore with the given initial counter value. * * @param desired The initial counter value. */ constexpr explicit counting_semaphore(const std::ptrdiff_t desired) : counter(desired) {} // The copy and move constructors and assignment operators are deleted. The semaphore cannot be copied or moved. counting_semaphore(const counting_semaphore&) = delete; counting_semaphore(counting_semaphore&&) = delete; counting_semaphore& operator=(const counting_semaphore&) = delete; counting_semaphore& operator=(counting_semaphore&&) = delete; ~counting_semaphore() = default; /** * @brief Returns the internal counter's maximum possible value, which in this implementation is equal to `LeastMaxValue`. * * @return The internal counter's maximum possible value. 
*/ [[nodiscard]] static constexpr std::ptrdiff_t max() noexcept { return LeastMaxValue; } /** * @brief Atomically decrements the internal counter by 1 if it is greater than 0; otherwise blocks until it is greater than 0 and can successfully decrement the internal counter. */ void acquire() { std::unique_lock lock(mutex); cv.wait(lock, [this] { return counter > 0; }); --counter; } /** * @brief Atomically increments the internal counter. Any thread(s) waiting for the counter to be greater than 0, such as due to being blocked in `acquire()`, will subsequently be unblocked. * * @param update The amount to increment the internal counter by. Defaults to 1. */ void release(const std::ptrdiff_t update = 1) { { const std::scoped_lock lock(mutex); counter += update; } cv.notify_all(); } /** * @brief Tries to atomically decrement the internal counter by 1 if it is greater than 0; no blocking occurs regardless. * * @return `true` if decremented the internal counter, `false` otherwise. */ bool try_acquire() { std::scoped_lock lock(mutex); if (counter > 0) { --counter; return true; } return false; } /** * @brief Tries to atomically decrement the internal counter by 1 if it is greater than 0; otherwise blocks until it is greater than 0 and can successfully decrement the internal counter, or the `rel_time` duration has been exceeded. * * @tparam Rep An arithmetic type representing the number of ticks to wait. * @tparam Period An `std::ratio` representing the length of each tick in seconds. * @param rel_time The duration the function must wait. Note that the function may wait for longer. * @return `true` if decremented the internal counter, `false` otherwise. 
*/ template bool try_acquire_for(const std::chrono::duration& rel_time) { std::unique_lock lock(mutex); if (!cv.wait_for(lock, rel_time, [this] { return counter > 0; })) return false; --counter; return true; } /** * @brief Tries to atomically decrement the internal counter by 1 if it is greater than 0; otherwise blocks until it is greater than 0 and can successfully decrement the internal counter, or the `abs_time` time point has been passed. * * @tparam Clock The type of the clock used to measure time. * @tparam Duration An `std::chrono::duration` type used to indicate the time point. * @param abs_time The earliest time the function must wait until. Note that the function may wait for longer. * @return `true` if decremented the internal counter, `false` otherwise. */ template bool try_acquire_until(const std::chrono::time_point& abs_time) { std::unique_lock lock(mutex); if (!cv.wait_until(lock, abs_time, [this] { return counter > 0; })) return false; --counter; return true; } private: /** * @brief A mutex used to synchronize access to the counter. */ mutable std::mutex mutex; /** * @brief A condition variable used to wait for the counter. */ std::condition_variable cv; /** * @brief The semaphore's counter. */ std::ptrdiff_t counter; }; /** * @brief A polyfill for `std::binary_semaphore`, to be used if C++20 features are not available. */ using binary_semaphore = counting_semaphore<1>; #endif // ====================== // Functions for printing // ====================== /** * @brief An enumeration class of ANSI escape codes for formatting terminal output. 
*/ enum class ansi : std::uint8_t { reset = 0, bold = 1, dim = 2, italic = 3, underline = 4, invert = 7, strike = 9, double_underline = 21, normal_intensity = 22, not_italic = 23, not_underlined = 24, fg_black = 30, fg_red = 31, fg_green = 32, fg_yellow = 33, fg_blue = 34, fg_magenta = 35, fg_cyan = 36, fg_white = 37, bg_black = 40, bg_red = 41, bg_green = 42, bg_yellow = 43, bg_blue = 44, bg_magenta = 45, bg_cyan = 46, bg_white = 47, fg_bright_black = 90, fg_bright_red = 91, fg_bright_green = 92, fg_bright_yellow = 93, fg_bright_blue = 94, fg_bright_magenta = 95, fg_bright_cyan = 96, fg_bright_white = 97, bg_bright_black = 100, bg_bright_red = 101, bg_bright_green = 102, bg_bright_yellow = 103, bg_bright_blue = 104, bg_bright_magenta = 105, bg_bright_cyan = 106, bg_bright_white = 107 }; // Constants for ANSI styles used in this program. constexpr std::initializer_list ansi_error = {ansi::fg_bright_red}; constexpr std::initializer_list ansi_info = {ansi::fg_bright_blue}; constexpr std::initializer_list ansi_separator = {ansi::fg_bright_yellow}; constexpr std::initializer_list ansi_subtitle = {ansi::fg_bright_cyan, ansi::bold}; constexpr std::initializer_list ansi_success = {ansi::fg_bright_green}; constexpr std::initializer_list ansi_title = {ansi::fg_bright_white, ansi::bold}; constexpr std::initializer_list ansi_title_italic = {ansi::fg_bright_white, ansi::bold, ansi::italic}; constexpr std::initializer_list ansi_title_underline = {ansi::fg_bright_white, ansi::bold, ansi::underline}; /** * @brief Output an ANSI escape code to a stream. * * @param out The output stream. * @param code The ANSI code. * @return The output stream. */ inline std::ostream& operator<<(std::ostream& out, const ansi code) { if (no_color) return out; return out << "\033[" << static_cast(code) << 'm'; } /** * @brief Output multiple ANSI escape codes to a stream. * * @param out The output stream. * @param codes The ANSI codes. * @return The output stream. 
*/ inline std::ostream& operator<<(std::ostream& out, const std::initializer_list codes) { if (no_color) return out; out << "\033["; bool first = true; for (const ansi code : codes) { if (!first) out << ';'; else first = false; out << static_cast(code); } return out << 'm'; } /** * @brief Print any number of items to the standard output and/or the log file. * * @tparam T The types of the items. * @param items The items to print. */ template void log(const T&... items) { if (use_stdout) sync_cout.print(items...); if (use_log) sync_log.print(items...); } /** * @brief Print any number of items to the standard output and/or the log file, followed by a newline character. * * @tparam T The types of the items. * @param items The items to print. */ template void logln(T&&... items) { log(std::forward(items)..., '\n'); } /** * @brief Print any number of items to the standard output and/or the log file with the specified ANSI styles. The ANSI codes are only printed to the standard output, but both streams receive the printed items. * * @tparam T The types of the items. * @param codes The ANSI codes. * @param items The items to print. */ template void log_ansi(const std::initializer_list codes, const T&... items) { if (use_stdout) sync_cout.print(codes, items..., ansi::reset); if (use_log) sync_log.print(items...); } /** * @brief Print any number of items to the standard output and/or the log file with the specified ANSI styles, followed by a newline character. The ANSI codes are only printed to the standard output, but both streams receive the printed items. * * @tparam T The types of the items. * @param codes The ANSI codes. * @param items The items to print. */ template void logln_ansi(const std::initializer_list codes, T&&... items) { log_ansi(codes, std::forward(items)..., '\n'); } /** * @brief Print a stylized header with the specified ANSI styles. * * @param text The text of the header. Will appear between two lines. * @param symbol The symbol to use for the lines. 
Default is '='. * @param codes The ANSI codes. Default is `ansi_separator`. */ void print_header(const std::string_view text, const char symbol = '=', const std::initializer_list codes = ansi_separator) { const std::string separator(text.length(), symbol); logln_ansi(codes, BS::synced_stream::flush, '\n', separator, '\n', text, '\n', separator); } /** * @brief Print a key followed by values with different ANSI styles. * * @tparam T The types of the values. * @param key The key. * @param values The values. */ template void print_key_values(const std::string_view key, T&&... values) { log_ansi(ansi_title, key); logln_ansi(ansi_subtitle, std::forward(values)...); } // ================================= // Functions for checking conditions // ================================= /** * @brief A struct to keep count of the number of tests that succeeded and failed. */ struct test_results { inline static std::size_t tests_succeeded = 0; inline static std::size_t tests_failed = 0; }; /** * @brief Check if a condition is met, report the result, and keep count of the total number of successes and failures. * * @param condition The condition to check. */ void check(const bool condition) { if (condition) { logln_ansi(ansi_success, "-> passed."); ++test_results::tests_succeeded; } else { logln_ansi(ansi_error, "-> FAILED!"); ++test_results::tests_failed; } } /** * @brief Check if the expected result has been obtained, report the result, and keep count of the total number of successes and failures. * * @param expected The expected result. * @param obtained The obtained result. */ template void check(const T1& expected, const T2& obtained) { const bool passed = (expected == static_cast(obtained)); log_ansi(passed ? ansi_success : ansi_error, "- Expected: ", expected, ", obtained: ", obtained, ' '); check(passed); } /** * @brief Check if all of the flags in a container are equal to the desired value. * * @param flags The container. * @param value The desired value. 
* @return `true` if all flags are equal to the desired value, or `false` otherwise. */ template bool all_flags_equal(const T& flags, const V& value) { return std::all_of(flags.begin(), flags.end(), [&value](const auto& flag) { return flag == value; }); } /** * @brief Check if all of the flags in a container are set. * * @param flags The container. * @return `true` if all flags are set, or `false` otherwise. */ template bool all_flags_set(const T& flags) { return std::all_of(flags.begin(), flags.end(), [](const bool flag) { return flag; }); } /** * @brief Check if none of the flags in a container are set. * * @param flags The container. * @return `true` if no flags are set, or `false` otherwise. */ template bool no_flags_set(const T& flags) { return std::none_of(flags.begin(), flags.end(), [](const bool flag) { return flag; }); } // ======================================= // Functions for generating random numbers // ======================================= /** * @brief Obtain a random number in a specified range. * * @tparam T The type of the values in the range. * @param min The minimum value of the range. * @param max The maximum value of the range. * @return The random number. */ template T random(const T min, const T max) { static std::random_device rand_device; static std::mt19937_64 twister(rand_device()); std::uniform_int_distribution dist(min, max); return dist(twister); } /** * @brief Obtain an ordered pair of two distinct random numbers in a specified range. * * @tparam T The type of the values in the range. * @param min The minimum value of the range. * @param max The maximum value of the range. Must be larger than `min`. * @return The random numbers. 
*/ template std::pair random_pair(const T min, const T max) { static std::random_device rand_device; static std::mt19937_64 twister(rand_device()); std::uniform_int_distribution dist(min, max); T first = dist(twister); T second; do second = dist(twister); while (second == first); if (second < first) return {second, first}; return {first, second}; } // ======================================== // Functions for detecting various features // ======================================== /** * @brief Make a string out of items of various types. * * @tparam T The types of the items. * @param items The items. * @return The generated string. */ template std::string make_string(const T&... items) { std::ostringstream out; (out << ... << items); return out.str(); } /** * @brief Detect the compiler used to compile this program. * * @return A string describing the compiler. */ std::string detect_compiler() { #if defined(__apple_build_version__) return make_string("Apple Clang v", __clang_major__, '.', __clang_minor__, '.', __clang_patchlevel__); #elif defined(__clang__) return make_string("Clang v", __clang_major__, '.', __clang_minor__, '.', __clang_patchlevel__); #elif defined(__GNUC__) return make_string("GCC v", __GNUC__, '.', __GNUC_MINOR__, '.', __GNUC_PATCHLEVEL__); #elif defined(_MSC_FULL_VER) std::string msvc_full_ver = make_string(_MSC_FULL_VER); return make_string("MSVC v", msvc_full_ver.substr(0, 2), '.', msvc_full_ver.substr(2, 2), '.', msvc_full_ver.substr(4)); #else return "Other"; #endif } /** * @brief Detect the C++ standard used to compile this program. * * @return A string describing the C++ standard. */ std::string detect_cpp_standard() { #if __cplusplus == 201703L return "C++17"; #elif __cplusplus == 202002L return "C++20"; #elif (__cplusplus == 202302L) || (defined(_MSC_VER) && __cplusplus > 202002L) // MSVC with `/std:c++latest` only guarantees `__cplusplus` is "at least one higher" than the highest supported version. 
return "C++23"; #else return "Other"; #endif } /** * @brief Detect the C++ standard library used to compile this program. * * @return A string describing the C++ standard library. */ std::string detect_lib() { #if defined(_LIBCPP_VERSION) return make_string("LLVM libc++ v", _LIBCPP_VERSION / 10000, '.', (_LIBCPP_VERSION / 100) % 100, '.', _LIBCPP_VERSION % 100); // NOLINT(readability-magic-numbers) #elif defined(_GLIBCXX_RELEASE) std::string out = make_string("GNU libstdc++ v", _GLIBCXX_RELEASE); #ifdef __GLIBCXX__ out += make_string(" (", __GLIBCXX__, ')'); #endif return out; #elif defined(_MSVC_STL_VERSION) std::string out = make_string("Microsoft STL v", _MSVC_STL_VERSION); #ifdef _MSVC_STL_UPDATE out += make_string(" (", _MSVC_STL_UPDATE, ')'); #endif return out; #else return "Other"; #endif } /** * @brief Detect the operating system used to compile this program. * * @return A string describing the operating system. */ std::string detect_os() { #if defined(__linux__) return "Linux"; #elif defined(_WIN32) return "Windows"; #elif defined(__APPLE__) return "macOS"; #else return "Other"; #endif } /** * @brief Detect available C++ features and print them out. 
*/ void print_features() { constexpr int width = 35; log(std::left); log_ansi(ansi_title, std::setw(width), "* __cpp_concepts:"); #ifdef __cpp_concepts logln_ansi(ansi_subtitle, __cpp_concepts); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_exceptions:"); #ifdef __cpp_exceptions logln_ansi(ansi_subtitle, __cpp_exceptions); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_impl_three_way_comparison:"); #ifdef __cpp_impl_three_way_comparison logln_ansi(ansi_subtitle, __cpp_impl_three_way_comparison); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_lib_format:"); #ifdef __cpp_lib_format logln_ansi(ansi_subtitle, __cpp_lib_format); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_lib_int_pow2:"); #ifdef __cpp_lib_int_pow2 logln_ansi(ansi_subtitle, __cpp_lib_int_pow2); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_lib_jthread:"); #ifdef __cpp_lib_jthread logln_ansi(ansi_subtitle, __cpp_lib_jthread); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_lib_modules:"); #ifdef __cpp_lib_modules logln_ansi(ansi_subtitle, __cpp_lib_modules); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_lib_move_only_function:"); #ifdef __cpp_lib_move_only_function logln_ansi(ansi_subtitle, __cpp_lib_move_only_function); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_lib_semaphore:"); #ifdef __cpp_lib_semaphore logln_ansi(ansi_subtitle, __cpp_lib_semaphore); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* __cpp_modules:"); #ifdef __cpp_modules logln_ansi(ansi_subtitle, __cpp_modules); #else logln_ansi(ansi_subtitle, "N/A"); #endif log_ansi(ansi_title, std::setw(width), "* 
__has_include():"); #if __has_include() logln_ansi(ansi_subtitle, "true"); #else logln_ansi(ansi_subtitle, "false"); #endif logln(std::right); } // ========================================= // Functions to verify the number of threads // ========================================= /** * @brief Obtain a list of unique thread IDs in the pool. Submits a number of tasks equal to twice the thread count into the pool. Each task stores the ID of the thread running it, and then waits until as many tasks as the thread count are finished. This ensures that each thread in the pool runs at least one task, as the pool gets filled completely. * * @param pool The thread pool to check. */ std::vector obtain_unique_threads(BS::thread_pool<>& pool) { const std::size_t num_tasks = pool.get_thread_count() * 2; std::vector thread_ids(num_tasks); std::atomic total_count = 0; counting_semaphore sem(0); for (std::thread::id& tid : thread_ids) { pool.detach_task( [&total_count, &tid, &sem, thread_count = pool.get_thread_count(), num_tasks] { tid = std::this_thread::get_id(); if (++total_count == thread_count) sem.release(static_cast(num_tasks)); sem.acquire(); }); } pool.wait(); std::sort(thread_ids.begin(), thread_ids.end()); const std::vector::iterator last = std::unique(thread_ids.begin(), thread_ids.end()); thread_ids.erase(last, thread_ids.end()); return thread_ids; } /** * @brief Check that the constructor works. Also checks that get_thread_ids() reports the correct IDs. 
*/ void check_constructor() { BS::thread_pool pool; logln("Checking that the thread pool reports a number of threads equal to the hardware concurrency..."); check(std::thread::hardware_concurrency(), pool.get_thread_count()); logln("Checking that the manually counted number of unique thread IDs is equal to the reported number of threads..."); const std::vector unique_threads = obtain_unique_threads(pool); check(pool.get_thread_count(), unique_threads.size()); logln("Checking that the unique thread IDs obtained are the same as those reported by get_thread_ids()..."); std::vector threads_from_pool = pool.get_thread_ids(); std::sort(threads_from_pool.begin(), threads_from_pool.end()); check(threads_from_pool == unique_threads); } /** * @brief Check that reset() works. */ void check_reset() { BS::thread_pool pool; pool.reset(static_cast(std::thread::hardware_concurrency()) * 2); logln("Checking that after reset() the thread pool reports a number of threads equal to double the hardware concurrency..."); check(std::thread::hardware_concurrency() * 2, pool.get_thread_count()); logln("Checking that after reset() the manually counted number of unique thread IDs is equal to the reported number of threads..."); check(pool.get_thread_count(), obtain_unique_threads(pool).size()); pool.reset(std::thread::hardware_concurrency()); logln("Checking that after a second reset() the thread pool reports a number of threads equal to the hardware concurrency..."); check(std::thread::hardware_concurrency(), pool.get_thread_count()); logln("Checking that after a second reset() the manually counted number of unique thread IDs is equal to the reported number of threads..."); check(pool.get_thread_count(), obtain_unique_threads(pool).size()); } // ======================================= // Functions to verify submission of tasks // ======================================= /** * @brief A class used to detect when a copy or move constructor has been invoked. 
*/ class [[nodiscard]] detect_copy_move { public: detect_copy_move() = default; detect_copy_move(const detect_copy_move& /*other*/) : copied(true) {} detect_copy_move(detect_copy_move&& /*other*/) noexcept : moved(true) {} detect_copy_move& operator=(const detect_copy_move&) = delete; detect_copy_move& operator=(detect_copy_move&&) = delete; ~detect_copy_move() = default; [[nodiscard]] bool get_copied() const { return copied; }; [[nodiscard]] bool get_moved() const { return moved; }; private: bool copied = false; bool moved = false; }; // class detect_copy_move /** * @brief Check that detach_task() or submit_task() work. * * @param which_func A string naming the function to check. */ void check_task(const std::string_view which_func) { BS::thread_pool pool; logln("Checking that ", which_func, " works for a function with no arguments or return value..."); { bool flag = false; const auto func = [&flag] { flag = true; }; if (which_func == "detach_task()") { pool.detach_task(func); pool.wait(); } else { pool.submit_task(func).wait(); } check(flag); } logln("Checking that ", which_func, " works for a function with one argument and no return value..."); { bool flag = false; const auto func = [](bool& flag_) { flag_ = true; }; if (which_func == "detach_task()") { pool.detach_task( [&func, &flag] { func(flag); }); pool.wait(); } else { pool.submit_task( [&func, &flag] { func(flag); }) .wait(); } check(flag); } logln("Checking that ", which_func, " works for a function with two arguments and no return value..."); { bool flag1 = false; bool flag2 = false; const auto func = [](bool& flag1_, bool& flag2_) { flag1_ = flag2_ = true; }; if (which_func == "detach_task()") { pool.detach_task( [&func, &flag1, &flag2] { func(flag1, flag2); }); pool.wait(); } else { pool.submit_task( [&func, &flag1, &flag2] { func(flag1, flag2); }) .wait(); } check(flag1 && flag2); } if (which_func == "submit_task()") { logln("Checking that submit_task() works for a function with no arguments and a 
return value..."); { bool flag = false; const auto func = [&flag] { return (flag = true); }; std::future flag_future = pool.submit_task(func); check(flag_future.get() && flag); } logln("Checking that submit_task() works for a function with one argument and a return value..."); { bool flag = false; const auto func = [](bool& flag_) { return (flag_ = true); }; std::future flag_future = pool.submit_task( [&func, &flag] { return func(flag); }); check(flag_future.get() && flag); } logln("Checking that submit_task() works for a function with two arguments and a return value..."); { bool flag1 = false; bool flag2 = false; const auto func = [](bool& flag1_, bool& flag2_) { return (flag1_ = flag2_ = true); }; std::future flag_future = pool.submit_task( [&func, &flag1, &flag2] { return func(flag1, flag2); }); check(flag_future.get() && flag1 && flag2); } } logln("Checking that ", which_func, " does not create unnecessary copies of the function object..."); { bool copied = false; bool moved = false; auto test_copy = [detect = detect_copy_move(), &copied, &moved] { copied = detect.get_copied(); moved = detect.get_moved(); }; if (which_func == "detach_task()") { pool.detach_task(std::move(test_copy)); pool.wait(); } else { pool.submit_task(std::move(test_copy)).wait(); } check(!copied && moved); } logln("Checking that ", which_func, " correctly accepts arguments passed by value, reference, and constant reference..."); { { logln("Value:"); const std::int64_t pass_me_by_value = 0; const auto func_value = [](std::int64_t passed_by_value) { if (++passed_by_value != 1) static_cast(0); }; if (which_func == "detach_task()") { pool.detach_task( [&func_value, pbv = pass_me_by_value] { func_value(pbv); }); pool.wait(); } else { pool.submit_task( [&func_value, pbv = pass_me_by_value] { func_value(pbv); }) .wait(); } check(pass_me_by_value == 0); } { logln("Reference:"); std::int64_t pass_me_by_ref = 0; const auto func_ref = [](std::int64_t& passed_by_ref) { ++passed_by_ref; }; if 
(which_func == "detach_task()") { pool.detach_task( [&func_ref, &pass_me_by_ref] { func_ref(pass_me_by_ref); }); pool.wait(); } else { pool.submit_task( [&func_ref, &pass_me_by_ref] { func_ref(pass_me_by_ref); }) .wait(); } check(pass_me_by_ref == 1); } { logln("Constant reference:"); std::int64_t pass_me_by_cref = 0; binary_semaphore sem(0); const auto func_cref = [&sem](const std::int64_t& passed_by_cref) { sem.acquire(); check(passed_by_cref == 1); }; if (which_func == "detach_task()") { pool.detach_task( [&func_cref, &pass_me_by_cref = std::as_const(pass_me_by_cref)] { func_cref(pass_me_by_cref); }); ++pass_me_by_cref; sem.release(); pool.wait(); } else { const std::future future = pool.submit_task( [&func_cref, &pass_me_by_cref = std::as_const(pass_me_by_cref)] { func_cref(pass_me_by_cref); }); ++pass_me_by_cref; sem.release(); future.wait(); } } } } /** * @brief A class to facilitate checking that member functions of different types have been successfully submitted. */ class [[nodiscard]] flag_class { public: explicit flag_class(BS::thread_pool<>& pool_) : pool(&pool_) {} void set_flag_no_args() { flag = true; } void set_flag_one_arg(const bool arg) { flag = arg; } [[nodiscard]] bool set_flag_no_args_return() { return (flag = true); } [[nodiscard]] bool set_flag_one_arg_return(const bool arg) { return (flag = arg); } [[nodiscard]] bool get_flag() const { return flag; } void detach_test_flag_no_args() { pool->detach_task( [this] { set_flag_no_args(); }); pool->wait(); check(get_flag()); } void detach_test_flag_one_arg() { pool->detach_task( [this] { set_flag_one_arg(true); }); pool->wait(); check(get_flag()); } void submit_test_flag_no_args() { pool->submit_task( [this] { set_flag_no_args(); }) .wait(); check(get_flag()); } void submit_test_flag_one_arg() { pool->submit_task( [this] { set_flag_one_arg(true); }) .wait(); check(get_flag()); } void submit_test_flag_no_args_return() { std::future flag_future = pool->submit_task( [this] { return 
set_flag_no_args_return(); }); check(flag_future.get() && get_flag()); } void submit_test_flag_one_arg_return() { std::future flag_future = pool->submit_task( [this] { return set_flag_one_arg_return(true); }); check(flag_future.get() && get_flag()); } private: bool flag = false; BS::thread_pool<>* pool; }; // class flag_class /** * @brief Check that submitting member functions works. */ void check_member_function() { BS::thread_pool pool; logln("Checking that detach_task() works for a member function with no arguments or return value..."); { flag_class flag(pool); pool.detach_task( [&flag] { flag.set_flag_no_args(); }); pool.wait(); check(flag.get_flag()); } logln("Checking that detach_task() works for a member function with one argument and no return value..."); { flag_class flag(pool); pool.detach_task( [&flag] { flag.set_flag_one_arg(true); }); pool.wait(); check(flag.get_flag()); } logln("Checking that submit_task() works for a member function with no arguments or return value..."); { flag_class flag(pool); pool.submit_task( [&flag] { flag.set_flag_no_args(); }) .wait(); check(flag.get_flag()); } logln("Checking that submit_task() works for a member function with one argument and no return value..."); { flag_class flag(pool); pool.submit_task( [&flag] { flag.set_flag_one_arg(true); }) .wait(); check(flag.get_flag()); } logln("Checking that submit_task() works for a member function with no arguments and a return value..."); { flag_class flag(pool); std::future flag_future = pool.submit_task( [&flag] { return flag.set_flag_no_args_return(); }); check(flag_future.get() && flag.get_flag()); } logln("Checking that submit_task() works for a member function with one argument and a return value..."); { flag_class flag(pool); std::future flag_future = pool.submit_task( [&flag] { return flag.set_flag_one_arg_return(true); }); check(flag_future.get() && flag.get_flag()); } } /** * @brief Check that submitting member functions within an object works. 
*/
void check_member_function_within_object()
{
    BS::thread_pool pool;

    // Each case uses a fresh object so that a flag left set by an earlier case cannot mask a failure.
    logln("Checking that detach_task() works within an object for a member function with no arguments or return value...");
    {
        flag_class flag(pool);
        flag.detach_test_flag_no_args();
    }

    logln("Checking that detach_task() works within an object for a member function with one argument and no return value...");
    {
        flag_class flag(pool);
        flag.detach_test_flag_one_arg();
    }

    logln("Checking that submit_task() works within an object for a member function with no arguments or return value...");
    {
        flag_class flag(pool);
        flag.submit_test_flag_no_args();
    }

    logln("Checking that submit_task() works within an object for a member function with one argument and no return value...");
    {
        flag_class flag(pool);
        flag.submit_test_flag_one_arg();
    }

    logln("Checking that submit_task() works within an object for a member function with no arguments and a return value...");
    {
        flag_class flag(pool);
        flag.submit_test_flag_no_args_return();
    }

    logln("Checking that submit_task() works within an object for a member function with one argument and a return value...");
    {
        flag_class flag(pool);
        flag.submit_test_flag_one_arg_return();
    }
}

/**
 * @brief A global flag set to `true` by each of the callables below, so that a test can tell whether the callable was actually invoked.
 */
std::atomic<bool> check_callables_flag = false;

/**
 * @brief A plain free function that sets `check_callables_flag`.
 */
void normal_func()
{
    check_callables_flag = true;
}

/**
 * @brief A function object whose call operator sets `check_callables_flag`.
 */
struct function_object
{
    void operator()()
    {
        check_callables_flag = true;
    }
};

/**
 * @brief A class with a static member function that sets `check_callables_flag`.
 */
struct has_member_function
{
    static void member_function()
    {
        check_callables_flag = true;
    }
};

/**
 * @brief Check that different callable types are accepted by the thread pool.
*/ void check_callables() { BS::thread_pool pool; logln("Checking normal function..."); pool.submit_task(normal_func).wait(); check(check_callables_flag); logln("Checking function pointer..."); check_callables_flag = false; void (*const func_ptr)() = normal_func; // NOLINT(misc-const-correctness) pool.submit_task(func_ptr).wait(); check(check_callables_flag); logln("Checking pointer to static member function..."); check_callables_flag = false; auto member_func_ptr = has_member_function::member_function; pool.submit_task(member_func_ptr).wait(); check(check_callables_flag); logln("Checking lambda expression..."); check_callables_flag = false; const auto lambda = [] { check_callables_flag = true; }; pool.submit_task(lambda).wait(); check(check_callables_flag); logln("Checking std::function..."); check_callables_flag = false; const std::function function = [] { check_callables_flag = true; }; pool.submit_task(function).wait(); check(check_callables_flag); #ifdef __cpp_lib_move_only_function logln("Checking std::move_only_function..."); check_callables_flag = false; std::move_only_function move_only_function = [] { check_callables_flag = true; }; pool.submit_task(std::move(move_only_function)).wait(); check(check_callables_flag); #else logln_ansi(ansi_info, "Note: std::move_only_function not available, skipping the corresponding test."); #endif logln("Checking std::bind..."); check_callables_flag = false; const auto lambda_for_bind = [](std::atomic& flag) { flag = true; }; pool.submit_task(std::bind(lambda_for_bind, std::ref(check_callables_flag))).wait(); check(check_callables_flag); logln("Checking function object..."); check_callables_flag = false; const function_object func_obj_instance; pool.submit_task(func_obj_instance).wait(); check(check_callables_flag); } // ===================================== // Functions to verify waiting for tasks // ===================================== /** * @brief Check that wait() works. 
*/ void check_wait() { constexpr std::chrono::milliseconds sleep_time(10); BS::thread_pool pool; const std::size_t num_tasks = pool.get_thread_count() * 10; std::vector> flags(num_tasks); for (std::size_t i = 0; i < num_tasks; ++i) { pool.detach_task( [&flags, i, sleep_time] { std::this_thread::sleep_for(sleep_time); flags[i] = true; }); } logln("Waiting for tasks..."); pool.wait(); check(all_flags_set(flags)); } /** * @brief Check that wait() correctly blocks all external threads that call it. */ void check_wait_blocks() { constexpr std::chrono::milliseconds sleep_time(100); constexpr std::size_t num_waiting_tasks = 4; BS::thread_pool pool; binary_semaphore sem(0); logln("Checking that wait() correctly blocks all external threads that call it..."); pool.detach_task( [&sem] { logln("Task submitted to pool 1 and waiting to be released..."); sem.acquire(); logln("Task released."); }); BS::thread_pool temp_pool(num_waiting_tasks); std::vector> flags(num_waiting_tasks); const auto waiting_task = [&flags, &pool](const std::size_t task_num) { logln("Task ", task_num, " submitted to pool 2 and waiting for pool 1's task to finish..."); pool.wait(); logln("Task ", task_num, " finished waiting."); flags[task_num] = true; }; for (std::size_t i = 0; i < num_waiting_tasks; ++i) { temp_pool.detach_task( [&waiting_task, i] { waiting_task(i); }); } std::this_thread::sleep_for(sleep_time); check(no_flags_set(flags)); sem.release(); temp_pool.wait(); check(all_flags_set(flags)); } /** * @brief Check that wait_for() works. */ void check_wait_for() { constexpr std::chrono::milliseconds long_sleep_time(250); constexpr std::chrono::milliseconds short_sleep_time(10); BS::thread_pool pool; logln("Checking that wait_for() works..."); std::atomic done = false; pool.detach_task( [&done, long_sleep_time] { std::this_thread::sleep_for(long_sleep_time); done = true; }); logln("Task that lasts ", long_sleep_time.count(), "ms submitted. 
Waiting for ", short_sleep_time.count(), "ms..."); pool.wait_for(short_sleep_time); check(!done); logln("Waiting for ", long_sleep_time.count() * 2, "ms..."); pool.wait_for(long_sleep_time * 2); check(done); } /** * @brief Check that wait_until() works. */ void check_wait_until() { constexpr std::chrono::milliseconds long_sleep_time(250); constexpr std::chrono::milliseconds short_sleep_time(10); BS::thread_pool pool; logln("Checking that wait_until() works..."); std::atomic done = false; pool.detach_task( [&done, long_sleep_time] { std::this_thread::sleep_for(long_sleep_time); done = true; }); const std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); logln("Task that lasts ", long_sleep_time.count(), "ms submitted. Waiting until ", short_sleep_time.count(), "ms from submission time..."); pool.wait_until(now + short_sleep_time); check(!done); logln("Waiting until ", long_sleep_time.count() * 2, "ms from submission time..."); pool.wait_until(now + long_sleep_time * 2); check(done); } // An auxiliary thread pool used by check_wait_multiple_deadlock(). It's a global variable so that the program will not get stuck upon destruction of this pool if a deadlock actually occurs. BS::thread_pool check_wait_multiple_deadlock_pool; /** * @brief Check that calling wait() more than once doesn't create a deadlock. 
*/ void check_wait_multiple_deadlock() { constexpr std::chrono::milliseconds sleep_time(500); constexpr std::size_t n_waiting_tasks = 1000; logln("Checking for deadlocks when waiting for tasks..."); BS::thread_pool pool(1); pool.detach_task( [sleep_time] { std::this_thread::sleep_for(sleep_time); }); std::atomic count = 0; for (std::size_t j = 0; j < n_waiting_tasks; ++j) { check_wait_multiple_deadlock_pool.detach_task( [&pool, &count] { pool.wait(); ++count; }); } bool passed = false; while (true) { const std::size_t old_count = count; check_wait_multiple_deadlock_pool.wait_for(sleep_time * 2); if (count == n_waiting_tasks) { logln_ansi(ansi_success, "All waiting tasks successfully finished!"); passed = true; break; } if (count == old_count) { logln_ansi(ansi_error, "Error: deadlock detected!"); passed = false; break; } logln(count, " tasks out of ", n_waiting_tasks, " finished waiting..."); } check(passed); } #ifdef __cpp_exceptions // An auxiliary thread pool used by check_wait_self_deadlock(). It's a global variable so that the program will not get stuck upon destruction of this pool if a deadlock actually occurs. BS::wdc_thread_pool check_wait_self_deadlock_pool; /** * @brief Check that calling wait() from within a thread of the same pool throws an exception instead of creating a deadlock. 
*/ void check_wait_self_deadlock() { constexpr std::chrono::milliseconds sleep_time(100); logln("Checking for deadlocks when waiting from within a thread of the same pool..."); std::atomic passed = false; check_wait_self_deadlock_pool.detach_task( [&passed] { try { check_wait_self_deadlock_pool.wait(); } catch (const BS::wait_deadlock&) { passed = true; } }); check_wait_self_deadlock_pool.wait_for(sleep_time); check(passed); } #endif // ======================================== // Functions to verify loop parallelization // ======================================== /** * @brief Check that detach_loop() or submit_loop() work for a specific range of indices split over a specific number of tasks, with no return value. * * @param pool The thread pool to check. * @param random_start The first index in the loop. * @param random_end The last index in the loop plus 1. * @param num_tasks The number of tasks. * @param which_func A string naming the function to check. * @return `true` if the check succeeded, `false` otherwise. */ bool check_loop_no_return(BS::thread_pool<>& pool, const std::int64_t random_start, const std::int64_t random_end, const std::size_t num_tasks, const std::string_view which_func) { logln("Verifying that ", which_func, " from ", random_start, " to ", random_end, " with ", num_tasks, num_tasks == 1 ? 
" task" : " tasks", " modifies all indices exactly once..."); const std::size_t num_indices = static_cast(random_end - random_start); std::vector> flags(num_indices); std::atomic indices_out_of_range = false; const auto loop = [&flags, random_start, random_end, &indices_out_of_range](const std::int64_t idx) { if (idx < random_start || idx >= random_end) indices_out_of_range = true; else ++flags[static_cast(idx - random_start)]; }; if (which_func == "detach_loop()") { pool.detach_loop(random_start, random_end, loop, num_tasks); pool.wait(); } else { pool.submit_loop(random_start, random_end, loop, num_tasks).wait(); } if (indices_out_of_range) { logln_ansi(ansi_error, "Error: Loop indices out of range!"); return false; } return all_flags_equal(flags, 1); } /** * @brief Check that detach_loop() and submit_loop() work using several different random values for the range of indices and number of tasks. */ void check_loop() { constexpr std::int64_t range = 1000000; constexpr std::size_t repeats = 10; BS::thread_pool pool; for (std::size_t i = 0; i < repeats; ++i) { const std::pair indices = random_pair(-range, range); check(check_loop_no_return(pool, indices.first, indices.second, random(1, pool.get_thread_count()), "detach_loop()")); } for (std::size_t i = 0; i < repeats; ++i) { const std::pair indices = random_pair(-range, range); check(check_loop_no_return(pool, indices.first, indices.second, random(1, pool.get_thread_count()), "submit_loop()")); } logln("Verifying that detach_loop() with identical start and end indices does nothing..."); { std::atomic count = 0; const std::int64_t index = random(-range, range); logln("Range: ", index, " to ", index); pool.detach_loop(index, index, [&count](const std::int64_t) { ++count; }); pool.wait(); check(count == 0); } logln("Verifying that submit_loop() with identical start and end indices does nothing..."); { std::atomic count = 0; const std::int64_t index = random(-range, range); logln("Range: ", index, " to ", index); 
pool.submit_loop(index, index, [&count](const std::int64_t) { ++count; }) .wait(); check(count == 0); } logln("Verifying that detach_loop() with end index smaller than the start index does nothing..."); { std::atomic count = 0; const std::pair indices = random_pair(-range, range); logln("Range: ", indices.second, " to ", indices.first); pool.detach_loop(indices.second, indices.first, [&count](const std::int64_t) { ++count; }); pool.wait(); check(count == 0); } logln("Verifying that submit_loop() with end index smaller than the start index does nothing..."); { std::atomic count = 0; const std::pair indices = random_pair(-range, range); logln("Range: ", indices.second, " to ", indices.first); pool.submit_loop(indices.second, indices.first, [&count](const std::int64_t) { ++count; }) .wait(); check(count == 0); } logln("Trying detach_loop() with a number of tasks larger than the number of indices:"); { const std::int64_t start = random(-range, range); check(check_loop_no_return(pool, start, start + random(0, static_cast(pool.get_thread_count() * 2)), random(pool.get_thread_count() * 2, pool.get_thread_count() * 4), "detach_loop()")); } logln("Trying submit_loop() with a number of tasks larger than the number of indices:"); { const std::int64_t start = random(-range, range); check(check_loop_no_return(pool, start, start + random(0, static_cast(pool.get_thread_count() * 2)), random(pool.get_thread_count() * 2, pool.get_thread_count() * 4), "submit_loop()")); } } /** * @brief Check that detach_blocks() or submit_blocks() work for a specific range of indices split over a specific number of tasks, with no return value. * * @param pool The thread pool to check. * @param random_start The first index in the loop. * @param random_end The last index in the loop plus 1. * @param num_tasks The number of tasks. * @param which_func A string naming the function to check. * @return `true` if the check succeeded, `false` otherwise. 
*/ bool check_blocks_no_return(BS::thread_pool<>& pool, const std::int64_t random_start, const std::int64_t random_end, const std::size_t num_tasks, const std::string_view which_func) { logln("Verifying that ", which_func, " from ", random_start, " to ", random_end, " with ", num_tasks, num_tasks == 1 ? " task" : " tasks", " modifies all indices exactly once..."); const std::size_t num_indices = static_cast(random_end - random_start); std::vector> flags(num_indices); std::atomic indices_out_of_range = false; const auto loop = [&flags, random_start, random_end, &indices_out_of_range](const std::int64_t start, const std::int64_t end) { if (start < random_start || end > random_end) { indices_out_of_range = true; } else { for (std::int64_t i = start; i < end; ++i) ++flags[static_cast(i - random_start)]; } }; if (which_func == "detach_blocks()") { pool.detach_blocks(random_start, random_end, loop, num_tasks); pool.wait(); } else { pool.submit_blocks(random_start, random_end, loop, num_tasks).wait(); } if (indices_out_of_range) { logln_ansi(ansi_error, "Error: Block indices out of range!"); return false; } return all_flags_equal(flags, 1); } /** * @brief Check that submit_blocks() works for a specific range of indices split over a specific number of tasks, with a return value. * * @param pool The thread pool to check. * @param random_start The first index in the loop. * @param random_end The last index in the loop plus 1. * @param num_tasks The number of tasks. */ void check_blocks_return(BS::thread_pool<>& pool, const std::int64_t random_start, const std::int64_t random_end, const std::size_t num_tasks) { logln("Verifying that submit_blocks() from ", random_start, " to ", random_end, " with ", num_tasks, num_tasks == 1 ? 
" task" : " tasks", " correctly sums all indices..."); const auto loop = [](const std::int64_t start, const std::int64_t end) { std::int64_t total = 0; for (std::int64_t i = start; i < end; ++i) total += i; return total; }; const std::vector sums_vector = pool.submit_blocks(random_start, random_end, loop, num_tasks).get(); std::int64_t sum = 0; for (const std::int64_t partial_sum : sums_vector) sum += partial_sum; check(std::abs(random_start - random_end) * (random_start + random_end - 1) / 2, sum); } /** * @brief Check that detach_blocks() and submit_blocks() work using several different random values for the range of indices and number of tasks. */ void check_blocks() { constexpr std::int64_t range = 1000000; constexpr std::size_t repeats = 10; BS::thread_pool pool; for (std::size_t i = 0; i < repeats; ++i) { const std::pair indices = random_pair(-range, range); check(check_blocks_no_return(pool, indices.first, indices.second, random(1, pool.get_thread_count()), "detach_blocks()")); } for (std::size_t i = 0; i < repeats; ++i) { const std::pair indices = random_pair(-range, range); check(check_blocks_no_return(pool, indices.first, indices.second, random(1, pool.get_thread_count()), "submit_blocks()")); } for (std::size_t i = 0; i < repeats; ++i) { const std::pair indices = random_pair(-range, range); check_blocks_return(pool, indices.first, indices.second, random(1, pool.get_thread_count())); } logln("Verifying that detach_blocks() with identical start and end indices does nothing..."); { std::atomic count = 0; const std::int64_t index = random(-range, range); logln("Range: ", index, " to ", index); pool.detach_blocks(index, index, [&count](const std::int64_t, const std::int64_t) { ++count; }); pool.wait(); check(count == 0); } logln("Verifying that submit_blocks() with identical start and end indices does nothing..."); { std::atomic count = 0; const std::int64_t index = random(-range, range); logln("Range: ", index, " to ", index); pool.submit_blocks(index, 
index, [&count](const std::int64_t, const std::int64_t) { ++count; }) .wait(); check(count == 0); } logln("Verifying that detach_blocks() with end index smaller than the start index does nothing..."); { std::atomic count = 0; const std::pair indices = random_pair(-range, range); logln("Range: ", indices.second, " to ", indices.first); pool.detach_blocks(indices.second, indices.first, [&count](const std::int64_t, const std::int64_t) { ++count; }); pool.wait(); check(count == 0); } logln("Verifying that submit_blocks() with end index smaller than the start index does nothing..."); { std::atomic count = 0; const std::pair indices = random_pair(-range, range); logln("Range: ", indices.second, " to ", indices.first); pool.submit_blocks(indices.second, indices.first, [&count](const std::int64_t, const std::int64_t) { ++count; }) .wait(); check(count == 0); } logln("Trying detach_blocks() with a number of tasks larger than the number of indices:"); { const std::int64_t start = random(-range, range); check(check_blocks_no_return(pool, start, start + random(0, static_cast(pool.get_thread_count() * 2)), random(pool.get_thread_count() * 2, pool.get_thread_count() * 4), "detach_blocks()")); } logln("Trying submit_blocks() with a number of tasks larger than the number of indices:"); { const std::int64_t start = random(-range, range); check(check_blocks_no_return(pool, start, start + random(0, static_cast(pool.get_thread_count() * 2)), random(pool.get_thread_count() * 2, pool.get_thread_count() * 4), "submit_blocks()")); } } // ============================================ // Functions to verify sequence parallelization // ============================================ /** * @brief Check that detach_sequence() or submit_sequence() work for a specific range of indices, with no return value. * * @param pool The thread pool to check. * @param random_start The first index in the sequence. * @param random_end The last index in the sequence plus 1. 
* @param which_func A string naming the function to check. * @return `true` if the check succeeded, `false` otherwise. */ bool check_sequence_no_return(BS::thread_pool<>& pool, const std::int64_t random_start, const std::int64_t random_end, const std::string_view which_func) { logln("Verifying that ", which_func, " from ", random_start, " to ", random_end, " modifies all indices exactly once..."); const std::size_t num_indices = static_cast(random_end - random_start); std::vector> flags(num_indices); std::atomic indices_out_of_range = false; const auto sequence = [&flags, random_start, random_end, &indices_out_of_range](const std::int64_t index) { if (index < random_start || index >= random_end) indices_out_of_range = true; else ++flags[static_cast(index - random_start)]; }; if (which_func == "detach_sequence()") { pool.detach_sequence(random_start, random_end, sequence); pool.wait(); } else { pool.submit_sequence(random_start, random_end, sequence).wait(); } if (indices_out_of_range) { logln_ansi(ansi_error, "Error: Sequence indices out of range!"); return false; } return all_flags_equal(flags, 1); } /** * @brief Check that submit_sequence() works for a specific range of indices, with a return value. * * @param pool The thread pool to check. * @param random_start The first index in the sequence. * @param random_end The last index in the sequence plus 1. 
*/ void check_sequence_return(BS::thread_pool<>& pool, const std::int64_t random_start, const std::int64_t random_end) { logln("Verifying that submit_sequence() from ", random_start, " to ", random_end, " correctly sums all squares of indices..."); const auto sequence = [](const std::int64_t index) { return index * index; }; const std::vector sums_vector = pool.submit_sequence(random_start, random_end, sequence).get(); std::int64_t sum = 0; for (const std::int64_t partial_sum : sums_vector) sum += partial_sum; std::int64_t correct_sum = 0; for (std::int64_t i = random_start; i < random_end; i++) correct_sum += i * i; check(correct_sum, sum); } /** * @brief Check that detach_sequence() and submit_sequence() work using several different random values for the range of indices. */ void check_sequence() { constexpr std::int64_t range = 1000; constexpr std::size_t repeats = 10; BS::thread_pool pool; for (std::size_t i = 0; i < repeats; ++i) { const std::pair indices = random_pair(-range, range); check(check_sequence_no_return(pool, indices.first, indices.second, "detach_sequence()")); } for (std::size_t i = 0; i < repeats; ++i) { const std::pair indices = random_pair(-range, range); check(check_sequence_no_return(pool, indices.first, indices.second, "submit_sequence()")); } for (std::size_t i = 0; i < repeats; ++i) { const std::pair indices = random_pair(-range, range); check_sequence_return(pool, indices.first, indices.second); } logln("Verifying that detach_sequence() with identical start and end indices does nothing..."); { std::atomic count = 0; const std::int64_t index = random(-range, range); logln("Range: ", index, " to ", index); pool.detach_sequence(index, index, [&count](const std::int64_t) { ++count; }); pool.wait(); check(count == 0); } logln("Verifying that submit_sequence() with identical start and end indices does nothing..."); { std::atomic count = 0; const std::int64_t index = random(-range, range); logln("Range: ", index, " to ", index); 
pool.submit_sequence(index, index, [&count](const std::int64_t) { ++count; }) .wait(); check(count == 0); } logln("Verifying that detach_sequence() with end index smaller than the start index does nothing..."); { std::atomic count = 0; const std::pair indices = random_pair(-range, range); logln("Range: ", indices.second, " to ", indices.first); pool.detach_sequence(indices.second, indices.first, [&count](const std::int64_t) { ++count; }); pool.wait(); check(count == 0); } logln("Verifying that submit_sequence() with end index smaller than the start index does nothing..."); { std::atomic count = 0; const std::pair indices = random_pair(-range, range); logln("Range: ", indices.second, " to ", indices.first); pool.submit_sequence(indices.second, indices.first, [&count](const std::int64_t) { ++count; }) .wait(); check(count == 0); } } // =============================================== // Functions to verify task monitoring and control // =============================================== /** * @brief Check that task monitoring works. 
*/ void check_task_monitoring() { constexpr std::chrono::milliseconds sleep_time(300); const std::size_t num_threads = std::min(std::thread::hardware_concurrency(), 4); logln("Creating pool with ", num_threads, " threads."); BS::thread_pool pool(num_threads); logln("Submitting ", num_threads * 3, " tasks."); counting_semaphore sem(0); for (std::size_t i = 0; i < num_threads * 3; ++i) { pool.detach_task( [i, &sem] { sem.acquire(); logln("Task ", i, " released."); }); } std::this_thread::sleep_for(sleep_time); logln("After submission, should have: ", num_threads * 3, " tasks total, ", num_threads, " tasks running, ", num_threads * 2, " tasks queued..."); log("Result: ", pool.get_tasks_total(), " tasks total, ", pool.get_tasks_running(), " tasks running, ", pool.get_tasks_queued(), " tasks queued "); check(pool.get_tasks_total() == num_threads * 3 && pool.get_tasks_running() == num_threads && pool.get_tasks_queued() == num_threads * 2); sem.release(static_cast(num_threads)); std::this_thread::sleep_for(sleep_time); logln("After releasing ", num_threads, " tasks, should have: ", num_threads * 2, " tasks total, ", num_threads, " tasks running, ", num_threads, " tasks queued..."); log("Result: ", pool.get_tasks_total(), " tasks total, ", pool.get_tasks_running(), " tasks running, ", pool.get_tasks_queued(), " tasks queued "); check(pool.get_tasks_total() == num_threads * 2 && pool.get_tasks_running() == num_threads && pool.get_tasks_queued() == num_threads); sem.release(static_cast(num_threads)); std::this_thread::sleep_for(sleep_time); logln("After releasing ", num_threads, " more tasks, should have: ", num_threads, " tasks total, ", num_threads, " tasks running, ", 0, " tasks queued..."); log("Result: ", pool.get_tasks_total(), " tasks total, ", pool.get_tasks_running(), " tasks running, ", pool.get_tasks_queued(), " tasks queued "); check(pool.get_tasks_total() == num_threads && pool.get_tasks_running() == num_threads && pool.get_tasks_queued() == 0); 
sem.release(static_cast(num_threads)); std::this_thread::sleep_for(sleep_time); logln("After releasing the final ", num_threads, " tasks, should have: ", 0, " tasks total, ", 0, " tasks running, ", 0, " tasks queued..."); log("Result: ", pool.get_tasks_total(), " tasks total, ", pool.get_tasks_running(), " tasks running, ", pool.get_tasks_queued(), " tasks queued "); check(pool.get_tasks_total() == 0 && pool.get_tasks_running() == 0 && pool.get_tasks_queued() == 0); } /** * @brief Check that pausing works. */ void check_pausing() { constexpr std::chrono::milliseconds sleep_time(200); BS::pause_thread_pool pool; logln("Checking that the pool correctly reports that it is not paused after construction..."); check(!pool.is_paused()); logln("Pausing pool."); pool.pause(); logln("Checking that the pool correctly reports that it is paused..."); check(pool.is_paused()); logln("Submitting task and waiting."); std::atomic flag = false; pool.detach_task( [&flag] { flag = true; logln("Task executed."); }); std::this_thread::sleep_for(sleep_time); logln("Verifying that the task has not been executed..."); check(!flag); logln("Unpausing pool and waiting."); pool.unpause(); std::this_thread::sleep_for(sleep_time); logln("Verifying that the task has been executed..."); check(flag); logln("Checking that the pool correctly reports that it is not paused..."); check(!pool.is_paused()); } /** * @brief Check that purge() works. 
*/ void check_purge() { constexpr std::chrono::milliseconds long_sleep_time(200); constexpr std::chrono::milliseconds short_sleep_time(100); constexpr std::size_t num_tasks = 10; BS::thread_pool pool(1); logln("Submitting ", num_tasks, " tasks to the pool."); std::vector> flags(num_tasks); for (std::size_t i = 0; i < num_tasks; ++i) { pool.detach_task( [&flags, i, long_sleep_time] { std::this_thread::sleep_for(long_sleep_time); logln("Task ", i, " done."); flags[i] = true; }); } std::this_thread::sleep_for(short_sleep_time); logln("Purging the pool and waiting for tasks..."); pool.purge(); pool.wait(); logln("Checking that only the first task was executed..."); flags[0] = !flags[0]; check(no_flags_set(flags)); } #ifdef __cpp_exceptions // ====================================== // Functions to verify exception handling // ====================================== /** * @brief An exception class to be thrown when testing exception handling. */ struct test_exception : public std::runtime_error { test_exception() : std::runtime_error("Exception thrown!") {}; }; /** * @brief A function that throws a `test_exception`. */ void throws() { logln("Throwing exception..."); throw test_exception(); }; /** * @brief Check that exceptions are forwarded correctly by submit_task(). */ void check_exceptions_submit() { BS::thread_pool pool; logln("Checking that exceptions are forwarded correctly by submit_task()..."); bool caught = false; std::future future = pool.submit_task(throws); try { future.get(); } catch (const test_exception&) { caught = true; } check(caught); } /** * @brief Check that exceptions are forwarded correctly by `BS::multi_future`. 
*/ void check_exceptions_multi_future() { BS::thread_pool pool; logln("Checking that exceptions are forwarded correctly by BS::multi_future..."); bool caught = false; BS::multi_future future; future.push_back(pool.submit_task(throws)); future.push_back(pool.submit_task(throws)); try { future.get(); } catch (const test_exception&) { caught = true; } check(caught); } #endif // ===================================== // Functions to verify vector operations // ===================================== /** * @brief Check that parallelized vector operations work as expected by calculating the sum of two randomized vectors of a specific size in two ways, single-threaded and multithreaded, and comparing the results. * * @param pool The thread pool to check. * @param vector_size The size of the vectors. * @param num_tasks The number of tasks to split the calculation into. * @return `true` if the single-threaded and multithreaded results are equal, `false` otherwise. */ bool check_vector_of_size(BS::thread_pool<>& pool, const std::size_t vector_size, const std::size_t num_tasks) { constexpr std::int64_t value_range = 1000000; std::vector vector_1(vector_size); std::vector vector_2(vector_size); for (std::size_t i = 0; i < vector_size; ++i) { vector_1[i] = random(-value_range, value_range); vector_2[i] = random(-value_range, value_range); } logln("Adding two vectors with ", vector_size, " elements using ", num_tasks, " tasks..."); std::vector sum_single(vector_size); for (std::size_t i = 0; i < vector_size; ++i) sum_single[i] = vector_1[i] + vector_2[i]; std::vector sum_multi(vector_size); pool.submit_blocks( 0, vector_size, [&sum_multi, &vector_1, &vector_2](const std::size_t start, const std::size_t end) { for (std::size_t i = start; i < end; ++i) sum_multi[i] = vector_1[i] + vector_2[i]; }, num_tasks) .wait(); for (std::size_t i = 0; i < vector_size; ++i) { if (sum_single[i] != sum_multi[i]) return false; } return true; } /** * @brief Check that parallelized vector operations 
work as expected. */ void check_vectors() { constexpr std::size_t size_range = 1000000; constexpr std::size_t repeats = 10; BS::thread_pool pool; for (std::size_t i = 0; i < repeats; ++i) check(check_vector_of_size(pool, random(0, size_range), random(1, pool.get_thread_count()))); } // ================================= // Functions to verify task priority // ================================= // Priorities are 8-bit integers, but `std::uniform_int_distribution` needs at least a 16-bit integer. using rand_priority_t = std::int16_t; /** * @brief Check that task priority works as expected with all task submission methods. */ void check_priority() { constexpr std::chrono::milliseconds sleep_time(200); constexpr std::size_t num_tasks = 10; // Set the pool to have only 1 thread, so it can only run 1 task at a time. This will ensure the tasks will be executed in priority order. BS::thread_pool pool(1); pool.pause(); // Create a shuffled list of priorities. std::vector priorities; priorities.reserve(num_tasks - 1); for (std::size_t i = 0; i < num_tasks - 1; ++i) priorities.push_back(static_cast((i % 2 == 0) ? random(0, BS::pr::highest) : random(BS::pr::lowest, 0))); priorities.push_back(BS::pr::lowest); priorities.push_back(0); priorities.push_back(BS::pr::highest); std::shuffle(priorities.begin(), priorities.end(), std::mt19937_64(std::random_device()())); // Submit tasks using various methods in random priority order. 
std::vector execution_order; std::mutex exec_mutex; const auto execute_task_priority = [&execution_order, &exec_mutex](const BS::priority_t priority) { const std::scoped_lock lock(exec_mutex); logln("Task with priority ", static_cast(priority), " executed."); execution_order.push_back(priority); }; const std::vector functions = {"detach_task", "submit_task", "detach_sequence", "submit_sequence", "detach_loop", "submit_loop", "detach_blocks", "submit_blocks"}; for (const BS::priority_t priority : priorities) { const std::string_view func = functions[random(0, functions.size() - 1)]; logln("Launching ", func, "() with priority ", static_cast(priority), "..."); if (func == "detach_task") { pool.detach_task( [priority, &execute_task_priority] { execute_task_priority(priority); }, priority); } else if (func == "submit_task") { std::ignore = pool.submit_task( [priority, &execute_task_priority] { execute_task_priority(priority); }, priority); } else if (func == "detach_sequence") { pool.detach_sequence( 0, 1, [priority, &execute_task_priority](std::int64_t) { execute_task_priority(priority); }, priority); } else if (func == "submit_sequence") { std::ignore = pool.submit_sequence( 0, 1, [priority, &execute_task_priority](std::int64_t) { execute_task_priority(priority); }, priority); } else if (func == "detach_loop") { pool.detach_loop( 0, 1, [priority, &execute_task_priority](std::int64_t) { execute_task_priority(priority); }, 0, priority); } else if (func == "submit_loop") { std::ignore = pool.submit_loop( 0, 1, [priority, &execute_task_priority](std::int64_t) { execute_task_priority(priority); }, 0, priority); } else if (func == "detach_blocks") { pool.detach_blocks( 0, 1, [priority, &execute_task_priority](std::int64_t, std::int64_t) { execute_task_priority(priority); }, 0, priority); } else if (func == "submit_blocks") { std::ignore = pool.submit_blocks( 0, 1, [priority, &execute_task_priority](std::int64_t, std::int64_t) { execute_task_priority(priority); }, 0, 
priority); } } // Unpause the pool so the tasks can be executed, then check that they were executed in the correct order. logln("Checking execution order..."); std::this_thread::sleep_for(sleep_time); pool.unpause(); pool.wait(); std::sort(priorities.rbegin(), priorities.rend()); check(execution_order == priorities); } // ======================================================================= // Functions to verify thread initialization, cleanup, and BS::this_thread // ======================================================================= /** * @brief Check that thread initialization functions and get_index() work. */ void check_init() { logln("Comparing thread indices reported by get_index() using an initialization function passed to reset():"); std::vector> thread_indices(std::thread::hardware_concurrency()); std::atomic correct = true; BS::thread_pool pool; pool.reset( [&thread_indices, &correct](std::size_t idx) { const std::optional reported_idx = BS::this_thread::get_index(); if (reported_idx.has_value()) thread_indices[idx] = reported_idx.value(); else correct = false; }); pool.wait(); logln("Checking that all reported indices have values..."); check(correct); correct = true; for (std::size_t i = 0; i < thread_indices.size(); ++i) { if (thread_indices[i] != i) { correct = false; break; } } logln("Checking that all reported indices are correct..."); check(correct); logln("Verifying that the index of the main thread has no value..."); const std::optional main_idx = BS::this_thread::get_index(); check(!main_idx.has_value()); logln("Verifying that the index of an independent thread has no value..."); std::thread test_thread( [] { const std::optional ind_idx = BS::this_thread::get_index(); check(!ind_idx.has_value()); }); test_thread.join(); } /** * @brief Check that thread cleanup functions work. 
*/ void check_cleanup() { logln("Comparing thread indices reported by get_index() using a cleanup function passed to set_cleanup_func():"); std::vector> thread_indices(std::thread::hardware_concurrency()); std::atomic correct = true; { BS::thread_pool pool; pool.set_cleanup_func( [&thread_indices, &correct](std::size_t idx) { const std::optional reported_idx = BS::this_thread::get_index(); if (reported_idx.has_value()) thread_indices[idx] = reported_idx.value(); else correct = false; }); } logln("Checking that all reported indices have values..."); check(correct); correct = true; for (std::size_t i = 0; i < thread_indices.size(); ++i) { if (thread_indices[i] != i) { correct = false; break; } } logln("Checking that all reported indices are correct..."); check(correct); } /** * @brief Check that get_pool() works. */ void check_get_pool() { logln("Checking that all threads report the correct pool..."); std::vector> thread_pool_ptrs1(std::thread::hardware_concurrency()); std::vector> thread_pool_ptrs2(std::thread::hardware_concurrency()); const auto store_pointers = [](std::vector>& ptrs) { const auto ptr = BS::this_thread::get_pool(); if (ptr.has_value()) ptrs[*BS::this_thread::get_index()] = *ptr; else check(false); }; BS::thread_pool pool1( [&thread_pool_ptrs1, &store_pointers] { store_pointers(thread_pool_ptrs1); }); BS::thread_pool pool2( [&thread_pool_ptrs2, &store_pointers] { store_pointers(thread_pool_ptrs2); }); pool1.wait(); pool2.wait(); const auto check_pointers = [](const std::vector>& ptrs, const BS::thread_pool<>& pool) { check(all_flags_equal(ptrs, (void*)&pool)); }; check_pointers(thread_pool_ptrs1, pool1); check_pointers(thread_pool_ptrs2, pool2); { logln("Verifying that the pool pointer of the main thread has no value..."); const auto ptr = BS::this_thread::get_pool(); check(!ptr.has_value()); } { logln("Verifying that the pool pointer of an independent thread has no value..."); std::thread test_thread( [] { const auto ptr = 
BS::this_thread::get_pool(); check(!ptr.has_value()); }); test_thread.join(); } } // ========================================================= // Functions to verify proper handling of parallelized tasks // ========================================================= /** * @brief A class used to count how many times the copy and move constructors have been invoked since the creation of the initial object. */ class [[nodiscard]] count_copy_move { public: count_copy_move(std::atomic* copied_, std::atomic* moved_) : copied(copied_), moved(moved_) {} count_copy_move(const count_copy_move& other) : copied(other.copied), moved(other.moved) { ++(*copied); } count_copy_move(count_copy_move&& other) noexcept : copied(other.copied), moved(other.moved) { ++(*moved); } count_copy_move& operator=(const count_copy_move&) = delete; count_copy_move& operator=(count_copy_move&&) = delete; ~count_copy_move() = default; private: std::atomic* copied = nullptr; std::atomic* moved = nullptr; }; // class count_copy_move /** * @brief Check, for a specific member function which parallelizes loops or sequences of tasks, that the callable object does not get copied in the process. * * @param which_func A string naming the function to check. */ void check_copy(const std::string_view which_func) { BS::thread_pool pool; const std::size_t num_tasks = pool.get_thread_count() * 10; logln("Checking ", which_func, "..."); std::atomic copied = 0; std::atomic moved = 0; auto task = [detect = count_copy_move(&copied, &moved)](auto&&...) 
{}; if (which_func == "detach_blocks()") pool.detach_blocks(0, num_tasks, std::move(task), num_tasks); else if (which_func == "detach_loop()") pool.detach_loop(0, num_tasks, std::move(task)); else if (which_func == "detach_sequence()") pool.detach_sequence(0, num_tasks, std::move(task)); else if (which_func == "submit_blocks()") std::ignore = pool.submit_blocks(0, num_tasks, std::move(task), num_tasks); else if (which_func == "submit_loop()") std::ignore = pool.submit_loop(0, num_tasks, std::move(task)); else if (which_func == "submit_sequence()") std::ignore = pool.submit_sequence(0, num_tasks, std::move(task)); pool.wait(); logln("Copy count: "); check(0, copied.load()); // Note: Move count will be unpredictable if priority is on, so we don't check it. } /** * @brief Check, for all member functions which parallelize loops or sequences of tasks, that the callable object does not get copied in the process. */ void check_copy_all() { check_copy("detach_blocks()"); check_copy("detach_loop()"); check_copy("detach_sequence()"); check_copy("submit_blocks()"); check_copy("submit_loop()"); check_copy("submit_sequence()"); } /** * @brief A class used to detect if an object was destructed prematurely. */ class detect_destruct { public: explicit detect_destruct(std::atomic* object_exists_) : object_exists(object_exists_) { *object_exists = true; }; detect_destruct(const detect_destruct&) = delete; detect_destruct(detect_destruct&&) noexcept = delete; detect_destruct& operator=(const detect_destruct&) = delete; detect_destruct& operator=(detect_destruct&&) = delete; ~detect_destruct() { *object_exists = false; }; private: std::atomic* object_exists = nullptr; }; /** * @brief Check, for a specific member function which parallelizes loops or sequences of tasks, that if a task that captures a shared pointer is submitted, the pointer is correctly shared between all the iterations of the task. * * @param which_func A string naming the function to check. 
*/
void check_shared_ptr(const std::string_view which_func)
{
    BS::thread_pool pool;
    constexpr std::chrono::milliseconds sleep_time(10);
    const std::size_t num_tasks = pool.get_thread_count() * 10;
    std::atomic<bool> object_exists = false;
    std::atomic<std::size_t> uses_before_destruct = 0;
    std::atomic<std::size_t> uses_after_destruct = 0;
    logln("Checking ", which_func, "...");
    {
        std::shared_ptr<detect_destruct> ptr = std::make_shared<detect_destruct>(&object_exists);
        // Each invocation records whether the object owned by the captured shared pointer was still alive when it ran.
        auto task = [ptr, &object_exists, &uses_before_destruct, &uses_after_destruct, &sleep_time](auto&&...)
        {
            std::this_thread::sleep_for(sleep_time);
            if (object_exists)
                ++uses_before_destruct;
            else
                ++uses_after_destruct;
        };
        if (which_func == "detach_blocks()")
            pool.detach_blocks(0, num_tasks, std::move(task), num_tasks);
        else if (which_func == "detach_loop()")
            pool.detach_loop(0, num_tasks, std::move(task));
        else if (which_func == "detach_sequence()")
            pool.detach_sequence(0, num_tasks, std::move(task));
        else if (which_func == "submit_blocks()")
            std::ignore = pool.submit_blocks(0, num_tasks, std::move(task), num_tasks);
        else if (which_func == "submit_loop()")
            std::ignore = pool.submit_loop(0, num_tasks, std::move(task));
        else if (which_func == "submit_sequence()")
            std::ignore = pool.submit_sequence(0, num_tasks, std::move(task));
        // Release the local owner; `task` has been moved from and goes out of scope at the end of this block.
        ptr.reset();
    }
    pool.wait();
    std::this_thread::sleep_for(sleep_time);
    logln("Uses before destruct:");
    check(num_tasks, uses_before_destruct.load());
    logln("Uses after destruct:");
    check(0, uses_after_destruct.load());
}

/**
 * @brief Check, for all member functions which parallelize loops or sequences of tasks, that if a task that captures a shared pointer is submitted, the pointer is correctly shared between all the iterations of the task.
*/ void check_shared_ptr_all() { check_shared_ptr("detach_blocks()"); check_shared_ptr("detach_loop()"); check_shared_ptr("detach_sequence()"); check_shared_ptr("submit_blocks()"); check_shared_ptr("submit_loop()"); check_shared_ptr("submit_sequence()"); } /** * @brief Check that a task is destructed immediately after it executes, and therefore does not artificially extend the lifetime of any captured objects. */ void check_task_destruct() { constexpr std::chrono::milliseconds sleep_time(20); BS::thread_pool pool; std::atomic object_exists = false; { const std::shared_ptr ptr = std::make_shared(&object_exists); pool.submit_task([ptr] {}).wait(); } std::this_thread::sleep_for(sleep_time); check(!object_exists); } /** * @brief Check that the type trait `BS::common_index_type` works as expected. */ void check_common_index_type() { // NOLINTBEGIN(misc-redundant-expression) logln("Checking std::int8_t..."); check(std::is_same_v, std::int8_t> && std::is_same_v, std::int16_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::int16_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::uint64_t>); logln("Checking std::int16_t..."); check(std::is_same_v, std::int16_t> && std::is_same_v, std::int16_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::int16_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::uint64_t>); logln("Checking std::int32_t..."); check(std::is_same_v, std::int32_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::uint64_t>); logln("Checking std::int64_t..."); check(std::is_same_v, std::int64_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::int64_t> 
&& std::is_same_v, std::int64_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::uint64_t>); logln("Checking std::uint8_t..."); check(std::is_same_v, std::int16_t> && std::is_same_v, std::int16_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::uint8_t> && std::is_same_v, std::uint16_t> && std::is_same_v, std::uint32_t> && std::is_same_v, std::uint64_t>); logln("Checking std::uint16_t..."); check(std::is_same_v, std::int32_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int32_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::uint16_t> && std::is_same_v, std::uint16_t> && std::is_same_v, std::uint32_t> && std::is_same_v, std::uint64_t>); logln("Checking std::uint32_t..."); check(std::is_same_v, std::int64_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::int64_t> && std::is_same_v, std::uint32_t> && std::is_same_v, std::uint32_t> && std::is_same_v, std::uint32_t> && std::is_same_v, std::uint64_t>); logln("Checking std::uint64_t..."); check(std::is_same_v, std::uint64_t> && std::is_same_v, std::uint64_t> && std::is_same_v, std::uint64_t> && std::is_same_v, std::uint64_t> && std::is_same_v, std::uint64_t> && std::is_same_v, std::uint64_t> && std::is_same_v, std::uint64_t> && std::is_same_v, std::uint64_t>); // NOLINTEND(misc-redundant-expression) } // ================================ // Functions to check for deadlocks // ================================ // An auxiliary thread pool used by check_deadlock(). It's a global variable so that the program will not get stuck upon destruction of this pool if a deadlock actually occurs. BS::thread_pool check_deadlock_pool; /** * @brief Check that the specified function does not create deadlocks. The function will be run many times to increase the probability of encountering a deadlock as a result of subtle timing issues. Uses an auxiliary pool so the whole test doesn't get stuck if a deadlock is encountered. 
* * @tparam F The type of the function. * @param task The function to try. */ template void check_deadlock(const F&& task) { constexpr std::chrono::milliseconds sleep_time(200); constexpr std::size_t tries = 10000; std::size_t try_n = 0; check_deadlock_pool.detach_task( [&try_n, &task] { do task(); while (++try_n < tries); }); bool passed = false; while (true) { const std::size_t old_try_n = try_n; check_deadlock_pool.wait_for(sleep_time); if (try_n == tries) { logln_ansi(ansi_success, "Successfully finished all tries!"); passed = true; break; } if (try_n == old_try_n) { logln_ansi(ansi_error, "Error: deadlock detected!"); passed = false; break; } logln("Finished ", try_n, " tries out of ", tries, "..."); } check(passed); } #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS // ==================================== // Functions to check native extensions // ==================================== /** * @brief A map between pre-defined OS process priorities and their string representations. */ const std::map os_process_priority_map = {{BS::os_process_priority::idle, "idle"}, {BS::os_process_priority::below_normal, "below_normal"}, {BS::os_process_priority::normal, "normal"}, {BS::os_process_priority::above_normal, "above_normal"}, {BS::os_process_priority::high, "high"}, {BS::os_process_priority::realtime, "realtime"}}; /** * @brief Get the string representation of an OS process priority. * * @param priority A `std::optional` object. * @return A string containing the name of the priority, or "unknown" if the priority is not recognized, or "N/A" if the optional value is not set. */ std::string os_process_priority_name(const std::optional& priority) { if (priority.has_value()) { const std::map::const_iterator it = os_process_priority_map.find(*priority); return (it != os_process_priority_map.end()) ? it->second : "unknown"; } return "N/A"; } /** * @brief A map between pre-defined OS thread priorities and their string representations. 
*/ const std::map os_thread_priority_map = {{BS::os_thread_priority::idle, "idle"}, {BS::os_thread_priority::lowest, "lowest"}, {BS::os_thread_priority::below_normal, "below_normal"}, {BS::os_thread_priority::normal, "normal"}, {BS::os_thread_priority::above_normal, "above_normal"}, {BS::os_thread_priority::highest, "highest"}, {BS::os_thread_priority::realtime, "realtime"}}; /** * @brief Get the string representation of an OS thread priority. * * @param priority A `std::optional` object. * @return A string containing the name of the priority, or "unknown" if the priority is not recognized, or "N/A" if the optional value is not set. */ std::string os_thread_priority_name(const std::optional& priority) { if (priority.has_value()) { const std::map::const_iterator it = os_thread_priority_map.find(*priority); return (it != os_thread_priority_map.end()) ? it->second : "unknown"; } return "N/A"; } /** * @brief Check if a condition is met, report the result, but do not keep count of the total number of successes and failures, because failure is expected if the test is not run as root. * * @param condition The condition to check. */ void check_root(const bool condition) { if (condition) { logln("-> passed."); ++test_results::tests_succeeded; } else { logln("-> failed, most likely due to insufficient permissions; ignoring."); } } /** * @brief Check if the expected result has been obtained, report the result, but do not keep count of the total number of successes and failures, because failure is expected if the test is not run as root. * * @param expected The expected result. * @param obtained The obtained result. */ template void check_root(const T1& expected, const T2& obtained) { log("- Expected: ", expected, ", obtained: ", obtained, ' '); check_root(expected == static_cast(obtained)); } /** * @brief Check that getting and setting OS process priorities works. 
*/ void check_os_process_priorities() { logln("Checking OS process priorities..."); logln_ansi(ansi_info, "NOTE: This test must be run as admin/root, otherwise it will fail!"); // We go over the priorities in reverse order because on Linux, a non-root user can only decrease the priority, so if we start from the lowest priority, all tests will fail except the first one. const std::vector priorities = {BS::os_process_priority::realtime, BS::os_process_priority::high, BS::os_process_priority::above_normal, BS::os_process_priority::normal, BS::os_process_priority::below_normal, BS::os_process_priority::idle}; for (BS::os_process_priority priority : priorities) { log("Setting OS process priority to ", os_process_priority_name(priority), ' '); // On Windows we should be able to set all the priorities even as non-admin; realtime will "succeed" but actually set the priority to high. On Linux, only root can increase the priority beyond normal. #ifdef _WIN32 check(BS::set_os_process_priority(priority)); #else if (priority >= BS::os_process_priority::normal) check(BS::set_os_process_priority(priority)); else check_root(BS::set_os_process_priority(priority)); #endif const std::optional new_priority = BS::get_os_process_priority(); log("Obtaining new OS process priority "); check(new_priority.has_value()); #ifdef _WIN32 if (priority != BS::os_process_priority::realtime) check(os_process_priority_name(priority), os_process_priority_name(new_priority)); else check_root(os_process_priority_name(priority), os_process_priority_name(new_priority)); #else if (priority >= BS::os_process_priority::normal) check(os_process_priority_name(priority), os_process_priority_name(new_priority)); else check_root(os_process_priority_name(priority), os_process_priority_name(new_priority)); #endif } // Set the priority back to normal after the test ends. This will fail on Linux if not root. 
logln("Setting priority back to normal..."); #ifdef _WIN32 check(BS::set_os_process_priority(BS::os_process_priority::normal)); #else check_root(BS::set_os_process_priority(BS::os_process_priority::normal)); #endif } /** * @brief Check that getting and setting OS thread priorities works. */ void check_os_thread_priorities() { BS::thread_pool pool; pool.detach_task( [] { logln("Checking OS thread priorities for pool threads..."); #ifdef __linux__ logln_ansi(ansi_info, "NOTE: On Linux, this test must be run as root, otherwise it will fail!"); #endif const std::vector priorities = {BS::os_thread_priority::realtime, BS::os_thread_priority::highest, BS::os_thread_priority::above_normal, BS::os_thread_priority::normal, BS::os_thread_priority::below_normal, BS::os_thread_priority::lowest, BS::os_thread_priority::idle}; for (BS::os_thread_priority priority : priorities) { log("Setting OS thread priority to ", os_thread_priority_name(priority), ' '); // On Windows we should be able to set all the priorities even as non-admin, including realtime. On Linux, only root can increase the priority beyond normal. (Also, note that on WSL, even root cannot set the priority to highest or above.) #ifdef _WIN32 check(BS::this_thread::set_os_thread_priority(priority)); #else if (priority <= BS::os_thread_priority::normal) check(BS::this_thread::set_os_thread_priority(priority)); else check_root(BS::this_thread::set_os_thread_priority(priority)); #endif const std::optional new_priority = BS::this_thread::get_os_thread_priority(); log("Obtaining new OS thread priority "); check(new_priority.has_value()); #ifdef _WIN32 check(os_thread_priority_name(priority), os_thread_priority_name(new_priority)); #else check_root(os_thread_priority_name(priority), os_thread_priority_name(new_priority)); #endif } // Set the priority back to normal after the test ends. This will fail on Linux/macOS if not running as root. 
logln("Setting priority back to normal..."); #ifdef _WIN32 check(BS::this_thread::set_os_thread_priority(BS::os_thread_priority::normal)); #else check_root(BS::this_thread::set_os_thread_priority(BS::os_thread_priority::normal)); #endif }); } /** * @brief Check that getting and setting OS thread names works. */ void check_os_thread_names() { logln("Checking OS thread names..."); const std::string name = "BS_thread_pool"; logln("Setting main thread name to \"", name, "\"..."); check(BS::this_thread::set_os_thread_name(name)); logln("Obtaining new OS thread name..."); std::optional new_name = BS::this_thread::get_os_thread_name(); if (new_name.has_value()) { check(true); check(name, *new_name); } else { check(false); } } #if defined(_WIN32) || defined(__linux__) /** * @brief Convert a `std::vector` representing CPU affinity to a string of 0s and 1s. * * @param affinity The affinity. * @return The string. */ std::string affinity_to_string(const std::optional>& affinity) { if (affinity.has_value()) { const std::size_t num_bits = affinity->size(); std::string str(num_bits, ' '); for (std::size_t i = 0; i < num_bits; ++i) str[num_bits - i - 1] = (*affinity)[i] ? '1' : '0'; return str; } return "N/A"; } /** * @brief Check that getting and setting OS process affinity works. */ void check_os_process_affinity() { logln("Checking OS process affinity..."); log("Obtaining initial process affinity "); const std::optional> initial_affinity = BS::get_os_process_affinity(); check(initial_affinity.has_value()); logln("Initial affinity is: ", affinity_to_string(initial_affinity)); const std::size_t num_bits = initial_affinity.has_value() ? 
initial_affinity->size() : std::thread::hardware_concurrency(); log("Setting affinity to CPU 1 only "); std::vector cpu_1_in(num_bits, false); cpu_1_in[0] = true; check(BS::set_os_process_affinity(cpu_1_in)); log("Obtaining new affinity "); const std::optional> cpu_1_out = BS::get_os_process_affinity(); check(cpu_1_out.has_value()); check(affinity_to_string(cpu_1_in), affinity_to_string(cpu_1_out)); log("Setting affinity to alternating CPUs "); std::vector alternating_in(num_bits, false); for (std::size_t i = 0; i < num_bits; ++i) alternating_in[i] = (i % 2 == 1); check(BS::set_os_process_affinity(alternating_in)); log("Obtaining new affinity "); const std::optional> alternating_out = BS::get_os_process_affinity(); check(alternating_out.has_value()); check(affinity_to_string(alternating_in), affinity_to_string(alternating_out)); if (initial_affinity.has_value()) { log("Setting affinity back to initial value "); check(BS::set_os_process_affinity(*initial_affinity)); log("Obtaining new affinity "); const std::optional> initial_out = BS::get_os_process_affinity(); check(initial_out.has_value()); check(affinity_to_string(initial_affinity), affinity_to_string(initial_out)); } } /** * @brief Check that getting and setting OS thread affinity works. */ void check_os_thread_affinity() { BS::thread_pool pool; pool.detach_task( [] { // Since the thread affinity must be a subset of the process affinity, we first set its affinity to all CPUs if it wasn't already. const std::optional> initial_process_affinity = BS::get_os_process_affinity(); const std::size_t num_process_bits = initial_process_affinity.has_value() ? 
initial_process_affinity->size() : std::thread::hardware_concurrency(); const std::vector all_enabled(num_process_bits, true); BS::set_os_process_affinity(all_enabled); logln("Checking OS thread affinity for pool threads..."); log("Obtaining initial thread affinity "); const std::optional> initial_affinity = BS::this_thread::get_os_thread_affinity(); check(initial_affinity.has_value()); logln("Initial affinity is: ", affinity_to_string(initial_affinity)); const std::size_t num_bits = initial_affinity.has_value() ? initial_affinity->size() : std::thread::hardware_concurrency(); log("Setting affinity to CPU 1 only "); std::vector cpu_1_in(num_bits, false); cpu_1_in[0] = true; check(BS::this_thread::set_os_thread_affinity(cpu_1_in)); log("Obtaining new affinity "); const std::optional> cpu_1_out = BS::this_thread::get_os_thread_affinity(); check(cpu_1_out.has_value()); check(affinity_to_string(cpu_1_in), affinity_to_string(cpu_1_out)); log("Setting affinity to alternating CPUs "); std::vector alternating_in(num_bits, false); for (std::size_t i = 0; i < num_bits; ++i) alternating_in[i] = (i % 2 == 1); check(BS::this_thread::set_os_thread_affinity(alternating_in)); log("Obtaining new affinity "); const std::optional> alternating_out = BS::this_thread::get_os_thread_affinity(); check(alternating_out.has_value()); check(affinity_to_string(alternating_in), affinity_to_string(alternating_out)); if (initial_affinity.has_value()) { log("Setting affinity back to initial value "); check(BS::this_thread::set_os_thread_affinity(*initial_affinity)); log("Obtaining new affinity "); const std::optional> initial_out = BS::this_thread::get_os_thread_affinity(); check(initial_out.has_value()); check(affinity_to_string(initial_affinity), affinity_to_string(initial_out)); } if (initial_process_affinity.has_value()) BS::set_os_process_affinity(*initial_process_affinity); }); } #endif /** * @brief Try to set the OS priority of this thread to the highest possible value. 
Also set the name of the thread for debugging purposes. */ void try_os_thread_priority() { if (!BS::this_thread::set_os_thread_priority(BS::os_thread_priority::realtime)) if (!BS::this_thread::set_os_thread_priority(BS::os_thread_priority::highest)) BS::this_thread::set_os_thread_priority(BS::os_thread_priority::above_normal); std::optional idx = BS::this_thread::get_index(); if (idx.has_value()) BS::this_thread::set_os_thread_name(make_string("Benchmark #", *idx)); else BS::this_thread::set_os_thread_name("Benchmark main"); } #endif // ======================== // Functions for benchmarks // ======================== /** * @brief A struct to store the mean and standard deviation of the results of a test. */ struct [[nodiscard]] mean_sd { mean_sd(const double mean_, const double sd_) : mean(mean_), sd(sd_) {} double mean = 0; double sd = 0; }; /** * @brief Print the timing of a specific test. * * @param stats A struct containing the mean and standard deviation. * @param pixels_per_ms The number of pixels per millisecond. */ void print_timing(const mean_sd& stats, const double pixels_per_ms) { constexpr int width_mean = 6; constexpr int width_sd = 4; constexpr int width_pms = 7; logln("-> Mean: ", std::setw(width_mean), stats.mean, " ms, standard deviation: ", std::setw(width_sd), stats.sd, " ms, speed: ", std::setw(width_pms), pixels_per_ms, " pixels/ms."); } /** * @brief Find the index of the minimum element in a vector. * * @tparam T The type of elements in the vector. * @param vec The vector. * @return The index of the smallest element in the vector. */ template std::size_t min_element_index(const std::vector& vec) { return static_cast(std::distance(vec.begin(), std::min_element(vec.begin(), vec.end()))); } /** * @brief Calculate and print the speedup obtained by multithreading. * * @param timings A vector of the timings corresponding to different numbers of tasks. * @param try_tasks A vector containing the numbers of tasks tried. 
*/ void print_speedup(const std::vector& timings, const std::vector& try_tasks) { const std::size_t min_el = min_element_index(timings); const double max_speedup = std::round((timings[0] / timings[min_el]) * 10) / 10; const std::size_t num_tasks = try_tasks[min_el]; logln("Maximum speedup obtained by multithreading vs. single-threading: ", max_speedup, "x, using ", num_tasks, " tasks."); } /** * @brief Calculate the mean and standard deviation of a set of integers. * * @param timings The integers. * @return A struct containing the mean and standard deviation. */ mean_sd analyze(const std::vector& timings) { // First, calculate the mean and the mean of the square . double mean = 0; double mean_sq = 0; for (const std::chrono::milliseconds::rep timing : timings) { mean += static_cast(timing); mean_sq += static_cast(timing * timing); } mean /= static_cast(timings.size()); mean_sq /= static_cast(timings.size()); // The variance is given by <(X - )^2> = - ^2. The standard deviation is the square root of the variance. return {mean, std::sqrt(mean_sq - (mean * mean))}; } /** * @brief A class to save the Mandelbrot image in. Note that rows and columns are inverted compared to the usual matrix syntax, so that `image(x, y)` corresponds to the pixel at coordinates (x, y) where x is the horizontal axis (i.e. column number) and y is the vertical axis (i.e. row number). The width is the number of columns and the height is the number of rows. 
*/
template <typename T>
class [[nodiscard]] image_matrix
{
public:
    image_matrix() = default;

    image_matrix(const std::size_t width_, const std::size_t height_) : width(width_), height(height_), pixels(std::make_unique<T[]>(width_ * height_)) {}

    // Two-dimensional access: x is the column, y is the row. No bounds checking is performed.
    [[nodiscard]] T& operator()(std::size_t x, std::size_t y)
    {
        return pixels[(y * width) + x];
    }

    [[nodiscard]] T operator()(std::size_t x, std::size_t y) const
    {
        return pixels[(y * width) + x];
    }

    // Flat access into the row-major pixel array. No bounds checking is performed.
    [[nodiscard]] T& operator[](std::size_t i)
    {
        return pixels[i];
    }

    [[nodiscard]] T operator[](std::size_t i) const
    {
        return pixels[i];
    }

    [[nodiscard]] std::size_t get_height() const
    {
        return height;
    }

    [[nodiscard]] std::size_t get_width() const
    {
        return width;
    }

private:
    std::size_t width = 0;
    std::size_t height = 0;
    std::unique_ptr<T[]> pixels = nullptr;
}; // class image_matrix

// The maximum number of iterations to try before deciding whether a point is in the Mandelbrot set.
constexpr std::size_t max_iter = 2000;

/**
 * @brief Find the escape time of a point.
 *
 * @param c The point.
 * @return The escape time, that is, the number of iterations before the point escapes the Mandelbrot set, with an additional fractional part to eliminate color banding; or `max_iter` if the point doesn't escape within the maximum number of iterations.
 */
double mandelbrot_escape(const std::complex<double> c)
{
    // Define the escape radius. A point c is considered to have "escaped" the Mandelbrot set if, after fewer than `max_iter` iterations of the formula z = z^2 + c starting at z = 0, we get |z| > r. Since the Mandelbrot set is contained within a closed disk of radius 2, the escape radius must be at least 2. However, with that choice we will see the actual disk in the image, because any point outside the disk (but still in the output image) will automatically have an iteration count of 1. For the region plotted by this program by default, an escape radius of 4 is enough, but a higher radius generally produces smoother color gradients.
    constexpr double r = 1024;

    // Starting at z = c with a count of 1 is the same as starting at z = 0 and performing the first iteration.
    std::complex<double> z = c;
    std::size_t iter = 1;
    while (std::norm(z) <= (r * r) && iter < max_iter)
    {
        z = z * z + c;
        ++iter;
    }

    // If the point did not escape within the maximum number of iterations, then it is (most likely) in the Mandelbrot set, and we return the maximum number of iterations as is.
    if (iter == max_iter)
        return static_cast<double>(max_iter);

    // If the point escapes, calculate a continuous value to be used for coloring that point in the image. The iteration count is an integer, which would cause color banding in the final image, as there are large regions with the same iteration count and therefore the same color. We resolve this by adding a fractional part. After the loop ends, z has just escaped the radius r, so we are guaranteed that |r| < |z| < |r^2 + c|. Neglecting c (which we can do if r is large enough), this means log_r(|z|) is in the range [1, 2], and therefore log_2(log_r(|z|)) is in the range [0, 1]. Hence, the quantity log_2(log_r(|z|)) = log_2(log(|z|)/log(r)) = log_2(log(|z|^2)/log(r^2)) provides a fractional part that we can simply add to the integer iteration count to make it continuous and eliminate the banding. We subtract from the iteration count instead of adding to it, because larger values of z have smaller iteration counts.
    return static_cast<double>(iter) - std::log2(std::log(std::norm(z)) / std::log(r * r));
}

/**
 * @brief A helper struct to store the RGB values of a pixel.
 */
struct [[nodiscard]] color
{
    constexpr color() = default;

    // Accepts any arithmetic component type; each component is converted to 8 bits with `static_cast` (no range check).
    template <typename T>
    constexpr color(const T r_, const T g_, const T b_) : r(static_cast<std::uint8_t>(r_)), g(static_cast<std::uint8_t>(g_)), b(static_cast<std::uint8_t>(b_))
    {
    }

    std::uint8_t r = 0;
    std::uint8_t g = 0;
    std::uint8_t b = 0;
};

/**
 * @brief Interpolate between two colors.
 *
 * @param first The first color.
 * @param second The second color.
 * @param t The interpolation point, in the range [0, 1] where 0 corresponds to the first color, 1 corresponds to the second color, and any other value combines the two colors.
 * @return The interpolated color.
 */
color interpolate_colors(const color& first, const color& second, const double t)
{
    // The 8-bit components are promoted to int before the subtraction, so a negative difference is handled correctly.
    return {first.r + (t * (second.r - first.r)), first.g + (t * (second.g - first.g)), first.b + (t * (second.b - first.b))};
}

/**
 * @brief Convert the escape time of a point into a color.
 *
 * @param iterations The fractional number of iterations before the point escapes the Mandelbrot set.
 * @return The color.
 */
color iter_to_color(const double iterations)
{
    // Define a nice color palette for the image.
    static constexpr std::array<color, 16> palette = {{{66, 30, 15}, {25, 7, 26}, {9, 1, 47}, {4, 4, 73}, {0, 7, 100}, {12, 44, 138}, {24, 82, 177}, {57, 125, 209}, {134, 181, 229}, {211, 236, 248}, {241, 233, 191}, {248, 201, 95}, {255, 170, 0}, {204, 128, 0}, {153, 87, 0}, {106, 52, 3}}};

    // Points that are in the set (or at least, suspected to be in the set because they did not diverge after the maximum number of iterations) will be black. The comparison is exact because `mandelbrot_escape()` returns `max_iter` itself, converted to double, in that case.
    if (iterations == max_iter)
        return {0, 0, 0};

    // Get the integer and fractional parts of the number of iterations.
    double int_part = 0;
    const double frac_part = std::modf(iterations, &int_part);

    // Choose two adjacent colors from the palette based on the integer part. We cycle through the palette, so the same colors will repeat many times (`max_iter` is much larger than the number of colors).
    const std::size_t index = static_cast<std::size_t>(int_part);
    const color color1 = palette[index % palette.size()];
    const color color2 = palette[(index + 1) % palette.size()];

    // Use the fractional part to interpolate smoothly between the two colors.
    return interpolate_colors(color1, color2, frac_part);
}

/**
 * @brief Calculate the colors of a range of pixels in an image, enumerated as a range of indices in a 1-dimensional array containing the flattened matrix in row-major order.
 *
 * @param image The matrix storing the image.
 * @param start The first index to calculate.
 * @param end The index after the last index to calculate.
 * @param jump How many pixels to jump over each iteration, to allow for splitting the work between different runs of the same test. Must not be 0, otherwise the loop never advances.
 * @param offset How many pixels to shift the calculation by.
 */
void calculate_mandelbrot(image_matrix<color>& image, const std::size_t start, const std::size_t end, const std::size_t jump, const std::size_t offset)
{
    // Define the ranges of real and imaginary values to consider for the Mandelbrot set. The aspect ratio should be exactly 1:1 (width:height) to prevent stretching, since the benchmark always outputs square images for simplicity.
    constexpr double re_min = -2.01;
    constexpr double re_max = 0.51;
    constexpr double im_min = -1.26;
    constexpr double im_max = 1.26;

    // Get the width and height of the image.
    const std::size_t width = image.get_width();
    const std::size_t height = image.get_height();

    for (std::size_t i = start + offset; i < end; i += jump)
    {
        // Convert the pixel index to the corresponding x and y coordinates.
        const std::size_t x = i % width;
        const std::size_t y = i / width;

        // Convert the pixel coordinates, integers (x, y) such that x is in [0, width-1] and y is in [0, height-1], to a complex number c such that Re(c) is in [re_min, re_max] and Im(c) is in [im_min, im_max]. (Note: We also need to invert the y axis because the y value increases downwards in the image, but the imaginary part increases upwards in the complex plane. However, to avoid doing any extra calculations, we do that later when we save the image.)
        const std::complex<double> c = {(static_cast<double>(x) / static_cast<double>(width) * (re_max - re_min)) + re_min, (static_cast<double>(y) / static_cast<double>(height) * (im_max - im_min)) + im_min};

        // Calculate the pixel's escape time and convert it to a color.
        image[i] = iter_to_color(mandelbrot_escape(c));
    }
}

// A macro to unpack a 16-bit integer into 2 bytes.
#define UNPACK_2_BYTES(value) static_cast<std::uint8_t>(value), static_cast<std::uint8_t>((value) >> 8)

// A macro to unpack a 32-bit integer into 4 bytes.
#define UNPACK_4_BYTES(value) static_cast(value), static_cast((value) >> 8), static_cast((value) >> 16), static_cast((value) >> 24) /** * @brief Save an image to a BMP file. * * @param image The matrix containing the pixels. * @param filename The output file name. */ void save_bmp(const image_matrix& image, const std::string& filename) { // Create the file. std::ofstream file(filename, std::ios::binary); if (!file.is_open()) { logln_ansi(ansi_error, "Error: Could not create the file ", filename, '.'); return; } log("Saving image to a BMP file: ["); // Calculate the size of the BMP file in bytes. const std::uint32_t width = static_cast(image.get_width()); const std::uint32_t height = static_cast(image.get_height()); const std::uint32_t total_pixels = width * height; constexpr std::uint32_t file_header_size = 14; constexpr std::uint32_t info_header_size = 40; constexpr std::uint32_t bytes_per_pixel = 3; constexpr std::uint32_t bits_per_pixel = bytes_per_pixel * 8; const std::uint32_t file_size = file_header_size + info_header_size + (bytes_per_pixel * total_pixels); // The file header of the BMP file: 2 bytes for the "BM" signature, 4 bytes for the file size, 4 bytes reserved, 4 bytes for the start offset of the pixel array. Note that all integers are stored in little-endian format (least-significant byte first), hence the bit shifts (from the macro UNPACK_4_BYTES). We specify the values explicitly to avoid issues with padding. 
const std::uint8_t bmp_file_header[file_header_size] = {'B', 'M', UNPACK_4_BYTES(file_size), UNPACK_4_BYTES(0), UNPACK_4_BYTES(file_header_size + info_header_size)}; // The information header of the BMP file: 4 bytes for the header size, 4 bytes for the image width, 4 bytes for the image height, 2 bytes for the number of color planes, 2 bytes for the number of bits per pixel, 4 bytes for the compression method (0 = no compression), 4 bytes for the image size (can be 0 if no compression), 4 bytes for the horizontal pixels per meter, 4 bytes for the vertical pixels per meter, 4 bytes for the number of colors (0 = default), 4 bytes for the number of "important colors" (generally ignored). const std::uint8_t bmp_info_header[info_header_size] = {UNPACK_4_BYTES(info_header_size), UNPACK_4_BYTES(width), UNPACK_4_BYTES(height), UNPACK_2_BYTES(1), UNPACK_2_BYTES(bits_per_pixel), UNPACK_4_BYTES(0), UNPACK_4_BYTES(0), UNPACK_4_BYTES(0), UNPACK_4_BYTES(0), UNPACK_4_BYTES(0), UNPACK_4_BYTES(0)}; // Write the headers. file.write(reinterpret_cast(bmp_file_header), file_header_size); file.write(reinterpret_cast(bmp_info_header), info_header_size); // Create padding bytes for later use. const std::uint8_t padding_bytes[3] = {0, 0, 0}; const std::streamsize num_padding_bytes = (4 - ((width * bytes_per_pixel) % 4)) % 4; // Write the pixels. Note that they are stored "bottom-up", starting in the lower left corner, going from left to right and then row by row. However, we need to invert the y axis anyway, because the y value increases downwards in the image, but the imaginary part increases upwards in the complex plane. Therefore, we just use the normal y values when saving the image. for (std::size_t y = 0; y < height; ++y) { for (std::size_t x = 0; x < width; ++x) { const color col = image(x, y); // BMP format stores the colors in BGR order. 
file.write(reinterpret_cast(&col.b), 1); file.write(reinterpret_cast(&col.g), 1); file.write(reinterpret_cast(&col.r), 1); } if (num_padding_bytes != 0) { // BMP format requires that each row is a multiple of 4 bytes long, so we add padding if necessary. file.write(reinterpret_cast(padding_bytes), num_padding_bytes); } if (y % (height / 10) == 0) log('.'); } file.close(); logln("]\nMandelbrot image saved successfully as ", filename, '.'); } /** * @brief A utility class to measure execution time for benchmarking purposes. */ class [[nodiscard]] timer { public: /** * @brief Get the number of milliseconds that have elapsed since the object was constructed or since `start()` was last called, but keep the timer ticking. * * @return The number of milliseconds. */ [[nodiscard]] std::chrono::milliseconds::rep current_ms() const { return (std::chrono::duration_cast(std::chrono::steady_clock::now() - start_time)).count(); } /** * @brief Start (or restart) measuring time. Note that the timer starts ticking as soon as the object is created, so this is only necessary if we want to restart the clock later. */ void start() { start_time = std::chrono::steady_clock::now(); } /** * @brief Stop measuring time and store the elapsed time since the object was constructed or since `start()` was last called. */ void stop() { elapsed_time = std::chrono::steady_clock::now() - start_time; } /** * @brief Get the number of milliseconds stored when `stop()` was last called. * * @return The number of milliseconds. */ [[nodiscard]] std::chrono::milliseconds::rep ms() const { return (std::chrono::duration_cast(elapsed_time)).count(); } private: /** * @brief The time point when measuring started. */ std::chrono::time_point start_time = std::chrono::steady_clock::now(); /** * @brief The duration that has elapsed between `start()` and `stop()`. 
*/ std::chrono::duration elapsed_time = std::chrono::duration::zero(); }; // class timer /** * @brief Map a color to a monochrome Unicode block based on its brightness, using luma coefficients. * * @param col The color. * @return The Unicode block representing the brightness of the color, in UTF-8. */ std::string_view brightness_block(const color& col) { // Define Unicode blocks from darkest to brightest in UTF-8. constexpr std::array blocks = {" ", "\xE2\x96\x91", "\xE2\x96\x92", "\xE2\x96\x93", "\xE2\x96\x88"}; // Compute the perceived brightness using luma coefficients. Each color component is a number in the range 0-255, and the coefficients sum to 1, so the brightness is also in the range 0-255. const double brightness = (0.2126 * col.r) + (0.7152 * col.g) + (0.0722 * col.b); // Quantize the brightness into 5 levels. A brightness of 0 maps to level 0, and a brightness of 255 maps to level 4. const std::size_t level = static_cast(std::round(brightness * 4.0 / 255.0)); // Return the corresponding block character. return blocks[level]; } /** * @brief Create a plot of an image as characters, using either 24-bit ANSI colors or monochrome blocks of different brightness. * * @param image The image to plot. * @param out_width The plot width in terminal characters. Should be even, since in the monochrome case each pixel spans two characters horizontally. * @param use_color `true` to generate a colored plot, `false` to generate a monochrome plot. * @return The plot as a string. */ std::string plot_image_chars(const image_matrix& image, const std::size_t out_width, const bool use_color) { // Get the source image dimensions. const std::size_t src_width = image.get_width(); const std::size_t src_height = image.get_height(); // Compute the plot height in terminal characters, keeping in mind that characters have an aspect ratio of about 1:2 (width:height). 
const std::size_t out_height = static_cast(std::llround(0.5 * static_cast(src_height) * static_cast(out_width) / static_cast(src_width))); // Create a buffer for the plot. std::string plot; // For the colored plot we use Unicode lower half blocks to pack two pixels per character. The background color sets the color of the top (empty) half, and the foreground color sets the color of the bottom (filled) half. if (use_color) { // Each character row contains two pixel rows (top and bottom). const std::size_t out_height_pixels = out_height * 2; // Create a mapping of output x coordinates to source pixels, such that x_map[0] = 0 and x_map[out_width - 1] = src_width - 1. std::vector x_map(out_width); for (std::size_t x = 0; x < out_width; ++x) x_map[x] = (x * (src_width - 1)) / (out_width - 1); // Create a mapping of output pixel y coordinates to source pixels, such that y_map[0] = 0 and y_map[out_height_pixels - 1] = src_height - 1. std::vector y_map(out_height_pixels); for (std::size_t y = 0; y < out_height_pixels; ++y) y_map[y] = (y * (src_height - 1)) / (out_height_pixels - 1); // Reserve enough capacity for the entire plot to avoid reallocations. Each row contains ANSI codes of the form `\033[48;2;RRR;GGG;BBBm\033[38;2;RRR;GGG;BBBm` = up to 38 bytes, plus the Unicode lower half block U+2584 in UTF-8 = 3 bytes, for a total of 41 bytes per character of the plot, plus the `\033[0m` reset code at the end and the newline = 5 bytes. plot.reserve(out_height * ((out_width * 41) + 5)); // Iterate over the rows and columns. for (std::size_t y = 0; y < out_height; ++y) { for (std::size_t x = 0; x < out_width; ++x) { // Fetch the sampled source pixel for the top and bottom halves. const color col_top = image(x_map[x], y_map[2 * y]); const color col_bottom = image(x_map[x], y_map[(2 * y) + 1]); // Create the background color ANSI sequence for the top half. 
plot.append("\033[48;2;"); plot.append(std::to_string(col_top.r)); plot.push_back(';'); plot.append(std::to_string(col_top.g)); plot.push_back(';'); plot.append(std::to_string(col_top.b)); // Finish the sequence with `m` and create the foreground color ANSI sequence for the bottom half. plot.append("m\033[38;2;"); plot.append(std::to_string(col_bottom.r)); plot.push_back(';'); plot.append(std::to_string(col_bottom.g)); plot.push_back(';'); plot.append(std::to_string(col_bottom.b)); // Finish the sequence with `m` and append a lower half block (U+2584) in UTF-8, which is the actual character that will be displayed. plot.append("m\xE2\x96\x84"); } // Reset the ANSI style at the end of the row and add a newline character. plot.append("\033[0m\n"); } } // For the monochrome plot we use Unicode block characters of different brightness levels. Each pixel is rendered as two identical blocks side-by-side to make the pixels square (so essentially, the opposite of the colored plot). else { // Each pixel spans two terminal characters horizontally. const std::size_t out_width_pixels = out_width / 2; // Create a mapping of output x coordinates to source pixels, such that x_map[0] = 0 and x_map[out_width_pixels - 1] = src_width - 1. std::vector x_map(out_width_pixels); for (std::size_t x = 0; x < out_width_pixels; ++x) x_map[x] = (x * (src_width - 1)) / (out_width_pixels - 1); // Create a mapping of output y coordinates to source pixels, such that y_map[0] = 0 and y_map[out_height - 1] = src_height - 1. std::vector y_map(out_height); for (std::size_t y = 0; y < out_height; ++y) y_map[y] = (y * (src_height - 1)) / (out_height - 1); // Reserve enough capacity for the entire plot to avoid reallocations. The UTF-8 blocks can take up to 3 bytes each, plus a newline after each row. plot.reserve(out_height * ((out_width * 3) + 1)); // Iterate over the rows and columns. 
for (std::size_t y = 0; y < out_height; ++y) { for (std::size_t x = 0; x < out_width_pixels; ++x) { // Fetch the sampled source pixel and map to a Unicode block based on brightness. const color col = image(x_map[x], y_map[y]); const std::string_view block = brightness_block(col); plot.append(block); plot.append(block); } // Add a newline after each row. plot.push_back('\n'); } } return plot; } /** * @brief Benchmark multithreaded performance by calculating the Mandelbrot set. * * @param benchmark Whether to perform the full benchmarks. * @param plot Whether to perform quick benchmarks by just plotting the image once. * @param save Whether to save the image as a BMP file. */ void check_performance(const bool benchmark, const bool plot, const bool save) { print_header("Preparing benchmarks:"); #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS // Try to give the process the highest possible priority, so that other processes do not interfere with the benchmarks. if (!BS::set_os_process_priority(BS::os_process_priority::realtime)) if (!BS::set_os_process_priority(BS::os_process_priority::high)) BS::set_os_process_priority(BS::os_process_priority::above_normal); const std::string process_priority = os_process_priority_name(BS::get_os_process_priority()); logln("Process priority set to: ", process_priority, "."); if (process_priority != "realtime") logln_ansi(ansi_info, "Note: Please run as admin/root to enable a higher process priority."); try_os_thread_priority(); const std::string thread_priority = os_thread_priority_name(BS::this_thread::get_os_thread_priority()); logln("Thread priority set to: ", thread_priority, "."); if (thread_priority != "realtime") logln_ansi(ansi_info, "Note: Please run as admin/root to enable a higher thread priority."); // Initialize a thread pool with the default number of threads, and ensure that the threads have the highest possible priority, so that other processes do not interfere with the benchmarks. 
BS::thread_pool pool(try_os_thread_priority); #else // If native extensions are disabled, just initialize a thread pool with the default number of threads. BS::thread_pool pool; #endif // Store the number of available hardware threads for easy access. const std::size_t thread_count = pool.get_thread_count(); logln("Using ", thread_count, " threads."); // Set the formatting of floating point numbers. log(std::fixed, std::setprecision(1)); // Initialize a timer object to measure execution time. timer tmr; // The target execution time, in milliseconds, of the multithreaded test with the number of blocks equal to the number of threads. The total time spent on that test will be approximately equal to `repeat * target_ms`. constexpr std::chrono::milliseconds::rep target_ms = 50; // Find the Mandelbrot image size that will roughly achieve the target execution time. logln("Determining the Mandelbrot image size needed to achieve an approximate mean execution time of ", target_ms, " ms with ", thread_count, " tasks..."); std::size_t image_size = thread_count; image_matrix image; std::size_t jump = 1; std::size_t offset = 0; // Define the loop function. const auto loop = [&image, &jump, &offset](const std::size_t start, const std::size_t end) { calculate_mandelbrot(image, start, end, jump, offset); }; // Increase the image size gradually until the target execution time is reached. do { image_size *= 2; image = image_matrix(image_size, image_size); tmr.start(); pool.detach_blocks(0, image_size * image_size, loop); pool.wait(); tmr.stop(); } while (tmr.ms() < target_ms); // Scale the image size to fit the target execution time more precisely, keeping in mind that the time complexity is O(image_size^2). 
image_size = static_cast(std::llround(static_cast(image_size) * std::sqrt(static_cast(target_ms) / static_cast(tmr.ms())))); logln("Result: ", image_size, 'x', image_size, " pixels."); if (benchmark) { print_header("Performing full benchmarks:"); // Define vectors to store statistics. std::vector different_n_timings; std::vector same_n_timings; // The number of times to repeat each run of the test in order to collect reliable statistics. constexpr std::size_t num_repeats = 30; // Since we are repeating the same test multiple times, we might as well use different parts of the complex plane in each repetition. However, we have to spread the calculations evenly to avoid biasing the results, as some regions have much higher escape times than others. So we calculate the whole image, but at an offset from 0 to `num_repeats`. jump = num_repeats; const std::size_t benchmark_image_size = static_cast(std::floor(static_cast(image_size) * std::sqrt(num_repeats))); logln("Generating a ", benchmark_image_size, 'x', benchmark_image_size, " plot of the Mandelbrot set..."); logln("Each test will be repeated ", num_repeats, " times to collect reliable statistics."); // Perform the test. std::vector try_tasks; std::size_t num_tasks = 0; double last_timing = std::numeric_limits::max(); constexpr int width_tasks = 4; while (true) { image = image_matrix(benchmark_image_size, benchmark_image_size); try_tasks.push_back(num_tasks); if (num_tasks == 0) log(std::setw(width_tasks), 1, " task: "); else log(std::setw(width_tasks), num_tasks, " tasks: "); log('[', BS::synced_stream::flush); for (std::size_t i = 0; i < num_repeats; ++i) { // Measure execution time for this test. tmr.start(); if (num_tasks > 0) { pool.detach_blocks(0, benchmark_image_size * benchmark_image_size, loop, num_tasks); pool.wait(); } else { loop(0, benchmark_image_size * benchmark_image_size); } tmr.stop(); // Save the measurement for later analysis. 
same_n_timings.push_back(tmr.ms()); // Print a dot to inform the user that we've made progress. log('.', BS::synced_stream::flush); // Increase the offset so we calculate a different part of the image in each repetition of the test. offset = (offset + 1) % num_repeats; } logln(']', (num_tasks == 0) ? " (single-threaded)" : ""); // Analyze, print, and save the mean and standard deviation of all the tests with the same number of tasks. const mean_sd stats = analyze(same_n_timings); const std::chrono::milliseconds::rep total_time = std::reduce(same_n_timings.begin(), same_n_timings.end()); const double pixels_per_ms = static_cast(benchmark_image_size * benchmark_image_size) / static_cast(total_time); same_n_timings.clear(); print_timing(stats, pixels_per_ms); different_n_timings.push_back(stats.mean); if (num_tasks == 0) { num_tasks = std::max(thread_count / 4, 2); } else { if ((num_tasks > thread_count) && (stats.mean > last_timing)) break; last_timing = stats.mean; num_tasks *= 2; } } print_speedup(different_n_timings, try_tasks); } if (plot) { print_header("Performing quick benchmarks:"); // Just plot whatever we can in 5 seconds. Feel free to increase this to get higher resolution images. 
constexpr std::chrono::milliseconds::rep total_ms = 5000; const std::size_t plot_image_size = static_cast(std::floor(static_cast(image_size) * std::sqrt(static_cast(total_ms) / static_cast(target_ms)))); image = image_matrix(plot_image_size, plot_image_size); log("Generating a ", plot_image_size, 'x', plot_image_size, " plot of the Mandelbrot set with ", thread_count, " tasks: [", BS::synced_stream::flush); pool.detach_blocks(0, plot_image_size * plot_image_size, [&image](const std::size_t start, const std::size_t end) { calculate_mandelbrot(image, start, end, 1, 0); log('.', BS::synced_stream::flush); }); pool.wait(); tmr.stop(); logln("]\nDone in ", tmr.ms(), " ms (", static_cast(plot_image_size * plot_image_size) / static_cast(tmr.ms()), " pixels/ms)."); } logln(); // Set the plot width in terminal characters. constexpr std::size_t plot_width = 120; // Generate a colored or monochrome plot as needed. std::string plot_color; std::string plot_mono; if (use_stdout && !no_color) plot_color = plot_image_chars(image, plot_width, true); if (use_log || (use_stdout && no_color)) plot_mono = plot_image_chars(image, plot_width, false); // Write the plots to stdout and/or the log file. if (use_stdout) sync_cout.print(no_color ? plot_mono : plot_color); if (use_log) sync_log.print(plot_mono); // Save the plot to a BMP file if requested. if (save) save_bmp(image, "BS_thread_pool_benchmark_mandelbrot.bmp"); print_header("Thread pool performance test completed successfully!", '+', ansi_success); } // ================================== // The main function and related code // ================================== /** * @brief Show basic information about the program. 
*/ void show_intro() { logln_ansi(ansi_title, R"( ██████ ███████ ████████ ██ ██ ██████ ███████ █████ ██████ ██████ ██████ ██████ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██████ ███████ ██ ███████ ██████ █████ ███████ ██ ██ ██████ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██████ ███████ ██ ██ ██ ██ ██ ███████ ██ ██ ██████ ███████ ██ ██████ ██████ ███████ )"); logln_ansi(ansi_title_italic, "BS::thread_pool:", " a fast, lightweight, modern, and easy-to-use C++17/C++20/C++23 thread pool library"); logln_ansi(ansi_title_italic, "Copyright (c) 2021-2026 Barak Shoshany (baraksh@gmail.com) (https://baraksh.com/)"); logln_ansi(ansi_title_italic, "GitHub: https://github.com/bshoshany/thread-pool"); logln(); print_key_values("Thread pool library version is: ", 'v', BS::thread_pool_version); print_key_values("Thread pool library imported using: ", BS::thread_pool_module ? "import BS.thread_pool (" : "#include \"BS_thread_pool.hpp\" (no ", "C++20 modules)"); logln(); logln_ansi(ansi_title_underline, "C++ Standard Library imported using:"); print_key_values("* Thread pool library: ", BS::thread_pool_import_std ? "import std (" : "#include <...> (no ", "C++23 std module)"); print_key_values("* Test program: ", using_import_std ? "import std (" : "#include <...> (no ", "C++23 std module)"); logln(); print_key_values("Detected OS: ", detect_os()); print_key_values("Detected compiler: ", detect_compiler()); print_key_values("Detected standard library: ", detect_lib()); print_key_values("Detected C++ standard: ", detect_cpp_standard(), " (__cplusplus = ", __cplusplus, ")"); logln(); logln_ansi(ansi_title_underline, "Detected features:"); print_features(); print_key_values("Native extensions are: ", BS::thread_pool_native_extensions ? 
"Enabled" : "Disabled"); print_key_values("Hardware concurrency is: ", std::thread::hardware_concurrency()); logln(); log_ansi(ansi_title_underline, "Important:"); logln_ansi(ansi_title, " Please do not run any other applications, especially multithreaded applications, in parallel with this test!"); } /** * @brief Get a string representing the current time. * * @return The string. */ std::string get_time() { #ifdef __cpp_lib_format // Things are much easier with C++20 `std::format`. return std::format("{:%Y-%m-%d_%H.%M.%S}", std::chrono::time_point_cast(std::chrono::system_clock::now())); #else std::string time_string = "YYYY-MM-DD_HH.MM.SS"; std::tm local_tm = {}; const std::time_t epoch = std::time(nullptr); #if defined(_MSC_VER) && !defined(__cpp_lib_modules) // If MSVC is detected, use `localtime_s()` to avoid warning C4996. (This doesn't work if we used `import std`, so we check that to be on the safe side, although in that case `std::format` should be available anyway). if (localtime_s(&local_tm, &epoch) != 0) time_string = ""; #elif defined(__linux__) || defined(__APPLE__) // On Linux or macOS, use `localtime_r()` to avoid clang-tidy warning `concurrency-mt-unsafe`. if (localtime_r(&epoch, &local_tm) == nullptr) time_string = ""; #else local_tm = *std::localtime(&epoch); // NOLINT(concurrency-mt-unsafe) #endif if (!time_string.empty()) { const std::size_t bytes = std::strftime(time_string.data(), time_string.length() + 1, "%Y-%m-%d_%H.%M.%S", &local_tm); if (bytes != time_string.length()) time_string = ""; } return time_string; #endif } /** * @brief A class to parse command line arguments. All arguments are simple on/off flags. */ class [[nodiscard]] arg_parser { public: /** * @brief Convert the command line arguments passed to the `main()` function into an `std::vector`. * * @param argc The number of arguments. * @param argv An array containing the arguments. 
*/ arg_parser(int argc, char* argv[]) : args(argv + 1, argv + argc), executable(argv[0]) {}; /** * @brief Check if a specific command line argument has been passed to the program. If no arguments were passed, use the default value instead. * * @param arg The argument to check for. * @return `true` if the argument exists, `false` otherwise. */ [[nodiscard]] bool operator[](const std::string_view arg) { if (size() > 0) return (args.count(arg) == 1); return allowed[arg].def; } /** * @brief Add an argument to the list of allowed arguments. * * @param arg The argument. * @param desc The description of the argument. * @param def The default value of the argument. */ void add_argument(const std::string_view arg, const std::string_view desc, const bool def) { allowed[arg] = {desc, def}; } /** * @brief Get the name of the executable. * * @return The name of the executable. */ std::string_view get_executable() { return executable; } void show_help() const { int width = 1; for (const auto& [arg, opt] : allowed) width = std::max(width, static_cast(arg.size())); logln("\nAvailable options (all are on/off and default to off):"); for (const auto& [arg, opt] : allowed) logln(" ", std::left, std::setw(width), arg, " ", opt.desc); log("If no options are entered, the default is:\n "); for (const auto& [arg, opt] : allowed) { if (opt.def) log(arg, " "); } logln(); } /** * @brief Get the number of command line arguments. * * @return The number of arguments. */ [[nodiscard]] std::size_t size() const { return args.size(); } /** * @brief Verify that the command line arguments belong to the list of allowed arguments. * * @return `true` if all arguments are allowed, `false` otherwise. */ [[nodiscard]] bool verify() const { return std::all_of(args.begin(), args.end(), [this](const std::string_view arg) { return allowed.count(arg) == 1; }); } private: struct arg_spec { std::string_view desc; bool def = false; }; /** * @brief A set containing string views of the command line arguments. 
*/ std::set args; /** * @brief A map containing the allowed arguments and their descriptions. */ std::map allowed; /** * @brief A string view containing the name of the executable. */ std::string_view executable; }; // class arg_parser } // anonymous namespace int main(int argc, char* argv[]) // NOLINT(bugprone-exception-escape) { #ifdef __cpp_exceptions try { #endif // Disable ANSI colors if the environment variable `NO_COLOR` is set. no_color = (std::getenv("NO_COLOR") != nullptr); // NOLINT(concurrency-mt-unsafe) // If the file default_args.txt exists in either this folder or the parent folder, read the default arguments from it (space separated in a single line). Otherwise, use the built-in defaults. This is useful when debugging. std::map defaults; std::ifstream default_args_file("default_args.txt"); if (!default_args_file.is_open()) default_args_file.open("../default_args.txt"); if (default_args_file.is_open()) { std::string line; std::getline(default_args_file, line); std::istringstream iss(line); std::string arg; while (iss >> arg) defaults[arg] = true; default_args_file.close(); } else { defaults = {{"help", false}, {"stdout", true}, {"log", true}, {"tests", true}, {"deadlock", false}, {"benchmarks", true}, {"plot", false}, {"save", false}}; } // Parse the command line arguments. 
arg_parser args(argc, argv); args.add_argument("help", "Show this help message and exit.", defaults["help"]); args.add_argument("stdout", "Print to the standard output.", defaults["stdout"]); args.add_argument("log", "Print to a log file.", defaults["log"]); args.add_argument("tests", "Perform standard tests.", defaults["tests"]); args.add_argument("deadlock", "Perform long deadlock tests.", defaults["deadlock"]); args.add_argument("benchmarks", "Perform full Mandelbrot plot benchmarks.", defaults["benchmarks"]); args.add_argument("plot", "Perform quick Mandelbrot plot benchmarks.", defaults["plot"]); args.add_argument("save", "Save the Mandelbrot plot to a file.", defaults["save"]); if (args.size() > 0) { if (args["help"] || !args.verify()) { show_intro(); args.show_help(); return 0; } if (!args["stdout"] && !args["log"]) { show_intro(); args.show_help(); logln_ansi(ansi_error, "\nERROR: No output stream specified! Please enter one or more of: log, stdout. Aborting."); return 0; } if (!args["benchmarks"] && !args["deadlock"] && !args["plot"] && !args["tests"]) { show_intro(); args.show_help(); logln_ansi(ansi_error, "\nERROR: No tests or benchmarks requested! Please enter one or more of: benchmarks, deadlock, plot, tests. Aborting."); return 0; } } // A stream object used to access the log file. std::ofstream log_file; sync_log.remove_stream(std::cout); if (args["log"]) { // Extract the name of the executable file, or use a default value if it is not available. const std::string_view executable = args.get_executable(); const std::size_t last_slash = executable.find_last_of("/\\") + 1; std::string exe_file(executable.substr(last_slash, executable.find('.', last_slash) - last_slash)); if (exe_file.empty()) exe_file = "BS_thread_pool_test"; // Create a log file using the name of the executable, followed by the current date and time. 
const std::string log_filename = exe_file + "-" + get_time() + ".log"; log_file.open(log_filename); if (log_file.is_open()) { logln_ansi(ansi_info, "Generating log file: ", log_filename); sync_log.add_stream(log_file); } else { logln_ansi(ansi_error, "ERROR: Could not create a log file."); return 1; } } use_stdout = args["stdout"]; use_log = args["log"]; show_intro(); if (args["tests"]) { print_header("Checking the constructor:"); check_constructor(); print_header("Checking reset():"); check_reset(); print_header("Checking detach_task() and submit_task():"); check_task("detach_task()"); check_task("submit_task()"); print_header("Checking submission of member functions as tasks:"); check_member_function(); check_member_function_within_object(); print_header("Checking submission of different callable types:"); check_callables(); print_header("Checking wait(), wait_for(), and wait_until():"); check_wait(); check_wait_blocks(); check_wait_for(); check_wait_until(); check_wait_multiple_deadlock(); #ifdef __cpp_exceptions check_wait_self_deadlock(); print_header("Checking exception handling:"); check_exceptions_submit(); check_exceptions_multi_future(); #else logln_ansi(ansi_info, "NOTE: Exceptions are disabled, skipping wait deadlock check and exception handling tests."); #endif print_header("Checking detach_loop() and submit_loop():"); check_loop(); print_header("Checking detach_blocks() and submit_blocks():"); check_blocks(); print_header("Checking detach_sequence() and submit_sequence():"); check_sequence(); print_header("Checking task monitoring:"); check_task_monitoring(); print_header("Checking pausing:"); check_pausing(); print_header("Checking purge():"); check_purge(); print_header("Checking parallelized vector operations:"); check_vectors(); print_header("Checking task priority:"); check_priority(); print_header("Checking thread initialization/cleanup functions and BS::this_thread:"); check_init(); check_cleanup(); check_get_pool(); print_header("Checking that 
parallelized tasks do not get copied:"); check_copy_all(); print_header("Checking that shared pointers are correctly shared:"); check_shared_ptr_all(); print_header("Checking that tasks are destructed immediately after running:"); check_task_destruct(); print_header("Checking BS::common_index_type:"); check_common_index_type(); #ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS print_header("Checking native extensions:"); #ifndef _WIN32 if ((args["benchmarks"] || args["plot"]) && !BS::set_os_process_priority(BS::os_process_priority::realtime)) { logln_ansi(ansi_info, "NOTE: Skipping process/thread priority checks since the test is running on Linux/macOS without root privileges and benchmarks are enabled. On Linux/macOS, if priorities are decreased, they cannot be increased back to normal without root privileges, so the process will be stuck on the lowest priority, and the benchmarks will be unreliable.\n"); } else #endif { // Note: We have to check thread priorities first, because the check for process priorities lowers the priority of the process to the lowest level, and on Linux, if not running as root, we can only lower the priority, not raise it, so the process gets stuck on the lowest priority. Since the thread priorities cannot be set to higher than the process priorities, this means the thread priorities will also be stuck on the lowest priority, and the test will fail. 
check_os_thread_priorities(); logln(); check_os_process_priorities(); logln(); } check_os_thread_names(); logln(); #if defined(_WIN32) || defined(__linux__) check_os_thread_affinity(); logln(); check_os_process_affinity(); #else logln_ansi(ansi_info, "NOTE: macOS does not support affinity, skipping the corresponding test."); #endif #else logln_ansi(ansi_info, "NOTE: Native extensions disabled, skipping the corresponding test."); #endif } if (args["deadlock"]) { print_header("Checking for deadlocks:"); logln("Checking for destruction deadlocks..."); check_deadlock( [] { BS::thread_pool temp_pool; temp_pool.detach_task([] {}); }); logln("Checking for reset deadlocks..."); BS::thread_pool temp_pool; check_deadlock( [&temp_pool] { temp_pool.reset(); }); } if (test_results::tests_failed > 0) { print_header("FAILURE: Passed " + std::to_string(test_results::tests_succeeded) + " checks, but failed " + std::to_string(test_results::tests_failed) + "!", '+', ansi_error); logln_ansi(ansi_error, "\nPlease submit a bug report at https://github.com/bshoshany/thread-pool/issues including the exact specifications of your system (OS, CPU, compiler, etc.) and the generated log file."); log_file.close(); return static_cast(test_results::tests_failed); } if (test_results::tests_succeeded > 0) print_header("SUCCESS: Passed all " + std::to_string(test_results::tests_succeeded) + " checks!", '+', ansi_success); if (args["benchmarks"] || args["plot"]) check_performance(args["benchmarks"], args["plot"], args["save"]); log_file.close(); return 0; #ifdef __cpp_exceptions } catch (const std::exception& e) { logln_ansi(ansi_error, "ERROR: Tests failed due to exception: ", e.what()); return 1; } #endif }