pax_global_header00006660000000000000000000000064145723702330014520gustar00rootroot0000000000000052 comment=6c3b84670e065d8cf81ce7146c740881c2623a8d performance-test-0.8.0/000077500000000000000000000000001457237023300150035ustar00rootroot00000000000000performance-test-0.8.0/.clang-format000066400000000000000000000053651457237023300173670ustar00rootroot00000000000000--- Language: Cpp # BasedOnStyle: LLVM AccessModifierOffset: -2 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlinesLeft: false AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: true BinPackParameters: true BraceWrapping: AfterClass: false AfterControlStatement: false AfterEnum: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false BeforeCatch: false BeforeElse: false IndentBraces: false BreakBeforeBinaryOperators: All BreakBeforeBraces: Allman BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true ColumnLimit: 80 CommentPragmas: '^ IWYU pragma:' ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IncludeCategories: - Regex: '^"(llvm|llvm-c|clang|clang-c)/' Priority: 2 - Regex: '^(<|"(gtest|isl|json)/)' Priority: 3 - Regex: '.*' Priority: 1 
IncludeIsMainRegex: '$' IndentCaseLabels: false IndentWidth: 2 IndentWrappedFunctionNames: false JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left ReflowComments: true SortIncludes: true SpaceAfterCStyleCast: false SpaceAfterTemplateKeyword: true SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 8 UseTab: Never ... 
performance-test-0.8.0/.github/000077500000000000000000000000001457237023300163435ustar00rootroot00000000000000performance-test-0.8.0/.github/FUNDING.yml000066400000000000000000000002141457237023300201550ustar00rootroot00000000000000# These are supported funding model platforms github: FEniCS # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] performance-test-0.8.0/.github/workflows/000077500000000000000000000000001457237023300204005ustar00rootroot00000000000000performance-test-0.8.0/.github/workflows/ccpp.yml000066400000000000000000000137671457237023300220660ustar00rootroot00000000000000name: FEniCS Performance Test CI on: push: branches: - "**" pull_request: branches: - main schedule: # * is a special character in YAML so you have to quote this string - cron: "0 3 * * 0,3" jobs: build: runs-on: ubuntu-latest container: fenicsproject/test-env:current-openmpi env: PETSC_ARCH: linux-gnu-real64-64 OMPI_ALLOW_RUN_AS_ROOT: 1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 steps: - uses: actions/checkout@v4 - name: Get DOLFINx uses: actions/checkout@v4 with: path: ./dolfinx repository: FEniCS/dolfinx ref: main - name: Install FEniCS Python components run: | apt-get -qq update apt-get -y install libboost-program-options-dev pip3 install pip --upgrade pip3 install git+https://github.com/FEniCS/ufl.git pip3 install git+https://github.com/FEniCS/basix.git pip3 install git+https://github.com/FEniCS/ffcx - name: Build dolfinx cpp run: | cmake -G Ninja -DCMAKE_BUILD_TYPE=Developer -B build -S dolfinx/cpp/ cmake --build build cmake --install build - name: Build performance test run: | cmake -G Ninja -DCMAKE_BUILD_TYPE=Developer -B build-dir -S src cmake --build build-dir cmake --install build-dir - name: Run Poisson test (BoomerAMG, serial) run: | dolfinx-scaling-test \ --problem_type poisson \ --scaling_type weak \ --ndofs 50000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type hypre \ -pc_hypre_type boomeramg \ 
-pc_hypre_boomeramg_strong_threshold 0.7 \ -pc_hypre_boomeramg_agg_nl 4 \ -pc_hypre_boomeramg_agg_num_paths 2 - name: Run Poisson test (BoomerAMG, weak) run: | mpirun -np 2 dolfinx-scaling-test \ --problem_type poisson \ --scaling_type weak \ --ndofs 50000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type hypre \ -pc_hypre_type boomeramg \ -pc_hypre_boomeramg_strong_threshold 0.7 \ -pc_hypre_boomeramg_agg_nl 4 \ -pc_hypre_boomeramg_agg_num_paths 2 - name: Run Poisson test (BoomerAMG, 3rd order, weak) run: | mpirun -np 2 dolfinx-scaling-test \ --problem_type poisson \ --scaling_type weak \ --ndofs 50000 \ --order 3 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type hypre \ -pc_hypre_type boomeramg \ -pc_hypre_boomeramg_strong_threshold 0.7 \ -pc_hypre_boomeramg_agg_nl 4 \ -pc_hypre_boomeramg_agg_num_paths 2 - name: Run Poisson test (BoomerAMG, weak, unstructured mesh) run: | mpirun -np 2 dolfinx-scaling-test \ --problem_type poisson \ --mesh_type unstructured \ --scaling_type weak \ --ndofs 50000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type hypre \ -pc_hypre_type boomeramg \ -pc_hypre_boomeramg_strong_threshold 0.7 \ -pc_hypre_boomeramg_agg_nl 4 \ -pc_hypre_boomeramg_agg_num_paths 2 - name: Run Poisson test (BoomerAMG, strong) run: | mpirun -np 2 dolfinx-scaling-test \ --problem_type poisson \ --scaling_type strong \ --ndofs 1000000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type hypre \ -pc_hypre_type boomeramg \ -pc_hypre_boomeramg_strong_threshold 0.7 \ -pc_hypre_boomeramg_agg_nl 4 \ -pc_hypre_boomeramg_agg_num_paths 2 - name: Run elasticity test (GAMG, serial) run: | dolfinx-scaling-test \ --problem_type elasticity \ --scaling_type weak \ --ndofs 100000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type gamg \ -pc_gamg_coarse_eq_limit 1000 \ -mg_levels_ksp_type chebyshev \ -mg_levels_pc_type jacobi \ -mg_levels_esteig_ksp_type cg \ -matptap_via scalable - name: 
Run elasticity test (GAMG, weak) run: | mpirun -np 2 dolfinx-scaling-test \ --problem_type elasticity \ --scaling_type weak \ --ndofs 100000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type gamg \ -pc_gamg_coarse_eq_limit 1000 \ -mg_levels_ksp_type chebyshev \ -mg_levels_pc_type jacobi \ -mg_levels_esteig_ksp_type cg \ -matptap_via scalable - name: Run elasticity test (GAMG, 3rd order, weak) run: | mpirun -np 2 dolfinx-scaling-test \ --problem_type elasticity \ --scaling_type weak \ --ndofs 100000 \ --order 3 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type gamg \ -pc_gamg_coarse_eq_limit 1000 \ -mg_levels_ksp_type chebyshev \ -mg_levels_pc_type jacobi \ -mg_levels_esteig_ksp_type cg \ -matptap_via scalable - name: Run elasticity test (GAMG, strong) run: | mpirun -np 2 dolfinx-scaling-test \ --problem_type elasticity \ --scaling_type strong \ --ndofs 500000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type gamg \ -pc_gamg_coarse_eq_limit 1000 \ -mg_levels_ksp_type chebyshev \ -mg_levels_pc_type jacobi \ -mg_levels_esteig_ksp_type cg \ -matptap_via scalable performance-test-0.8.0/.gitignore000066400000000000000000000000341457237023300167700ustar00rootroot00000000000000build .vscode .devcontainerperformance-test-0.8.0/Dockerfile000066400000000000000000000036131457237023300170000ustar00rootroot00000000000000# Builds a Docker image with the necessary libraries for compiling # FEniCS. The image is at # https://hub.docker.com/r/fenicsproject/performance-tests # # Authors: Garth N. 
Wells ARG PETSC_VERSION=3.12.4 FROM ubuntu:20.04 WORKDIR /tmp # Environment variables ENV OPENBLAS_NUM_THREADS=1 # Non-Python utilities and libraries RUN apt-get -qq update && \ apt-get -y --with-new-pkgs \ -o Dpkg::Options::="--force-confold" upgrade && \ apt-get -y install \ bison \ clang \ cmake \ flex \ g++ \ gfortran \ git \ libboost-filesystem-dev \ libboost-iostreams-dev \ libboost-math-dev \ libboost-program-options-dev \ libboost-system-dev \ libboost-thread-dev \ libboost-timer-dev \ liblapack-dev \ libmpich-dev \ libopenblas-dev \ libhdf5-mpich-dev \ mpich \ ninja-build \ python3 \ python3-dev \ pkg-config \ wget && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # Install PETSc from source ARG PETSC_VERSION RUN git clone --branch v${PETSC_VERSION} --depth 1 https://gitlab.com/petsc/petsc.git && \ cd petsc && \ python3 ./configure --with-64-bit-indices=0 \ --COPTFLAGS="-O3" \ --CXXOPTFLAGS="-O3" \ --FOPTFLAGS="-O3" \ --with-c-support \ --with-fortran-bindings=no \ --with-debugging=0 \ --with-shared-libraries \ --download-hypre \ --download-ptscotch \ --prefix=/usr/local/petsc-32 && \ make && \ make install && \ git clean -fdx . && \ python3 ./configure --with-64-bit-indices=1 \ --COPTFLAGS="-O3" \ --CXXOPTFLAGS="-O3" \ --FOPTFLAGS="-O3" \ --with-c-support \ --with-fortran-bindings=no \ --with-debugging=0 \ --with-shared-libraries \ --download-hypre \ --download-ptscotch \ --prefix=/usr/local/petsc-64 && \ make && \ make install && \ rm -rf /tmp/* performance-test-0.8.0/LICENSE.md000066400000000000000000000020651457237023300164120ustar00rootroot00000000000000Copyright 2017 Chris N. Richardson and Garth N. 
Wells Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.performance-test-0.8.0/README.md000066400000000000000000000100331457237023300162570ustar00rootroot00000000000000# Performance test codes for FEniCSx/DOLFINx This repository contains solvers for testing the parallel performance of DOLFINx and the underlying linear solvers. It tests elliptic equations - Poisson equation and elasticity - in three dimensions. Representative performance data is available at https://fenics.github.io/performance-test-results/. [![FEniCS Performance Test CI](https://github.com/FEniCS/performance-test/workflows/FEniCS%20Performance%20Test%20CI/badge.svg)](https://github.com/FEniCS/performance-test/actions?query=branch%3Amain) ## Building The source of the tests is in `src/` directory. ### Requirements - FEniCSx/DOLFINx installation (development version of DOLFINx **required**) - PETSc installation - Boost Program Options ### Compilation In the `src/` directory, build the program: cmake . 
make ## Running tests Options for the test are: - Problem type (`--problem_type`): `poisson` or `elasticity` - Scaling type (`--scaling_type`): `strong` (fixed problem size) or `weak` (fixed problem size per process) - Number of degrees-of-freedom (`--ndofs`): total (in case of strong scaling) or per process (for weak scaling) - Order (`--order`): polynomial order (1, 2, or 3) - only on cube mesh, defaults to 1. - File output (`--output`): `true` or `false` (IO performance depends heavily on the underlying filesystem) - Data output directory (`--output_dir`): directory to write solution data to Linear solver options are configured via PETSc command line options, (single hyphen) as shown below. ## Recommended test configuration Suggested options for running tests are listed below. The options include PETSc performance logging which is useful for assessing performance. ### Elasticity For elasticity, a conjugate gradient (CG) solver with a smoothed aggregation algebraic multigrid (GAMG) preconditioner is recommended. 
For a weak scaling test with 8 MPI processes and 500k degrees-of-freedom per process: ``` mpirun -np 8 ./dolfinx-scaling-test \ --problem_type elasticity \ --scaling_type weak \ --ndofs 500000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type gamg \ -pc_gamg_coarse_eq_limit 1000 \ -mg_levels_ksp_type chebyshev \ -mg_levels_pc_type jacobi \ -mg_levels_esteig_ksp_type cg \ -matptap_via scalable \ -options_left ``` For a strong scaling test, with 8 MPI processes and 10M degrees-of-freedom in total: ``` mpirun -np 8 ./dolfinx-scaling-test \ --problem_type elasticity \ --scaling_type strong \ --ndofs 10000000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type gamg \ -pc_gamg_coarse_eq_limit 1000 \ -mg_levels_ksp_type chebyshev \ -mg_levels_pc_type jacobi \ -mg_levels_esteig_ksp_type cg \ -matptap_via scalable \ -options_left ``` ### Poisson For the Poisson equation, a conjugate gradient (CG) solver with a classical algebraic multigrid (BoomerAMG) preconditioner is recommended. 
For a weak scaling test with 8 MPI processes and 500k degrees-of-freedom per process: ``` mpirun -np 8 ./dolfinx-scaling-test \ --problem_type poisson \ --scaling_type weak \ --ndofs 500000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type hypre \ -pc_hypre_type boomeramg \ -pc_hypre_boomeramg_strong_threshold 0.7 \ -pc_hypre_boomeramg_agg_nl 4 \ -pc_hypre_boomeramg_agg_num_paths 2 \ -options_left ``` For a strong scaling test, with 8 MPI processes and 10M degrees-of-freedom in total: ``` mpirun -np 8 ./dolfinx-scaling-test \ --problem_type poisson \ --scaling_type strong \ --ndofs 10000000 \ -log_view \ -ksp_view \ -ksp_type cg \ -ksp_rtol 1.0e-8 \ -pc_type hypre \ -pc_hypre_type boomeramg \ -pc_hypre_boomeramg_strong_threshold 0.7 \ -pc_hypre_boomeramg_agg_nl 4 \ -pc_hypre_boomeramg_agg_num_paths 2 \ -options_left ``` ## Reference performance data Reference performance data is provided [here](performance.md) to help in assessing performance on a given system. ## Authors and license The tests have been developed by Chris N. Richardson () and Garth N. Wells (). The code is covered by the MIT license. See LICENSE.md. performance-test-0.8.0/performance.md000066400000000000000000000003561457237023300176320ustar00rootroot00000000000000# Performance test results * [Some test results on CSD3 (University of Cambridge HPC system)](https://fenics.github.io/performance-test-results) up to 512 cores on 16 nodes. 
[Raw data](https://github.com/FEniCS/performance-test-results) performance-test-0.8.0/src/000077500000000000000000000000001457237023300155725ustar00rootroot00000000000000performance-test-0.8.0/src/CMakeLists.txt000066400000000000000000000027671457237023300203460ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.16) set(PROJECT_NAME dolfinx-scaling-test) project(${PROJECT_NAME}) include(GNUInstallDirs) set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) # Use C++20 set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) # Get DOLFIN configuration data (DOLFINConfig.cmake must be in # DOLFIN_CMAKE_CONFIG_PATH) find_package(DOLFINX REQUIRED) # set(CMAKE_BUILD_TYPE "Release") set(CMAKE_CXX_FLAGS "-Ofast ${CMAKE_CXX_FLAGS} -g -Wall") set(CMAKE_C_FLAGS "-Ofast ${CMAKE_C_FLAGS} -g -Wall") # Compile UFL files add_custom_command( OUTPUT Poisson.c COMMAND ffcx ${CMAKE_CURRENT_SOURCE_DIR}/Poisson.py DEPENDS Poisson.py ) add_custom_command( OUTPUT Elasticity.c COMMAND ffcx ${CMAKE_CURRENT_SOURCE_DIR}/Elasticity.py DEPENDS Elasticity.py ) set(CMAKE_INCLUDE_CURRENT_DIR ON) # Executable add_executable(${PROJECT_NAME} main.cpp mesh.cpp elasticity_problem.cpp cgpoisson_problem.cpp poisson_problem.cpp mem.cpp ${CMAKE_CURRENT_BINARY_DIR}/Elasticity.c ${CMAKE_CURRENT_BINARY_DIR}/Poisson.c) # Find Boost program_options if(DEFINED ENV{BOOST_ROOT} OR DEFINED BOOST_ROOT) set(Boost_NO_SYSTEM_PATHS on) endif() # set(Boost_USE_MULTITHREADED $ENV{BOOST_USE_MULTITHREADED}) set(Boost_VERBOSE TRUE) find_package(Boost 1.70 REQUIRED program_options) # Target libraries target_link_libraries(${PROJECT_NAME} dolfinx Boost::program_options pthread) message(STATUS ${CMAKE_CXX_FLAGS}) install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) performance-test-0.8.0/src/Elasticity.py000066400000000000000000000022661457237023300202640ustar00rootroot00000000000000# Copyright (C) 2017-2022 Chris N. Richardson and Garth N. 
Wells # # This file is part of FEniCS-miniapp (https://www.fenicsproject.org) # # SPDX-License-Identifier: MIT import basix.ufl from ufl import (Coefficient, Identity, FunctionSpace, Mesh, TestFunction, TrialFunction, dx, grad, inner, tetrahedron, tr) # Elasticity parameters E = 1.0e6 nu = 0.3 mu = E / (2.0 * (1.0 + nu)) lmbda = E * nu / ((1.0 + nu) * (1.0 - 2.0 * nu)) cell = tetrahedron # Load namespace ns = vars() forms = [] for degree in range(1, 4): element = basix.ufl.element("Lagrange", "tetrahedron", degree, shape=(3, )) domain = Mesh(basix.ufl.element("Lagrange", "tetrahedron", 1, shape=(3, ))) space = FunctionSpace(domain, element) u, v = TrialFunction(space), TestFunction(space) f = Coefficient(space) def eps(v): return 0.5*(grad(v) + grad(v).T) def sigma(v): return 2.0*mu*eps(v) + lmbda*tr(eps(v))*Identity(3) # Add forms to namespace with names a1, a2, a3 etc. aname = 'a' + str(degree) Lname = 'L' + str(degree) ns[aname] = inner(sigma(u), eps(v))*dx ns[Lname] = inner(f, v)*dx del u, v, f forms += [ns[aname], ns[Lname]] performance-test-0.8.0/src/Poisson.py000066400000000000000000000022531457237023300176000ustar00rootroot00000000000000# Copyright (C) 2017-2022 Chris N. Richardson and Garth N. Wells # # This file is part of FEniCS-miniapp (https://www.fenicsproject.org) # # SPDX-License-Identifier: MIT import basix.ufl from ufl import (Coefficient, FunctionSpace, TestFunction, TrialFunction, Mesh, action, ds, dx, grad, inner, tetrahedron) # Load namespace ns = vars() forms = [] for degree in range(1, 4): element = basix.ufl.element("Lagrange", "tetrahedron", degree) domain = Mesh(basix.ufl.element("Lagrange", "tetrahedron", 1, shape=(3,))) space = FunctionSpace(domain, element) u = TrialFunction(space) v = TestFunction(space) f = Coefficient(space) g = Coefficient(space) un = Coefficient(space) aname = 'a' + str(degree) Lname = 'L' + str(degree) Mname = 'M' + str(degree) # Insert into namespace so that the forms will be named a1, a2, a3 etc. 
ns[aname] = inner(grad(u), grad(v))*dx ns[Lname] = f*v*dx + g*v*ds ns[Mname] = action(ns[aname], un) # Delete, so that the forms will get unnamed args and coefficients # and default to v_0, v_1, w0, w1 etc. del u, v, f, g, un forms += [ns[aname], ns[Lname], ns[Mname]] performance-test-0.8.0/src/cg.h000066400000000000000000000044201457237023300163340ustar00rootroot00000000000000// Copyright (C) 2021 Igor A. Baratta, Chris Richardson // SPDX-License-Identifier: MIT #include #include #include #include using namespace dolfinx; namespace linalg { /// Compute vector r = alpha*x + y /// @param[out] r Result /// @param[in] alpha /// @param[in] x /// @param[in] y template void axpy(la::Vector& r, U alpha, const la::Vector& x, const la::Vector& y) { std::transform(x.array().begin(), x.array().end(), y.array().begin(), r.mutable_array().begin(), [alpha](auto x, auto y) { return alpha * x + y; }); } /// Solve problem A.x = b using the Conjugate Gradient method /// @tparam U The scalar type /// @tparam ApplyFunction Type of the function object "action" /// @param[in, out] x Solution vector, may be set to an initial guess /// @param[in] b RHS Vector /// @param[in] action Function that provides the action of the linear operator /// @param[in] kmax Maximum number of iterations /// @param[in] rtol Relative tolerances for convergence /// @return The number if iterations /// @pre It is required that the ghost values of `x` and `b` have been /// updated before this function is called template int cg(la::Vector& x, const la::Vector& b, ApplyFunction&& action, int kmax = 50, double rtol = 1e-8) { // Create working vectors la::Vector r(b), y(b); // Compute initial residual r0 = b - Ax0 action(x, y); axpy(r, U(-1), y, b); // Create p work vector la::Vector p(r); // Iterations of CG auto rnorm0 = la::squared_norm(r); const auto rtol2 = rtol * rtol; auto rnorm = rnorm0; int k = 0; while (k < kmax) { ++k; // Compute y = A p action(p, y); // Compute alpha = r.r/p.y const U alpha = rnorm / 
la::inner_product(p, y); // Update x (x <- x + alpha*p) axpy(x, alpha, p, x); // Update r (r <- r - alpha*y) axpy(r, -alpha, y, r); // Update residual norm const auto rnorm_new = la::squared_norm(r); const U beta = rnorm_new / rnorm; rnorm = rnorm_new; if (rnorm / rnorm0 < rtol2) break; // Update p (p <- beta*p + r) axpy(p, beta, p, r); } return k; } } // namespace linalg performance-test-0.8.0/src/cgpoisson_problem.cpp000066400000000000000000000176261457237023300220360ustar00rootroot00000000000000// Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT #include "cgpoisson_problem.h" #include "Poisson.h" #include "cg.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace dolfinx; using T = PetscScalar; namespace { void pack_fn(std::span in, std::span idx, std::span out) { for (std::size_t i = 0; i < idx.size(); ++i) out[i] = in[idx[i]]; } void unpack_fn(std::span in, std::span idx, std::span out, std::function op) { for (std::size_t i = 0; i < idx.size(); ++i) out[idx[i]] = op(out[idx[i]], in[i]); } } // namespace std::tuple>, std::shared_ptr>, std::function&, const la::Vector&)>> cgpoisson::problem(std::shared_ptr> mesh, int order, std::string scatterer) { common::Timer t0("ZZZ FunctionSpace"); std::vector fs_poisson_a = {functionspace_form_Poisson_a1, functionspace_form_Poisson_a2, functionspace_form_Poisson_a3}; auto V = std::make_shared>( fem::create_functionspace(*fs_poisson_a.at(order - 1), "v_0", mesh)); t0.stop(); common::Timer t1("ZZZ Assemble"); common::Timer t2("ZZZ Create boundary conditions"); // Define boundary condition auto u0 = std::make_shared>(V); u0->x()->set(0); // Find facets with bc applied const int tdim = mesh->topology()->dim(); const std::vector bc_facets = mesh::locate_entities( *mesh, tdim - 1, [](auto x) { 
constexpr double eps = 1.0e-8; std::vector marker(x.extent(1), false); for (std::size_t p = 0; p < x.extent(1); ++p) { double x0 = x(0, p); if (std::abs(x0) < eps or std::abs(x0 - 1) < eps) marker[p] = true; } return marker; }); // Find constrained dofs const std::vector bdofs = fem::locate_dofs_topological( *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets); auto bc = std::make_shared>(u0, bdofs); t2.stop(); // Define coefficients common::Timer t3("ZZZ Create RHS function"); auto f = std::make_shared>(V); auto g = std::make_shared>(V); f->interpolate( [](auto x) -> std::pair, std::vector> { std::vector v(x.extent(1)); for (std::size_t p = 0; p < x.extent(1); ++p) { double dx = x(0, p) - 0.5; double dy = x(1, p) - 0.5; double dr = dx * dx + dy * dy; v[p] = 10 * std::exp(-dr / 0.02); } return {std::move(v), {v.size()}}; }); g->interpolate( [](auto x) -> std::pair, std::vector> { std::vector f(x.extent(1)); for (std::size_t p = 0; p < x.extent(1); ++p) f[p] = std::sin(5 * x(0, p)); return {f, {f.size()}}; }); t3.stop(); std::vector form_poisson_L = {form_Poisson_L1, form_Poisson_L2, form_Poisson_L3}; std::vector form_poisson_a = {form_Poisson_a1, form_Poisson_a2, form_Poisson_a3}; std::vector form_poisson_M = {form_Poisson_M1, form_Poisson_M2, form_Poisson_M3}; // Define variational forms auto L = std::make_shared>(fem::create_form( *form_poisson_L.at(order - 1), {V}, {{"w0", f}, {"w1", g}}, {}, {})); // auto a = std::make_shared>(fem::create_form( // *form_poisson_a.at(order - 1), {V, V}, // std::vector>>{}, {}, {})); auto un = std::make_shared>(V); auto M = std::make_shared>(fem::create_form( *form_poisson_M.at(order - 1), {V}, {{"w0", un}}, {{}}, {})); // Create la::Vector la::Vector b(L->function_spaces()[0]->dofmap()->index_map, L->function_spaces()[0]->dofmap()->index_map_bs()); b.set(0); common::Timer t5("ZZZ Assemble vector"); const std::vector constants_L = fem::pack_constants(*L); auto coeffs_L = fem::allocate_coefficient_storage(*L); 
fem::pack_coefficients(*L, coeffs_L); fem::assemble_vector(b.mutable_array(), *L, constants_L, fem::make_coefficients_span(coeffs_L)); // Apply lifting to account for Dirichlet boundary condition // b <- b - A * x_bc fem::set_bc(un->x()->mutable_array(), {bc}, -1.0); fem::assemble_vector(b.mutable_array(), *M); // Communicate ghost values b.scatter_rev(std::plus()); // Set BC dofs to zero (effectively zeroes columns of A) fem::set_bc(b.mutable_array(), {bc}, 0.0); b.scatter_fwd(); // Pack coefficients and constants if (un->x()->array().size() != b.array().size()) throw std::runtime_error("error"); // Create Function to hold solution auto u = std::make_shared>(V); std::function&, const la::Vector&)> solver_function = [M, un, bc, scatterer](fem::Function& u, const la::Vector& b) { const std::vector constants; auto coeff = fem::allocate_coefficient_storage(*M); auto V = M->function_spaces()[0]; auto idx_map = V->dofmap()->index_map; int bs = V->dofmap()->bs(); common::Scatterer sct(*idx_map, bs); std::vector local_buffer(sct.local_buffer_size(), 0); std::vector remote_buffer(sct.remote_buffer_size(), 0); common::Scatterer<>::type type; if (scatterer == "neighbor") type = common::Scatterer<>::type::neighbor; if (scatterer == "p2p") type = common::Scatterer<>::type::p2p; std::vector request = sct.create_request_vector(type); // Create function for computing the action of A on x (y = Ax) auto action = [&](la::Vector& x, la::Vector& y) { // Zero y y.set(0.0); // Update coefficient un (just copy data from x to un) std::copy(x.array().begin(), x.array().end(), un->x()->mutable_array().begin()); // Compute action of A on x fem::pack_coefficients(*M, coeff); fem::assemble_vector(y.mutable_array(), *M, std::span(constants), fem::make_coefficients_span(coeff)); // Set BC dofs to zero (effectively zeroes rows of A) fem::set_bc(y.mutable_array(), {bc}, 0.0); // Accumuate ghost values // y.scatter_rev(std::plus()); const std::int32_t local_size = bs * idx_map->size_local(); const 
std::int32_t num_ghosts = bs * idx_map->num_ghosts(); std::span remote_data(y.mutable_array().data() + local_size, num_ghosts); std::span local_data(y.mutable_array().data(), local_size); sct.scatter_rev_begin(remote_data, remote_buffer, local_buffer, pack_fn, request, type); sct.scatter_rev_end(local_buffer, local_data, unpack_fn, std::plus(), request); // Update ghost values sct.scatter_fwd_begin(local_data, local_buffer, remote_buffer, pack_fn, request, type); sct.scatter_fwd_end(remote_buffer, remote_data, unpack_fn, request); }; int num_it = linalg::cg(*u.x(), b, action, 100, 1e-6); return num_it; }; return {std::make_shared>(std::move(b)), u, solver_function}; } performance-test-0.8.0/src/cgpoisson_problem.h000066400000000000000000000013661457237023300214750ustar00rootroot00000000000000// Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT #pragma once #include #include #include #include #include #include namespace cgpoisson { std::tuple>, std::shared_ptr>, std::function&, const dolfinx::la::Vector&)>> problem(std::shared_ptr> mesh, int order, std::string scatterer); } // namespace poisson performance-test-0.8.0/src/elasticity_problem.cpp000066400000000000000000000207601457237023300221750ustar00rootroot00000000000000// Copyright (C) 2017-2019 Chris N. Richardson and Garth N. 
Wells // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT #include "elasticity_problem.h" #include "Elasticity.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace dolfinx; using T = PetscScalar; namespace { // Function to compute the near nullspace for elasticity - it is made up // of the six rigid body modes MatNullSpace build_near_nullspace(const fem::FunctionSpace& V) { // Create vectors for nullspace basis auto map = V.dofmap()->index_map; int bs = V.dofmap()->index_map_bs(); std::vector> basis(6, la::Vector(map, bs)); // x0, x1, x2 translations std::int32_t length_block = map->size_local() + map->num_ghosts(); for (int k = 0; k < 3; ++k) { std::span x = basis[k].mutable_array(); for (std::int32_t i = 0; i < length_block; ++i) x[bs * i + k] = 1.0; } // Rotations auto x3 = basis[3].mutable_array(); auto x4 = basis[4].mutable_array(); auto x5 = basis[5].mutable_array(); const std::vector x = V.tabulate_dof_coordinates(false); const std::int32_t* dofs = V.dofmap()->map().data_handle(); for (std::size_t i = 0; i < V.dofmap()->map().size(); ++i) { std::span xd(x.data() + 3 * dofs[i], 3); x3[bs * dofs[i] + 0] = -xd[1]; x3[bs * dofs[i] + 1] = xd[0]; x4[bs * dofs[i] + 0] = xd[2]; x4[bs * dofs[i] + 2] = -xd[0]; x5[bs * dofs[i] + 2] = xd[1]; x5[bs * dofs[i] + 1] = -xd[2]; } // Orthonormalize basis la::orthonormalize(std::vector>>( basis.begin(), basis.end())); if (!la::is_orthonormal( std::vector>>( basis.begin(), basis.end()))) { throw std::runtime_error("Space not orthonormal"); } // Build PETSc nullspace object std::int32_t length = bs * map->size_local(); std::vector> basis_local; std::transform(basis.cbegin(), basis.cend(), std::back_inserter(basis_local), [length](auto& x) { return std::span(x.array().data(), length); }); MPI_Comm comm = V.mesh()->comm(); std::vector v = 
la::petsc::create_vectors(comm, basis_local); MatNullSpace ns = la::petsc::create_nullspace(comm, v); std::for_each(v.begin(), v.end(), [](auto v) { VecDestroy(&v); }); return ns; } } // namespace std::tuple>, std::shared_ptr>, std::function&, const la::Vector&)>> elastic::problem(std::shared_ptr> mesh, int order) { common::Timer t0("ZZZ FunctionSpace"); std::vector fs_elasticity = {functionspace_form_Elasticity_a1, functionspace_form_Elasticity_a2, functionspace_form_Elasticity_a3}; auto V = std::make_shared>( fem::create_functionspace(*fs_elasticity.at(order - 1), "v_0", mesh)); t0.stop(); common::Timer t0a("ZZZ Create boundary conditions"); // Define boundary condition auto u0 = std::make_shared>(V); u0->x()->set(0); const int tdim = mesh->topology()->dim(); // Find facets with bc applied const std::vector bc_facets = mesh::locate_entities( *mesh, tdim - 1, [](auto x) { constexpr double eps = 1.0e-8; std::vector marker(x.extent(1), false); for (std::size_t p = 0; p < x.extent(1); ++p) { double x1 = x(1, p); if (std::abs(x1) < eps) marker[p] = true; } return marker; }); // Find constrained dofs const std::vector bdofs = fem::locate_dofs_topological( *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets); // Bottom (x[1] = 0) surface auto bc = std::make_shared>(u0, bdofs); t0a.stop(); common::Timer t0b("ZZZ Create RHS function"); // Define coefficients auto f = std::make_shared>(V); f->interpolate( [](auto x) -> std::pair, std::vector> { std::vector vdata(x.extent(0) * x.extent(1)); namespace stdex = MDSPAN_IMPL_STANDARD_NAMESPACE::MDSPAN_IMPL_PROPOSED_NAMESPACE; MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< T, MDSPAN_IMPL_STANDARD_NAMESPACE::extents< std::size_t, 3, MDSPAN_IMPL_STANDARD_NAMESPACE::dynamic_extent>> v(vdata.data(), x.extent(0), x.extent(1)); for (std::size_t p = 0; p < x.extent(1); ++p) { double dx = x(0, p) - 0.5; double dz = x(2, p) - 0.5; double r = std::sqrt(dx * dx + dz * dz); v(0, p) = -dz * r * x(1, p); v(1, p) = 1.0; v(2, p) = dx * r 
* x(1, p); } return {vdata, {v.extent(0), v.extent(1)}}; }); t0b.stop(); common::Timer t0c("ZZZ Create forms"); // Define variational forms std::vector form_elasticity_L = {form_Elasticity_L1, form_Elasticity_L2, form_Elasticity_L3}; std::vector form_elasticity_a = {form_Elasticity_a1, form_Elasticity_a2, form_Elasticity_a3}; auto L = std::make_shared>(fem::create_form( *form_elasticity_L.at(order - 1), {V}, {{"w0", f}}, {}, {})); auto a = std::make_shared>(fem::create_form( *form_elasticity_a.at(order - 1), {V, V}, {}, {}, {})); t0c.stop(); // Create matrices and vector, and assemble system std::shared_ptr A = std::make_shared( fem::petsc::create_matrix(*a), false); common::Timer t2("ZZZ Assemble matrix"); const std::vector constants_a = fem::pack_constants(*a); auto coeffs_a = fem::allocate_coefficient_storage(*a); fem::pack_coefficients(*a, coeffs_a); fem::assemble_matrix(la::petsc::Matrix::set_block_fn(A->mat(), ADD_VALUES), *a, std::span(constants_a), fem::make_coefficients_span(coeffs_a), {bc}); MatAssemblyBegin(A->mat(), MAT_FLUSH_ASSEMBLY); MatAssemblyEnd(A->mat(), MAT_FLUSH_ASSEMBLY); fem::set_diagonal(la::petsc::Matrix::set_fn(A->mat(), INSERT_VALUES), *V, {bc}); MatAssemblyBegin(A->mat(), MAT_FINAL_ASSEMBLY); MatAssemblyEnd(A->mat(), MAT_FINAL_ASSEMBLY); t2.stop(); // Wrap la::Vector with Petsc Vec la::Vector b(L->function_spaces()[0]->dofmap()->index_map, L->function_spaces()[0]->dofmap()->index_map_bs()); b.set(0); common::Timer t3("ZZZ Assemble vector"); const std::vector constants_L = fem::pack_constants(*L); auto coeffs_L = fem::allocate_coefficient_storage(*L); fem::pack_coefficients(*L, coeffs_L); fem::assemble_vector(b.mutable_array(), *L, constants_L, fem::make_coefficients_span(coeffs_L)); fem::apply_lifting(b.mutable_array(), {a}, {constants_L}, {fem::make_coefficients_span(coeffs_L)}, {{bc}}, {}, 1.0); b.scatter_rev(std::plus<>()); fem::set_bc(b.mutable_array(), {bc}); t3.stop(); common::Timer t4("ZZZ Create near-nullspace"); // Create 
Function to hold solution auto u = std::make_shared>(V); // Build near-nullspace and attach to matrix MatNullSpace ns = build_near_nullspace(*V); MatSetNearNullSpace(A->mat(), ns); MatNullSpaceDestroy(&ns); t4.stop(); std::function&, const la::Vector&)> solver_function = [A](fem::Function& u, const la::Vector& b) { // Create solver la::petsc::KrylovSolver solver(MPI_COMM_WORLD); solver.set_from_options(); solver.set_operator(A->mat()); // Wrap la::Vector la::petsc::Vector _b(la::petsc::create_vector_wrap(b), false); la::petsc::Vector x(la::petsc::create_vector_wrap(*u.x()), false); // Solve int num_iter = solver.solve(x.vec(), _b.vec()); return num_iter; }; return {std::make_shared>(std::move(b)), u, solver_function}; } performance-test-0.8.0/src/elasticity_problem.h000066400000000000000000000014061457237023300216360ustar00rootroot00000000000000// Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT #pragma once #include #include #include #include #include namespace dolfinx::mesh { template class Mesh; } namespace elastic { std::tuple>, std::shared_ptr>, std::function&, const dolfinx::la::Vector&)>> problem(std::shared_ptr> mesh, int order); } // namespace elastic performance-test-0.8.0/src/main.cpp000066400000000000000000000211171457237023300172240ustar00rootroot00000000000000// Copyright (C) 2017-2022 Chris N. Richardson and Garth N. 
Wells // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT #include "cgpoisson_problem.h" #include "elasticity_problem.h" #include "mem.h" #include "mesh.h" #include "poisson_problem.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace po = boost::program_options; std::string int64_to_human(std::int64_t n) { double r = static_cast(n); const std::string name[] = {"", "thousand", "million", "billion", "trillion"}; int i = 0; while (r > 1000.0) { r /= 1000.0; i++; } if (i > 4) throw std::runtime_error("number too big"); std::stringstream s; if (i == 0) return s.str(); s << " (" << std::setprecision(3) << r << " " << name[i] << ")"; return s.str(); } void solve(int argc, char* argv[]) { po::options_description desc("Allowed options"); bool mem_profile; bool use_subcomm; desc.add_options()("help,h", "print usage message")( "problem_type", po::value()->default_value("poisson"), "problem (poisson, cgpoisson, or elasticity)")( "mesh_type", po::value()->default_value("cube"), "mesh (cube or unstructured)")( "memory_profiling", po::bool_switch(&mem_profile)->default_value(false), "turn on memory logging")( "subcomm_partition", po::bool_switch(&use_subcomm)->default_value(false), "Use sub-communicator for partitioning")( "scaling_type", po::value()->default_value("weak"), "scaling (weak or strong)")( "output", po::value()->default_value(""), "output directory (no output unless this is set)")( "ndofs", po::value()->default_value(50000), "number of degrees of freedom")( "order", po::value()->default_value(1), "polynomial order")( "scatterer", po::value()->default_value("neighbor"), "scatterer for CG (neighbor or p2p)"); po::variables_map vm; po::store(po::command_line_parser(argc, argv) .options(desc) .allow_unregistered() .run(), vm); po::notify(vm); if (vm.count("help")) { std::cout << desc << std::endl; ; return; } 
const std::string problem_type = vm["problem_type"].as(); const std::string mesh_type = vm["mesh_type"].as(); const std::string scaling_type = vm["scaling_type"].as(); const std::size_t ndofs = vm["ndofs"].as(); const int order = vm["order"].as(); const std::string scatterer = vm["scatterer"].as(); const std::string output_dir = vm["output"].as(); const bool output = (output_dir.size() > 0); const int mpi_rank = dolfinx::MPI::rank(MPI_COMM_WORLD); bool quit_flag = false; std::thread mem_thread; if (mem_profile and mpi_rank == 0) { mem_thread = std::thread(process_mem_usage, std::ref(quit_flag)); } bool strong_scaling; if (scaling_type == "strong") strong_scaling = true; else if (scaling_type == "weak") strong_scaling = false; else throw std::runtime_error("Scaling type '" + scaling_type + "` unknown"); // Get number of processes const std::size_t num_processes = dolfinx::MPI::size(MPI_COMM_WORLD); // Assemble problem std::shared_ptr> mesh; std::shared_ptr> b; std::shared_ptr> u; std::function&, const dolfinx::la::Vector&)> solver_function; const int ndofs_per_node = (problem_type == "elasticity") ? 3 : 1; dolfinx::common::Timer t0("ZZZ Create Mesh"); if (mesh_type == "cube") { mesh = std::make_shared>( create_cube_mesh(MPI_COMM_WORLD, ndofs, strong_scaling, ndofs_per_node, order, use_subcomm)); } else { mesh = create_spoke_mesh(MPI_COMM_WORLD, ndofs, strong_scaling, ndofs_per_node); } t0.stop(); dolfinx::common::Timer t_ent( "ZZZ Create facets and facet->cell connectivity"); mesh->topology_mutable()->create_entities(2); mesh->topology_mutable()->create_connectivity(2, 3); t_ent.stop(); if (problem_type == "poisson") { // Create Poisson problem std::tie(b, u, solver_function) = poisson::problem(mesh, order); } else if (problem_type == "cgpoisson") { // Create Poisson problem std::tie(b, u, solver_function) = cgpoisson::problem(mesh, order, scatterer); } else if (problem_type == "elasticity") { // Create elasticity problem. 
Near-nullspace will be attached to the // linear operator (matrix). std::tie(b, u, solver_function) = elastic::problem(mesh, order); } else throw std::runtime_error("Unknown problem type: " + problem_type); // Print simulation summary if (dolfinx::MPI::rank(MPI_COMM_WORLD) == 0) { char petsc_version[256]; PetscGetVersion(petsc_version, 256); const std::int64_t num_dofs = u->function_space()->dofmap()->index_map->size_global() * u->function_space()->dofmap()->index_map_bs(); const int tdim = mesh->topology()->dim(); const std::int64_t num_cells = mesh->topology()->index_map(tdim)->size_global(); const std::string num_cells_human = int64_to_human(num_cells); const std::string num_dofs_human = int64_to_human(num_dofs); std::cout << "----------------------------------------------------------------" << std::endl; std::cout << "Test problem summary" << std::endl; std::cout << " dolfinx version: " << DOLFINX_VERSION_STRING << std::endl; std::cout << " dolfinx hash: " << DOLFINX_VERSION_GIT << std::endl; std::cout << " ufl hash: " << UFCX_SIGNATURE << std::endl; std::cout << " petsc version: " << petsc_version << std::endl; std::cout << " Problem type: " << problem_type << std::endl; std::cout << " Scaling type: " << scaling_type << std::endl; std::cout << " Num processes: " << num_processes << std::endl; std::cout << " Num cells: " << num_cells << num_cells_human << std::endl; std::cout << " Total degrees of freedom: " << num_dofs << num_dofs_human << std::endl; std::cout << " Average degrees of freedom per process: " << num_dofs / dolfinx::MPI::size(MPI_COMM_WORLD) << std::endl; std::cout << "----------------------------------------------------------------" << std::endl; } dolfinx::common::Timer t5("ZZZ Solve"); int num_iter = solver_function(*u, *b); t5.stop(); if (output) { dolfinx::common::Timer t6("ZZZ Output"); std::string filename = output_dir + "/solution-" + std::to_string(num_processes) + ".xdmf"; dolfinx::io::XDMFFile file(MPI_COMM_WORLD, filename, "w"); 
file.write_mesh(*mesh); file.write_function(*u, 0.0); t6.stop(); } // Display timings dolfinx::list_timings(MPI_COMM_WORLD, {dolfinx::TimingType::wall}); // Report number of Krylov iterations double norm = dolfinx::la::norm(*(u->x())); if (dolfinx::MPI::rank(MPI_COMM_WORLD) == 0) { std::cout << "*** Number of Krylov iterations: " << num_iter << std::endl; std::cout << "*** Solution norm: " << norm << std::endl; } if (mem_profile and mpi_rank == 0) { quit_flag = true; mem_thread.join(); } } int main(int argc, char* argv[]) { dolfinx::common::Timer t0("Init MPI"); MPI_Init(&argc, &argv); t0.stop(); dolfinx::common::Timer t1("Init logging"); dolfinx::init_logging(argc, argv); t1.stop(); dolfinx::common::Timer t2("Init PETSc"); PetscInitialize(&argc, &argv, nullptr, nullptr); t2.stop(); // Set the logging thread name to show the process rank and enable on // rank 0 (add more here if desired) const int mpi_rank = dolfinx::MPI::rank(MPI_COMM_WORLD); std::string thread_name = "RANK: " + std::to_string(mpi_rank); loguru::set_thread_name(thread_name.c_str()); if (mpi_rank == 0) loguru::g_stderr_verbosity = loguru::Verbosity_INFO; solve(argc, argv); PetscFinalize(); MPI_Finalize(); return 0; } performance-test-0.8.0/src/mem.cpp000066400000000000000000000015311457237023300170540ustar00rootroot00000000000000// Copyright (C) 2021 Chris N. 
Richardson // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT #include #include #include #include #include #include #include #include #include #include void process_mem_usage(bool& quit) { loguru::set_thread_name("MEMORY"); const int page_size_bytes = sysconf(_SC_PAGE_SIZE); while(!quit) { std::ifstream f("/proc/self/stat", std::ios_base::in); std::istream_iterator it(f); std::advance(it, 21); std::size_t vsize, rss; f >> vsize >> rss; f.close(); LOG(WARNING) << "VSIZE=" << vsize/1024 << " RSS=" << rss*page_size_bytes/1024 ; std::this_thread::sleep_for(std::chrono::milliseconds(100)); } } performance-test-0.8.0/src/mem.h000066400000000000000000000003531457237023300165220ustar00rootroot00000000000000// Copyright (C) 2021 Chris N. Richardson // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT /// Thread to output memory usage to logger void process_mem_usage(bool& quit); performance-test-0.8.0/src/mesh.cpp000066400000000000000000000317141457237023300172400ustar00rootroot00000000000000// Copyright (C) 2019 Chris N. Richardson and Garth N. Wells // Licensed under the MIT License. See LICENSE file in the project // root for full license information. 
#include "mesh.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace
{
// Calculate number of vertices, edges, facets, and cells for any given
// level of refinement
//
// i, j, k are the number of base-mesh divisions in each direction and
// nrefine the number of uniform refinements. Returns {nv, ne, nf, nc}.
constexpr std::tuple
num_entities(std::int64_t i, std::int64_t j, std::int64_t k, int nrefine)
{
  std::int64_t nv = (i + 1) * (j + 1) * (k + 1);
  std::int64_t ne = 0;
  std::int64_t nc = (i * j * k) * 6;
  // Per-level multipliers for the edge and facet count formulas
  std::int64_t earr[3] = {1, 3, 7};
  std::int64_t farr[2] = {2, 12};
  for (int r = 0; r < nrefine; ++r)
  {
    ne = earr[0] * (i + j + k) + earr[1] * (i * j + j * k + k * i)
         + earr[2] * i * j * k;
    // Each refinement adds one vertex per existing edge
    nv += ne;
    nc *= 8;
    earr[0] *= 2;
    earr[1] *= 4;
    earr[2] *= 8;
    farr[0] *= 4;
    farr[1] *= 8;
  }
  ne = earr[0] * (i + j + k) + earr[1] * (i * j + j * k + k * i)
       + earr[2] * i * j * k;
  std::int64_t nf = farr[0] * (i * j + j * k + k * i) + farr[1] * i * j * k;

  return {nv, ne, nf, nc};
}

// Number of scalar dofs for the given mesh size, refinement level and
// polynomial order (1 to 4); throws std::runtime_error otherwise
std::int64_t num_pdofs(std::int64_t i, std::int64_t j, std::int64_t k,
                       int nrefine, int order)
{
  auto [nv, ne, nf, nc] = num_entities(i, j, k, nrefine);

  switch (order)
  {
  case 1:
    return nv;
  case 2:
    return nv + ne;
  case 3:
    return nv + 2 * ne + nf;
  case 4:
    return nv + 3 * ne + 3 * nf + nc;
  default:
    throw std::runtime_error("Order not supported");
  }
}
} // namespace

/// Create a tetrahedral unit-cube mesh sized to hit a target dof count.
///
/// A base box mesh (Nx x Ny x Nz) and a number of uniform refinements
/// are chosen so that the dof count from num_pdofs is close to the
/// target, then the mesh is created and refined.
///
/// @param comm              Communicator the mesh is distributed over.
/// @param target_dofs       Requested number of dofs.
/// @param target_dofs_total If true `target_dofs` is the global total,
///                          otherwise it is per process.
/// @param dofs_per_node     Number of dofs per mesh node.
/// @param order             Polynomial order (1 to 4).
/// @param use_subcomm       If true, partition on a sub-communicator
///                          holding one rank per shared-memory node.
/// @return The (refined) mesh.
dolfinx::mesh::Mesh create_cube_mesh(MPI_Comm comm, std::size_t target_dofs,
                                     bool target_dofs_total,
                                     std::size_t dofs_per_node, int order,
                                     bool use_subcomm)
{
  // Get number of processes
  const std::size_t num_processes = dolfinx::MPI::size(comm);

  // Target total dofs
  std::int64_t N = 0;
  if (target_dofs_total == true)
    N = target_dofs / dofs_per_node;
  else
    N = target_dofs * num_processes / dofs_per_node;

  std::size_t Nx, Ny, Nz;
  int r = 0;

  // Choose Nx_max carefully. If too large, the base mesh may become too
  // large for the partitioner; likewise, if too small, it will fail on
  // large numbers of processes.
  const std::size_t Nx_max = 200;

  // Get initial guess for Nx, Ny, Nz, r
  Nx = 1;
  std::int64_t ndofs = 0;
  while (ndofs < N)
  {
    // Increase base mesh size
    ++Nx;
    if (Nx > Nx_max)
    {
      // Base mesh got too big, so add refinement levels
      // Each increase will dramatically (~8x) increase the number of
      // dofs
      while (ndofs < N)
      {
        // Keep on refining until we have overshot
        ++r;
        ndofs = num_pdofs(Nx, Nx, Nx, r, order);
      }
      while (ndofs > N)
      {
        // Shrink base mesh until dofs are back on target
        --Nx;
        ndofs = num_pdofs(Nx, Nx, Nx, r, order);
      }
    }
    ndofs = num_pdofs(Nx, Nx, Nx, r, order);
  }

  Ny = Nx;
  Nz = Nx;

  // Optimise number of dofs by trying nearby mesh sizes +/- 5 or 10 in
  // each dimension.
  //
  // The search is done in signed arithmetic around the fixed initial
  // guess: with unsigned indices `Nx - 10` and `i - 5` wrap around for
  // small meshes (so the search never runs), and reading Nx in the loop
  // condition while also assigning it moves the search window. Sizes
  // are clamped to at least 1 so that no direction is ever empty.
  const std::int64_t Nx0 = static_cast<std::int64_t>(Nx);
  std::int64_t mindiff = 1000000;
  for (std::int64_t i = std::max<std::int64_t>(Nx0 - 10, 1); i < Nx0 + 10; ++i)
  {
    const std::int64_t jk_begin = std::max<std::int64_t>(i - 5, 1);
    for (std::int64_t j = jk_begin; j < i + 5; ++j)
    {
      for (std::int64_t k = jk_begin; k < i + 5; ++k)
      {
        const std::int64_t diff = std::abs(num_pdofs(i, j, k, r, order) - N);
        if (diff < mindiff)
        {
          mindiff = diff;
          Nx = static_cast<std::size_t>(i);
          Ny = static_cast<std::size_t>(j);
          Nz = static_cast<std::size_t>(k);
        }
      }
    }
  }

#ifdef HAS_PARMETIS
  auto graph_part = dolfinx::graph::parmetis::partitioner();
#elif HAS_PTSCOTCH
  auto graph_part = dolfinx::graph::scotch::partitioner(
      dolfinx::graph::scotch::strategy::scalability);
#elif HAS_KAHIP
  auto graph_part = dolfinx::graph::kahip::partitioner();
#else
#error "No mesh partitioner has been selected"
#endif

  MPI_Comm sub_comm;
  if (use_subcomm)
  {
    // Create a sub-communicator for mesh partitioning
    MPI_Comm shm_comm;
    // Get a local comm on each node
    MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,
                        &shm_comm);
    int shm_comm_rank = dolfinx::MPI::rank(shm_comm);
    MPI_Comm_free(&shm_comm);
    // Create a comm across nodes, using rank 0 of the local comm on each node
    int color = (shm_comm_rank == 0) ? 0 : MPI_UNDEFINED;
    MPI_Comm_split(comm, color, 0, &sub_comm);
  }
  else
    MPI_Comm_dup(comm, &sub_comm);

  auto cell_part = dolfinx::mesh::create_cell_partitioner(
      dolfinx::mesh::GhostMode::none, graph_part);
  auto mesh = dolfinx::mesh::create_box(
      comm, sub_comm, {{{0.0, 0.0, 0.0}, {1.0, 1.0, 1.0}}}, {Nx, Ny, Nz},
      dolfinx::mesh::CellType::tetrahedron, cell_part);

  // Ranks that passed MPI_UNDEFINED to MPI_Comm_split hold
  // MPI_COMM_NULL, which must not be passed to MPI_Comm_free
  if (sub_comm != MPI_COMM_NULL)
    MPI_Comm_free(&sub_comm);

  if (dolfinx::MPI::rank(mesh.comm()) == 0)
  {
    std::cout << "UnitCube (" << Nx << "x" << Ny << "x" << Nz
              << ") to be refined " << r << " times" << std::endl;
  }

  for (int i = 0; i < r; ++i)
  {
    mesh.topology_mutable()->create_connectivity(3, 1);
    mesh = dolfinx::refinement::refine(mesh, false);
  }

  return mesh;
}
//-----------------------------------------------------------------------------
/// Create an unstructured "ring with spokes" tetrahedral mesh and
/// refine it towards a target dof count.
///
/// The coarse mesh is built on rank 0 only, refined uniformly while
/// vertices + edges is below the target, and then refined once more on
/// a subset of edges whose size is tuned over up to five trial steps.
///
/// @param comm              Communicator the mesh is distributed over.
/// @param target_dofs       Requested number of dofs.
/// @param target_dofs_total If true `target_dofs` is the global total,
///                          otherwise it is per process.
/// @param dofs_per_node     Number of dofs per mesh node.
/// @return The mesh from the last trial refinement step.
std::shared_ptr>
create_spoke_mesh(MPI_Comm comm, std::size_t target_dofs,
                  bool target_dofs_total, std::size_t dofs_per_node)
{
  // 64-bit: the global target can exceed the range of int once it is
  // multiplied by the number of processes
  std::int64_t target = static_cast<std::int64_t>(target_dofs / dofs_per_node);
  int mpi_size = dolfinx::MPI::size(comm);
  if (!target_dofs_total)
    target *= mpi_size;

  // Parameters controlling shape
  constexpr int n = 17;        // number of spokes
  constexpr double r0 = 0.25;  // inner radius of ring
  constexpr double r1 = 0.5;   // outer radius of ring
  constexpr double h0 = 1.2;   // height (inner)
  constexpr double h1 = 1.0;   // height (outer)
  constexpr int lspur = 6;     // number of elements in each spoke
  constexpr double l0 = 0.5;   // length of each element in spoke
  constexpr double dth = 0.15; // curl (angle increment) as spoke goes out
  constexpr double tap
      = 0.9; // taper (fractional height decrease on each element)

  // Subdivision of a cube into 6 tetrahedra
  constexpr int cube[6][4] = {{0, 1, 2, 4}, {1, 2, 4, 5}, {2, 4, 5, 6},
                              {0, 2, 3, 4}, {6, 7, 4, 2}, {2, 3, 4, 7}};

  // Calculate number of points and cells (only on process 0)
  int npoints = 0;
  int ncells = 0;
  const int mpi_rank = dolfinx::MPI::rank(comm);
  if (mpi_rank == 0)
  {
    npoints = n * 4 + n * lspur * 4;
    ncells = n * 6 + n * lspur * 6;
  }

  std::vector x(npoints * 3);
  std::vector topo(4 * ncells);

  if (mpi_rank == 0)
  {
    int p = 0;
    int c = 0;

    // Add n 'cubes' to make a joined up ring.
    for (int i = 0; i < n; ++i)
    {
      std::cout << "Adding cube " << i << std::endl;

      // Get the points for current cube
      std::array pts;
      for (std::size_t j = 0; j < pts.size(); ++j)
        pts[j] = (i * 4 + j) % (n * 4);

      // Add to topology
      for (int k = 0; k < 6; ++k)
      {
        for (int j = 0; j < 4; ++j)
          topo[4 * c + j] = pts[cube[k][j]];
        ++c;
      }

      // Calculate the position of points
      const double th = 2 * std::numbers::pi * i / n;
      std::array p0 = {r0 * std::cos(th), r0 * std::sin(th), h0};
      std::copy(p0.begin(), p0.end(), std::next(x.begin(), 3 * p));

      std::array p1 = {r0 * std::cos(th), r0 * std::sin(th), -h0};
      std::copy(p1.begin(), p1.end(), std::next(x.begin(), 3 * (p + 1)));

      std::array p2 = {r1 * std::cos(th), r1 * std::sin(th), -h1};
      std::copy(p2.begin(), p2.end(), std::next(x.begin(), 3 * (p + 2)));

      std::array p3 = {r1 * std::cos(th), r1 * std::sin(th), h1};
      std::copy(p3.begin(), p3.end(), std::next(x.begin(), 3 * (p + 3)));

      p += 4;
    }

    // Add spurs to ring
    for (int i = 0; i < n; ++i)
    {
      std::cout << "Adding spur " << i << std::endl;

      // Intermediate angle between two faces
      const double th0 = 2 * std::numbers::pi * (i + 0.5) / n;

      // Starting points on outer edge of ring
      std::array pts = {(i * 4 + 2) % (n * 4),
                        (i * 4 + 3) % (n * 4),
                        (i * 4 + 7) % (n * 4),
                        (i * 4 + 6) % (n * 4),
                        0,
                        0,
                        0,
                        0};

      // Build each spur outwards
      for (int k = 0; k < lspur; ++k)
      {
        // Add new points
        for (int j = 0; j < 4; ++j)
        {
          pts[j + 4] = p;
          std::span xp(x.data() + 3 * p, 3);
          std::copy_n(std::next(x.begin(), 3 * pts[j]), 3, xp.begin());
          xp[0] += l0 * std::cos(th0 + k * dth);
          xp[1] += l0 * std::sin(th0 + k * dth);
          xp[2] *= std::pow(tap, k);
          ++p;
        }

        // Add new cells
        for (int m = 0; m < 6; ++m)
        {
          for (int j = 0; j < 4; ++j)
            topo[4 * c + j] = pts[cube[m][j]];
          ++c;
        }

        // Outer face becomes inner face of next cube
        std::span _pts(pts.data(), 8);
        auto pts0 = _pts.first<4>();
        auto pts1 = _pts.last<4>();
        std::copy(pts1.begin(), pts1.end(), pts0.begin());
      }
    }

    // Check geometric sizes and rescale
    //
    // NOTE(review): the minima start at 0 and are combined with
    // std::min over absolute values, so they always stay 0 and the
    // shift by 0.9 * x0min below has no effect. Left unchanged because
    // altering it would change the benchmark geometry.
    double x0min(0), x0max(0), x1min(0), x1max(0), x2min(0), x2max(0);
    for (std::size_t i = 0; i < x.size(); i += 3)
    {
      x0min = std::min(std::abs(x[i]), x0min);
      x0max = std::max(std::abs(x[i]), x0max);
      x1min = std::min(std::abs(x[i + 1]), x1min);
      x1max = std::max(std::abs(x[i + 1]), x1max);
      x2min = std::min(std::abs(x[i + 2]), x2min);
      x2max = std::max(std::abs(x[i + 2]), x2max);
    }

    for (std::size_t i = 0; i < x.size(); i += 3)
      x[i] -= 0.9 * x0min;

    std::transform(x.begin(), x.end(), x.begin(),
                   [scale = 0.9 * x0max](auto x) { return x / scale; });

    LOG(INFO) << "x range = " << x0min << " - " << x0max << std::endl;
    LOG(INFO) << "y range = " << x1min << " - " << x1max << std::endl;
    LOG(INFO) << "z range = " << x2min << " - " << x2max << std::endl;
  }

  // New Mesh
  dolfinx::fem::CoordinateElement element(
      dolfinx::mesh::CellType::tetrahedron, 1);
  auto mesh = std::make_shared>(
      dolfinx::mesh::create_mesh(comm, topo, element, x, {x.size() / 3, 3},
                                 dolfinx::mesh::GhostMode::none));

  mesh->topology_mutable()->create_entities(1);

  // Refine uniformly while (vertices + edges) is still below the target
  while (mesh->topology()->index_map(0)->size_global()
             + mesh->topology()->index_map(1)->size_global()
         < target)
  {
    mesh = std::make_shared>(
        dolfinx::refinement::refine(*mesh, false));
    mesh->topology_mutable()->create_entities(1);
  }

  // Fraction of edges that would need a new vertex to reach the target
  double fraction
      = static_cast<double>(target
                            - mesh->topology()->index_map(0)->size_global())
        / mesh->topology()->index_map(1)->size_global();

  if (mpi_rank == 0)
  {
    std::cout << "Create unstructured mesh: desired fraction=" << fraction
              << std::endl;
  }

  // Estimate step needed to get desired refinement fraction
  // using some heuristics and bisection method
  int nmarked = std::pow(fraction, 1.6) * 2000;

  double f_lower = 0.0;
  double f_upper = 1.0;
  int lmark = 0;
  int umark = 2000;

  std::shared_ptr> meshi;
  for (int k = 0; k < 5; ++k)
  {
    // Trial step
    mesh->topology_mutable()->create_entities(1);
    std::vector marked_edges;
    const std::int32_t num_edges
        = mesh->topology()->index_map(1)->size_local();
    // Mark the first `nmarked` edges out of every 2000 local edges
    for (int i = 0; i < num_edges; ++i)
      if (i % 2000 < nmarked)
        marked_edges.push_back(i);

    meshi = std::make_shared>(
        dolfinx::refinement::refine(*mesh, marked_edges, false));

    double actual_fraction
        = static_cast<double>(meshi->topology()->index_map(0)->size_global()
                              - mesh->topology()->index_map(0)->size_global())
          / mesh->topology()->index_map(1)->size_global();

    if (mpi_rank == 0)
    {
      std::cout << "Edges marked = " << nmarked << "/2000" << std::endl;
      std::cout << "Step " << k
                << " achieved actual fraction = " << actual_fraction
                << std::endl;
    }

    if (actual_fraction > fraction)
    {
      umark = nmarked;
      f_upper = actual_fraction;
    }
    else
    {
      lmark = nmarked;
      f_lower = actual_fraction;
    }

    // Linear interpolation between the current lower and upper brackets
    int new_mark = (lmark * (f_upper - fraction) + umark * (fraction - f_lower))
                   / (f_upper - f_lower);

    if (nmarked == new_mark)
      break;
    else
      nmarked = new_mark;
  }

  return meshi;
}
performance-test-0.8.0/src/mesh.h000066400000000000000000000013361457237023300167020ustar00rootroot00000000000000// Copyright (C) 2017 Chris N. Richardson and Garth N. Wells
// Licensed under the MIT License. See LICENSE file in the project
// root for full license information.

#pragma once

#include
#include

namespace dolfinx::fem
{
template class CoordinateElement;
}

namespace dolfinx::mesh
{
template class Mesh;
}

dolfinx::mesh::Mesh create_cube_mesh(MPI_Comm comm, std::size_t target_dofs,
                                     bool target_dofs_total,
                                     std::size_t dofs_per_node, int order,
                                     bool use_subcomm);

std::shared_ptr>
create_spoke_mesh(MPI_Comm comm, std::size_t target_dofs,
                  bool target_dofs_total, std::size_t dofs_per_node);
performance-test-0.8.0/src/poisson_problem.cpp000066400000000000000000000135261457237023300215170ustar00rootroot00000000000000// Copyright (C) 2017-2019 Chris N. Richardson and Garth N.
Wells // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT #include "poisson_problem.h" #include "Poisson.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace dolfinx; using T = PetscScalar; std::tuple>, std::shared_ptr>, std::function&, const la::Vector&)>> poisson::problem(std::shared_ptr> mesh, int order) { common::Timer t0("ZZZ FunctionSpace"); std::vector fs_poisson_a = {functionspace_form_Poisson_a1, functionspace_form_Poisson_a2, functionspace_form_Poisson_a3}; auto V = std::make_shared>( fem::create_functionspace(*fs_poisson_a.at(order - 1), "v_0", mesh)); t0.stop(); common::Timer t1("ZZZ Assemble"); common::Timer t2("ZZZ Create boundary conditions"); // Define boundary condition auto u0 = std::make_shared>(V); u0->x()->set(0); // Find facets with bc applied const int tdim = mesh->topology()->dim(); const std::vector bc_facets = mesh::locate_entities( *mesh, tdim - 1, [](auto x) { constexpr double eps = 1.0e-8; std::vector marker(x.extent(1), false); for (std::size_t p = 0; p < x.extent(1); ++p) { double x0 = x(0, p); if (std::abs(x0) < eps or std::abs(x0 - 1) < eps) marker[p] = true; } return marker; }); // Find constrained dofs const std::vector bdofs = fem::locate_dofs_topological( *V->mesh()->topology_mutable(), *V->dofmap(), tdim - 1, bc_facets); auto bc = std::make_shared>(u0, bdofs); t2.stop(); // Define coefficients common::Timer t3("ZZZ Create RHS function"); auto f = std::make_shared>(V); auto g = std::make_shared>(V); f->interpolate( [](auto x) -> std::pair, std::vector> { std::vector v(x.extent(1)); for (std::size_t p = 0; p < x.extent(1); ++p) { double dx = x(0, p) - 0.5; double dy = x(1, p) - 0.5; double dr = dx * dx + dy * dy; v[p] = 10 * std::exp(-dr / 0.02); } return {std::move(v), {v.size()}}; }); g->interpolate( [](auto x) -> std::pair, std::vector> { std::vector f(x.extent(1)); for 
(std::size_t p = 0; p < x.extent(1); ++p) f[p] = std::sin(5 * x(0, p)); return {f, {f.size()}}; }); t3.stop(); std::vector form_poisson_L = {form_Poisson_L1, form_Poisson_L2, form_Poisson_L3}; std::vector form_poisson_a = {form_Poisson_a1, form_Poisson_a2, form_Poisson_a3}; // Define variational forms auto L = std::make_shared>(fem::create_form( *form_poisson_L.at(order - 1), {V}, {{"w0", f}, {"w1", g}}, {}, {})); auto a = std::make_shared>(fem::create_form( *form_poisson_a.at(order - 1), {V, V}, {}, {}, {})); // Create matrices and vector, and assemble system std::shared_ptr A = std::make_shared( fem::petsc::create_matrix(*a), false); common::Timer t4("ZZZ Assemble matrix"); const std::vector constants_a = fem::pack_constants(*a); auto coeffs_a = fem::allocate_coefficient_storage(*a); fem::pack_coefficients(*a, coeffs_a); fem::assemble_matrix(la::petsc::Matrix::set_block_fn(A->mat(), ADD_VALUES), *a, constants_a, fem::make_coefficients_span(coeffs_a), {bc}); MatAssemblyBegin(A->mat(), MAT_FLUSH_ASSEMBLY); MatAssemblyEnd(A->mat(), MAT_FLUSH_ASSEMBLY); fem::set_diagonal(la::petsc::Matrix::set_fn(A->mat(), INSERT_VALUES), *V, {bc}); MatAssemblyBegin(A->mat(), MAT_FINAL_ASSEMBLY); MatAssemblyEnd(A->mat(), MAT_FINAL_ASSEMBLY); t4.stop(); // Create la::Vector la::Vector b(L->function_spaces()[0]->dofmap()->index_map, L->function_spaces()[0]->dofmap()->index_map_bs()); b.set(0); common::Timer t5("ZZZ Assemble vector"); const std::vector constants_L = fem::pack_constants(*L); auto coeffs_L = fem::allocate_coefficient_storage(*L); fem::pack_coefficients(*L, coeffs_L); fem::assemble_vector(b.mutable_array(), *L, constants_L, fem::make_coefficients_span(coeffs_L)); fem::apply_lifting(b.mutable_array(), {a}, {constants_L}, {fem::make_coefficients_span(coeffs_L)}, {{bc}}, {}, 1.0); b.scatter_rev(std::plus<>()); fem::set_bc(b.mutable_array(), {bc}); t5.stop(); t1.stop(); // Create Function to hold solution auto u = std::make_shared>(V); std::function&, const la::Vector&)> 
solver_function = [A](fem::Function& u, const la::Vector& b) { // Create solver la::petsc::KrylovSolver solver(MPI_COMM_WORLD); solver.set_from_options(); solver.set_operator(A->mat()); // Wrap la::Vector la::petsc::Vector _b(la::petsc::create_vector_wrap(b), false); la::petsc::Vector x(la::petsc::create_vector_wrap(*u.x()), false); // Solve int num_iter = solver.solve(x.vec(), _b.vec()); return num_iter; }; return {std::make_shared>(std::move(b)), u, solver_function}; } performance-test-0.8.0/src/poisson_problem.h000066400000000000000000000013331457237023300211550ustar00rootroot00000000000000// Copyright (C) 2017-2019 Chris N. Richardson and Garth N. Wells // // This file is part of FEniCS-miniapp (https://www.fenicsproject.org) // // SPDX-License-Identifier: MIT #pragma once #include #include #include #include #include #include namespace poisson { std::tuple>, std::shared_ptr>, std::function&, const dolfinx::la::Vector&)>> problem(std::shared_ptr> mesh, int order); } // namespace poisson